mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Merge branch 'saba/fix-unit-tests'
This commit is contained in:
commit
4832a107d7
6 changed files with 30 additions and 26 deletions
|
@ -15,6 +15,10 @@
|
|||
conda env create -f environment.yml
|
||||
conda activate semantic-search
|
||||
#+end_src
|
||||
*** Install Environment Dependencies
|
||||
#+begin_src shell
|
||||
sudo apt-get -y install libimage-exiftool-perl
|
||||
#+end_src
|
||||
|
||||
** Configure
|
||||
Configure application search types and their underlying data source/files in ~sample_config.yml~
|
||||
|
|
|
@ -18,9 +18,9 @@ from src.utils.rawconfig import FullConfig
|
|||
from src.processor.conversation.gpt import converse, message_to_log, message_to_prompt, understand
|
||||
|
||||
# Application Global State
|
||||
config = FullConfig()
|
||||
model = SearchModels()
|
||||
processor_config = ProcessorConfigModel()
|
||||
config = {}
|
||||
config_file = ""
|
||||
verbose = 0
|
||||
app = FastAPI()
|
||||
|
@ -33,11 +33,11 @@ def ui(request: Request):
|
|||
return templates.TemplateResponse("config.html", context={'request': request})
|
||||
|
||||
@app.get('/config', response_model=FullConfig)
|
||||
def config():
|
||||
def config_data():
|
||||
return config
|
||||
|
||||
@app.post('/config')
|
||||
async def config(updated_config: FullConfig):
|
||||
async def config_data(updated_config: FullConfig):
|
||||
global config
|
||||
config = updated_config
|
||||
with open(config_file, 'w') as outfile:
|
||||
|
@ -92,7 +92,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
|
|||
|
||||
@app.get('/regenerate')
|
||||
def regenerate(t: Optional[SearchType] = None):
|
||||
initialize_search(regenerate=True, t=t)
|
||||
initialize_search(config, regenerate=True, t=t)
|
||||
return {'status': 'ok', 'message': 'regeneration completed'}
|
||||
|
||||
|
||||
|
|
|
@ -149,7 +149,7 @@ def collate_results(hits, entries, count=5):
|
|||
in hits[0:count]]
|
||||
|
||||
|
||||
def setup(config: TextSearchConfig, regenerate: bool, verbose: bool) -> TextSearchModel:
|
||||
def setup(config: TextSearchConfig, regenerate: bool, verbose: bool=False) -> TextSearchModel:
|
||||
# Initialize Model
|
||||
bi_encoder, cross_encoder, top_k = initialize_model()
|
||||
|
||||
|
|
|
@ -154,7 +154,7 @@ def collate_results(hits, image_names, image_directory, count=5):
|
|||
in hits[0:count]]
|
||||
|
||||
|
||||
def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool) -> ImageSearchModel:
|
||||
def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool=False) -> ImageSearchModel:
|
||||
# Initialize Model
|
||||
encoder = initialize_model()
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@ def model_dir(tmp_path_factory):
|
|||
# Generate Image Embeddings from Test Images
|
||||
search_config = ContentTypeConfig()
|
||||
search_config.image = ImageSearchConfig(
|
||||
input_directory = Path('tests/data'),
|
||||
input_directory = 'tests/data',
|
||||
embeddings_file = model_dir.joinpath('.image_embeddings.pt'),
|
||||
batch_size = 10,
|
||||
use_xmp_metadata = False)
|
||||
|
@ -23,12 +23,12 @@ def model_dir(tmp_path_factory):
|
|||
|
||||
# Generate Notes Embeddings from Test Notes
|
||||
search_config.org = TextSearchConfig(
|
||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
||||
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'],
|
||||
input_filter = None,
|
||||
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
||||
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
||||
|
||||
asymmetric.setup(search_config.notes, regenerate=False, verbose=True)
|
||||
asymmetric.setup(search_config.org, regenerate=False, verbose=True)
|
||||
|
||||
return model_dir
|
||||
|
||||
|
@ -37,14 +37,14 @@ def model_dir(tmp_path_factory):
|
|||
def search_config(model_dir):
|
||||
search_config = ContentTypeConfig()
|
||||
search_config.org = TextSearchConfig(
|
||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
||||
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'],
|
||||
input_filter = None,
|
||||
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
||||
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
||||
|
||||
search_config.image = ImageSearchConfig(
|
||||
input_directory = Path('tests/data'),
|
||||
embeddings_file = Path('tests/data/.image_embeddings.pt'),
|
||||
input_directory = 'tests/data',
|
||||
embeddings_file = 'tests/data/.image_embeddings.pt',
|
||||
batch_size = 10,
|
||||
use_xmp_metadata = False)
|
||||
|
||||
|
|
|
@ -8,13 +8,12 @@ from fastapi.testclient import TestClient
|
|||
from src.main import app, model, config
|
||||
from src.search_type import asymmetric, image_search
|
||||
from src.utils.helpers import resolve_absolute_path
|
||||
from src.utils.rawconfig import FullConfig
|
||||
from src.utils.rawconfig import ContentTypeConfig
|
||||
|
||||
|
||||
# Arrange
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
client = TestClient(app)
|
||||
config = FullConfig()
|
||||
|
||||
# Test
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
|
@ -30,9 +29,10 @@ def test_search_with_invalid_search_type():
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_search_with_valid_search_type(search_config):
|
||||
def test_search_with_valid_search_type(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
config.content_type.image = search_config.image
|
||||
config.content_type = search_config
|
||||
# config.content_type.image = search_config.image
|
||||
for search_type in ["notes", "ledger", "music", "image"]:
|
||||
# Act
|
||||
response = client.get(f"/search?q=random&t={search_type}")
|
||||
|
@ -50,9 +50,9 @@ def test_regenerate_with_invalid_search_type():
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_regenerate_with_valid_search_type(search_config):
|
||||
def test_regenerate_with_valid_search_type(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
config.content_type.image = search_config.image
|
||||
config.content_type = search_config
|
||||
for search_type in ["notes", "ledger", "music", "image"]:
|
||||
# Act
|
||||
response = client.get(f"/regenerate?t={search_type}")
|
||||
|
@ -61,9 +61,9 @@ def test_regenerate_with_valid_search_type(search_config):
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_image_search(search_config):
|
||||
def test_image_search(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
config.content_type.image = search_config.image
|
||||
config.content_type = search_config
|
||||
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
||||
query_expected_image_pairs = [("brown kitten next to fallen plant", "kitten_park.jpg"),
|
||||
("a horse and dog on a leash", "horse_dog.jpg"),
|
||||
|
@ -83,9 +83,9 @@ def test_image_search(search_config):
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_notes_search(search_config):
|
||||
def test_notes_search(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||
user_query = "How to git install application?"
|
||||
|
||||
# Act
|
||||
|
@ -99,9 +99,9 @@ def test_notes_search(search_config):
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_notes_search_with_include_filter(search_config):
|
||||
def test_notes_search_with_include_filter(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||
user_query = "How to git install application? +Emacs"
|
||||
|
||||
# Act
|
||||
|
@ -115,9 +115,9 @@ def test_notes_search_with_include_filter(search_config):
|
|||
|
||||
|
||||
# ----------------------------------------------------------------------------------------------------
|
||||
def test_notes_search_with_exclude_filter(search_config):
|
||||
def test_notes_search_with_exclude_filter(search_config: ContentTypeConfig):
|
||||
# Arrange
|
||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
||||
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||
user_query = "How to git install application? -clone"
|
||||
|
||||
# Act
|
||||
|
|
Loading…
Reference in a new issue