mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Merge branch 'saba/fix-unit-tests'
This commit is contained in:
commit
4832a107d7
6 changed files with 30 additions and 26 deletions
|
@ -15,6 +15,10 @@
|
||||||
conda env create -f environment.yml
|
conda env create -f environment.yml
|
||||||
conda activate semantic-search
|
conda activate semantic-search
|
||||||
#+end_src
|
#+end_src
|
||||||
|
*** Install Environment Dependencies
|
||||||
|
#+begin_src shell
|
||||||
|
sudo apt-get -y install libimage-exiftool-perl
|
||||||
|
#+end_src
|
||||||
|
|
||||||
** Configure
|
** Configure
|
||||||
Configure application search types and their underlying data source/files in ~sample_config.yml~
|
Configure application search types and their underlying data source/files in ~sample_config.yml~
|
||||||
|
|
|
@ -18,9 +18,9 @@ from src.utils.rawconfig import FullConfig
|
||||||
from src.processor.conversation.gpt import converse, message_to_log, message_to_prompt, understand
|
from src.processor.conversation.gpt import converse, message_to_log, message_to_prompt, understand
|
||||||
|
|
||||||
# Application Global State
|
# Application Global State
|
||||||
|
config = FullConfig()
|
||||||
model = SearchModels()
|
model = SearchModels()
|
||||||
processor_config = ProcessorConfigModel()
|
processor_config = ProcessorConfigModel()
|
||||||
config = {}
|
|
||||||
config_file = ""
|
config_file = ""
|
||||||
verbose = 0
|
verbose = 0
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
@ -33,11 +33,11 @@ def ui(request: Request):
|
||||||
return templates.TemplateResponse("config.html", context={'request': request})
|
return templates.TemplateResponse("config.html", context={'request': request})
|
||||||
|
|
||||||
@app.get('/config', response_model=FullConfig)
|
@app.get('/config', response_model=FullConfig)
|
||||||
def config():
|
def config_data():
|
||||||
return config
|
return config
|
||||||
|
|
||||||
@app.post('/config')
|
@app.post('/config')
|
||||||
async def config(updated_config: FullConfig):
|
async def config_data(updated_config: FullConfig):
|
||||||
global config
|
global config
|
||||||
config = updated_config
|
config = updated_config
|
||||||
with open(config_file, 'w') as outfile:
|
with open(config_file, 'w') as outfile:
|
||||||
|
@ -92,7 +92,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None):
|
||||||
|
|
||||||
@app.get('/regenerate')
|
@app.get('/regenerate')
|
||||||
def regenerate(t: Optional[SearchType] = None):
|
def regenerate(t: Optional[SearchType] = None):
|
||||||
initialize_search(regenerate=True, t=t)
|
initialize_search(config, regenerate=True, t=t)
|
||||||
return {'status': 'ok', 'message': 'regeneration completed'}
|
return {'status': 'ok', 'message': 'regeneration completed'}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -149,7 +149,7 @@ def collate_results(hits, entries, count=5):
|
||||||
in hits[0:count]]
|
in hits[0:count]]
|
||||||
|
|
||||||
|
|
||||||
def setup(config: TextSearchConfig, regenerate: bool, verbose: bool) -> TextSearchModel:
|
def setup(config: TextSearchConfig, regenerate: bool, verbose: bool=False) -> TextSearchModel:
|
||||||
# Initialize Model
|
# Initialize Model
|
||||||
bi_encoder, cross_encoder, top_k = initialize_model()
|
bi_encoder, cross_encoder, top_k = initialize_model()
|
||||||
|
|
||||||
|
|
|
@ -154,7 +154,7 @@ def collate_results(hits, image_names, image_directory, count=5):
|
||||||
in hits[0:count]]
|
in hits[0:count]]
|
||||||
|
|
||||||
|
|
||||||
def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool) -> ImageSearchModel:
|
def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool=False) -> ImageSearchModel:
|
||||||
# Initialize Model
|
# Initialize Model
|
||||||
encoder = initialize_model()
|
encoder = initialize_model()
|
||||||
|
|
||||||
|
|
|
@ -14,7 +14,7 @@ def model_dir(tmp_path_factory):
|
||||||
# Generate Image Embeddings from Test Images
|
# Generate Image Embeddings from Test Images
|
||||||
search_config = ContentTypeConfig()
|
search_config = ContentTypeConfig()
|
||||||
search_config.image = ImageSearchConfig(
|
search_config.image = ImageSearchConfig(
|
||||||
input_directory = Path('tests/data'),
|
input_directory = 'tests/data',
|
||||||
embeddings_file = model_dir.joinpath('.image_embeddings.pt'),
|
embeddings_file = model_dir.joinpath('.image_embeddings.pt'),
|
||||||
batch_size = 10,
|
batch_size = 10,
|
||||||
use_xmp_metadata = False)
|
use_xmp_metadata = False)
|
||||||
|
@ -23,12 +23,12 @@ def model_dir(tmp_path_factory):
|
||||||
|
|
||||||
# Generate Notes Embeddings from Test Notes
|
# Generate Notes Embeddings from Test Notes
|
||||||
search_config.org = TextSearchConfig(
|
search_config.org = TextSearchConfig(
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'],
|
||||||
input_filter = None,
|
input_filter = None,
|
||||||
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
||||||
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
||||||
|
|
||||||
asymmetric.setup(search_config.notes, regenerate=False, verbose=True)
|
asymmetric.setup(search_config.org, regenerate=False, verbose=True)
|
||||||
|
|
||||||
return model_dir
|
return model_dir
|
||||||
|
|
||||||
|
@ -37,14 +37,14 @@ def model_dir(tmp_path_factory):
|
||||||
def search_config(model_dir):
|
def search_config(model_dir):
|
||||||
search_config = ContentTypeConfig()
|
search_config = ContentTypeConfig()
|
||||||
search_config.org = TextSearchConfig(
|
search_config.org = TextSearchConfig(
|
||||||
input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')],
|
input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'],
|
||||||
input_filter = None,
|
input_filter = None,
|
||||||
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'),
|
||||||
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
embeddings_file = model_dir.joinpath('.note_embeddings.pt'))
|
||||||
|
|
||||||
search_config.image = ImageSearchConfig(
|
search_config.image = ImageSearchConfig(
|
||||||
input_directory = Path('tests/data'),
|
input_directory = 'tests/data',
|
||||||
embeddings_file = Path('tests/data/.image_embeddings.pt'),
|
embeddings_file = 'tests/data/.image_embeddings.pt',
|
||||||
batch_size = 10,
|
batch_size = 10,
|
||||||
use_xmp_metadata = False)
|
use_xmp_metadata = False)
|
||||||
|
|
||||||
|
|
|
@ -8,13 +8,12 @@ from fastapi.testclient import TestClient
|
||||||
from src.main import app, model, config
|
from src.main import app, model, config
|
||||||
from src.search_type import asymmetric, image_search
|
from src.search_type import asymmetric, image_search
|
||||||
from src.utils.helpers import resolve_absolute_path
|
from src.utils.helpers import resolve_absolute_path
|
||||||
from src.utils.rawconfig import FullConfig
|
from src.utils.rawconfig import ContentTypeConfig
|
||||||
|
|
||||||
|
|
||||||
# Arrange
|
# Arrange
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
client = TestClient(app)
|
client = TestClient(app)
|
||||||
config = FullConfig()
|
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@ -30,9 +29,10 @@ def test_search_with_invalid_search_type():
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_search_with_valid_search_type(search_config):
|
def test_search_with_valid_search_type(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
config.content_type.image = search_config.image
|
config.content_type = search_config
|
||||||
|
# config.content_type.image = search_config.image
|
||||||
for search_type in ["notes", "ledger", "music", "image"]:
|
for search_type in ["notes", "ledger", "music", "image"]:
|
||||||
# Act
|
# Act
|
||||||
response = client.get(f"/search?q=random&t={search_type}")
|
response = client.get(f"/search?q=random&t={search_type}")
|
||||||
|
@ -50,9 +50,9 @@ def test_regenerate_with_invalid_search_type():
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_regenerate_with_valid_search_type(search_config):
|
def test_regenerate_with_valid_search_type(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
config.content_type.image = search_config.image
|
config.content_type = search_config
|
||||||
for search_type in ["notes", "ledger", "music", "image"]:
|
for search_type in ["notes", "ledger", "music", "image"]:
|
||||||
# Act
|
# Act
|
||||||
response = client.get(f"/regenerate?t={search_type}")
|
response = client.get(f"/regenerate?t={search_type}")
|
||||||
|
@ -61,9 +61,9 @@ def test_regenerate_with_valid_search_type(search_config):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_image_search(search_config):
|
def test_image_search(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
config.content_type.image = search_config.image
|
config.content_type = search_config
|
||||||
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
model.image_search = image_search.setup(search_config.image, regenerate=False)
|
||||||
query_expected_image_pairs = [("brown kitten next to fallen plant", "kitten_park.jpg"),
|
query_expected_image_pairs = [("brown kitten next to fallen plant", "kitten_park.jpg"),
|
||||||
("a horse and dog on a leash", "horse_dog.jpg"),
|
("a horse and dog on a leash", "horse_dog.jpg"),
|
||||||
|
@ -83,9 +83,9 @@ def test_image_search(search_config):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search(search_config):
|
def test_notes_search(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||||
user_query = "How to git install application?"
|
user_query = "How to git install application?"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
@ -99,9 +99,9 @@ def test_notes_search(search_config):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_include_filter(search_config):
|
def test_notes_search_with_include_filter(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||||
user_query = "How to git install application? +Emacs"
|
user_query = "How to git install application? +Emacs"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
@ -115,9 +115,9 @@ def test_notes_search_with_include_filter(search_config):
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_exclude_filter(search_config):
|
def test_notes_search_with_exclude_filter(search_config: ContentTypeConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.notes_search = asymmetric.setup(search_config.notes, regenerate=False)
|
model.notes_search = asymmetric.setup(search_config.org, regenerate=False)
|
||||||
user_query = "How to git install application? -clone"
|
user_query = "How to git install application? -clone"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
|
Loading…
Add table
Reference in a new issue