From 97a6dfaa1efdcf2a4d98656468c2bf77847d7999 Mon Sep 17 00:00:00 2001 From: Saba Date: Sat, 11 Dec 2021 14:13:14 -0500 Subject: [PATCH 1/3] Use default value False for verbose parameter, and small changes Pass config as parameter to initialize_search, change name of API methods to handle config CRUD operations, and initialize config to FullConfig --- src/main.py | 8 ++++---- src/search_type/asymmetric.py | 2 +- src/search_type/image_search.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main.py b/src/main.py index bfd6fafb..4c6783f6 100644 --- a/src/main.py +++ b/src/main.py @@ -18,9 +18,9 @@ from src.utils.rawconfig import FullConfig from src.processor.conversation.gpt import converse, message_to_log, message_to_prompt, understand # Application Global State +config = FullConfig() model = SearchModels() processor_config = ProcessorConfigModel() -config = {} config_file = "" verbose = 0 app = FastAPI() @@ -33,11 +33,11 @@ def ui(request: Request): return templates.TemplateResponse("config.html", context={'request': request}) @app.get('/config', response_model=FullConfig) -def config(): +def config_data(): return config @app.post('/config') -async def config(updated_config: FullConfig): +async def config_data(updated_config: FullConfig): global config config = updated_config with open(config_file, 'w') as outfile: @@ -92,7 +92,7 @@ def search(q: str, n: Optional[int] = 5, t: Optional[SearchType] = None): @app.get('/regenerate') def regenerate(t: Optional[SearchType] = None): - initialize_search(regenerate=True, t=t) + initialize_search(config, regenerate=True, t=t) return {'status': 'ok', 'message': 'regeneration completed'} diff --git a/src/search_type/asymmetric.py b/src/search_type/asymmetric.py index bdf7ddff..416cf7e2 100644 --- a/src/search_type/asymmetric.py +++ b/src/search_type/asymmetric.py @@ -149,7 +149,7 @@ def collate_results(hits, entries, count=5): in hits[0:count]] -def setup(config: TextSearchConfig, regenerate: bool, 
verbose: bool) -> TextSearchModel: +def setup(config: TextSearchConfig, regenerate: bool, verbose: bool=False) -> TextSearchModel: # Initialize Model bi_encoder, cross_encoder, top_k = initialize_model() diff --git a/src/search_type/image_search.py b/src/search_type/image_search.py index f0025328..f8af7d8e 100644 --- a/src/search_type/image_search.py +++ b/src/search_type/image_search.py @@ -154,7 +154,7 @@ def collate_results(hits, image_names, image_directory, count=5): in hits[0:count]] -def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool) -> ImageSearchModel: +def setup(config: ImageSearchConfig, regenerate: bool, verbose: bool=False) -> ImageSearchModel: # Initialize Model encoder = initialize_model() From 9ebf00e29bcf134d7745707ba9ec52ca9ee4ff7d Mon Sep 17 00:00:00 2001 From: Saba Date: Sat, 11 Dec 2021 14:13:37 -0500 Subject: [PATCH 2/3] Add instructions for installing exiftool to README (for Ubuntu only) --- README.org | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.org b/README.org index e669dafa..272626c7 100644 --- a/README.org +++ b/README.org @@ -15,6 +15,10 @@ conda env create -f environment.yml conda activate semantic-search #+end_src + *** Install Environment Dependencies + #+begin_src shell + sudo apt-get -y install libimage-exiftool-perl + #+end_src ** Configure Configure application search types and their underlying data source/files in ~sample_config.yml~ From ba8dc9ed5fec3454e3b5b403cb222bf8b033deaf Mon Sep 17 00:00:00 2001 From: Saba Date: Sat, 11 Dec 2021 14:14:31 -0500 Subject: [PATCH 3/3] Update the search_config instantiated for tests in conftest --- tests/conftest.py | 12 ++++++------ tests/test_client.py | 28 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fcff1510..48c0cf51 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,7 @@ def model_dir(tmp_path_factory): # Generate Image Embeddings from Test 
Images search_config = ContentTypeConfig() search_config.image = ImageSearchConfig( - input_directory = Path('tests/data'), + input_directory = 'tests/data', embeddings_file = model_dir.joinpath('.image_embeddings.pt'), batch_size = 10, use_xmp_metadata = False) @@ -23,12 +23,12 @@ def model_dir(tmp_path_factory): # Generate Notes Embeddings from Test Notes search_config.org = TextSearchConfig( - input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')], + input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'], input_filter = None, compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'), embeddings_file = model_dir.joinpath('.note_embeddings.pt')) - asymmetric.setup(search_config.notes, regenerate=False, verbose=True) + asymmetric.setup(search_config.org, regenerate=False, verbose=True) return model_dir @@ -37,14 +37,14 @@ def model_dir(tmp_path_factory): def search_config(model_dir): search_config = ContentTypeConfig() search_config.org = TextSearchConfig( - input_files = [Path('tests/data/main_readme.org'), Path('tests/data/interface_emacs_readme.org')], + input_files = ['tests/data/main_readme.org', 'tests/data/interface_emacs_readme.org'], input_filter = None, compressed_jsonl = model_dir.joinpath('.notes.jsonl.gz'), embeddings_file = model_dir.joinpath('.note_embeddings.pt')) search_config.image = ImageSearchConfig( - input_directory = Path('tests/data'), - embeddings_file = Path('tests/data/.image_embeddings.pt'), + input_directory = 'tests/data', + embeddings_file = 'tests/data/.image_embeddings.pt', batch_size = 10, use_xmp_metadata = False) diff --git a/tests/test_client.py b/tests/test_client.py index bf8815fa..f363a243 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -8,13 +8,12 @@ from fastapi.testclient import TestClient from src.main import app, model, config from src.search_type import asymmetric, image_search from src.utils.helpers import resolve_absolute_path 
-from src.utils.rawconfig import FullConfig +from src.utils.rawconfig import ContentTypeConfig # Arrange # ---------------------------------------------------------------------------------------------------- client = TestClient(app) -config = FullConfig() # Test # ---------------------------------------------------------------------------------------------------- @@ -30,9 +29,10 @@ def test_search_with_invalid_search_type(): # ---------------------------------------------------------------------------------------------------- -def test_search_with_valid_search_type(search_config): +def test_search_with_valid_search_type(search_config: ContentTypeConfig): # Arrange - config.content_type.image = search_config.image + config.content_type = search_config + # config.content_type.image = search_config.image for search_type in ["notes", "ledger", "music", "image"]: # Act response = client.get(f"/search?q=random&t={search_type}") @@ -50,9 +50,9 @@ def test_regenerate_with_invalid_search_type(): # ---------------------------------------------------------------------------------------------------- -def test_regenerate_with_valid_search_type(search_config): +def test_regenerate_with_valid_search_type(search_config: ContentTypeConfig): # Arrange - config.content_type.image = search_config.image + config.content_type = search_config for search_type in ["notes", "ledger", "music", "image"]: # Act response = client.get(f"/regenerate?t={search_type}") @@ -61,9 +61,9 @@ def test_regenerate_with_valid_search_type(search_config): # ---------------------------------------------------------------------------------------------------- -def test_image_search(search_config): +def test_image_search(search_config: ContentTypeConfig): # Arrange - config.content_type.image = search_config.image + config.content_type = search_config model.image_search = image_search.setup(search_config.image, regenerate=False) query_expected_image_pairs = [("brown kitten next to fallen plant", 
"kitten_park.jpg"), ("a horse and dog on a leash", "horse_dog.jpg"), @@ -83,9 +83,9 @@ def test_image_search(search_config): # ---------------------------------------------------------------------------------------------------- -def test_notes_search(search_config): +def test_notes_search(search_config: ContentTypeConfig): # Arrange - model.notes_search = asymmetric.setup(search_config.notes, regenerate=False) + model.notes_search = asymmetric.setup(search_config.org, regenerate=False) user_query = "How to git install application?" # Act @@ -99,9 +99,9 @@ def test_notes_search(search_config): # ---------------------------------------------------------------------------------------------------- -def test_notes_search_with_include_filter(search_config): +def test_notes_search_with_include_filter(search_config: ContentTypeConfig): # Arrange - model.notes_search = asymmetric.setup(search_config.notes, regenerate=False) + model.notes_search = asymmetric.setup(search_config.org, regenerate=False) user_query = "How to git install application? +Emacs" # Act @@ -115,9 +115,9 @@ def test_notes_search_with_include_filter(search_config): # ---------------------------------------------------------------------------------------------------- -def test_notes_search_with_exclude_filter(search_config): +def test_notes_search_with_exclude_filter(search_config: ContentTypeConfig): # Arrange - model.notes_search = asymmetric.setup(search_config.notes, regenerate=False) + model.notes_search = asymmetric.setup(search_config.org, regenerate=False) user_query = "How to git install application? -clone" # Act