From 1bfe9c4ef297bec37596ac007640d4552401a6a9 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Mon, 12 Sep 2022 16:42:41 +0300 Subject: [PATCH] Handle filter-only queries. Short-circuit and return filtered results - For queries with only filters in them, short-circuit and return filtered results. No need to run semantic search or re-ranking. - Add client test for filter-only query and URL-quote queries in client tests --- src/search_type/text_search.py | 5 +++++ tests/test_client.py | 29 ++++++++++++++++++++++++----- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/search_type/text_search.py b/src/search_type/text_search.py index cd669094..90c822ac 100644 --- a/src/search_type/text_search.py +++ b/src/search_type/text_search.py @@ -112,6 +112,11 @@ def query(raw_query: str, model: TextSearchModel, rank_results=False): if entries is None or len(entries) == 0: return [], [] + # If query only had filters it'll be empty now. So short-circuit and return results. + if query.strip() == "": + hits = [{"corpus_id": id, "score": 1.0} for id, _ in enumerate(entries)] + return hits, entries + # Encode the query using the bi-encoder start = time.time() question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device) diff --git a/tests/test_client.py b/tests/test_client.py index b167bce0..d405a044 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,18 +1,20 @@ # Standard Modules from io import BytesIO from PIL import Image +from urllib.parse import quote + # External Packages from fastapi.testclient import TestClient # Internal Packages from src.main import app -from src.utils.config import SearchType from src.utils.state import model, config from src.search_type import text_search, image_search from src.utils.rawconfig import ContentConfig, SearchConfig from src.processor.org_mode.org_to_jsonl import org_to_jsonl from src.search_filter.word_filter import WordFilter +from src.search_filter.file_filter import 
FileFilter # Arrange @@ -23,7 +25,7 @@ client = TestClient(app) # ---------------------------------------------------------------------------------------------------- def test_search_with_invalid_content_type(): # Arrange - user_query = "How to call Khoj from Emacs?" + user_query = quote("How to call Khoj from Emacs?") # Act response = client.get(f"/search?q={user_query}&t=invalid_content_type") @@ -117,7 +119,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig def test_notes_search(content_config: ContentConfig, search_config: SearchConfig): # Arrange model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False) - user_query = "How to git install application?" + user_query = quote("How to git install application?") # Act response = client.get(f"/search?q={user_query}&n=1&t=org&r=true") @@ -129,12 +131,29 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig assert "git clone" in search_result +# ---------------------------------------------------------------------------------------------------- +def test_notes_search_with_only_filters(content_config: ContentConfig, search_config: SearchConfig): + # Arrange + filters = [WordFilter(), FileFilter()] + model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) + user_query = quote('+"Emacs" file:"*.org"') + + # Act + response = client.get(f"/search?q={user_query}&n=1&t=org") + + # Assert + assert response.status_code == 200 + # assert actual_data contains word "Emacs" + search_result = response.json()[0]["entry"] + assert "Emacs" in search_result + + # ---------------------------------------------------------------------------------------------------- def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig): # Arrange filters = [WordFilter()] model.orgmode_search = 
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) - user_query = 'How to git install application? +"Emacs"' + user_query = quote('How to git install application? +"Emacs"') # Act response = client.get(f"/search?q={user_query}&n=1&t=org") @@ -151,7 +170,7 @@ def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_ # Arrange filters = [WordFilter()] model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters) - user_query = 'How to git install application? -"clone"' + user_query = quote('How to git install application? -"clone"') # Act response = client.get(f"/search?q={user_query}&n=1&t=org")