From cdcee89ae52347d35678ddb072d023fbfcb91b68 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 4 Sep 2022 02:12:56 +0300 Subject: [PATCH] Wrap words in quotes to trigger explicit filter from query - Do not run the more expensive explicit filter until the word to be filtered is completed by the user. This requires an end sequence marker to identify the end of the explicit word filter and trigger filtering - Space isn't a good enough delimiter, as the explicit filter could be at the end of the query, in which case there is no trailing space --- src/search_filter/explicit_filter.py | 4 ++-- tests/test_client.py | 4 ++-- tests/test_explicit_filter.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/search_filter/explicit_filter.py b/src/search_filter/explicit_filter.py index a719bcdd..e3b5bb9f 100644 --- a/src/search_filter/explicit_filter.py +++ b/src/search_filter/explicit_filter.py @@ -17,8 +17,8 @@ logger = logging.getLogger(__name__) class ExplicitFilter: # Filter Regex - required_regex = r'\+([^\s]+) ?' - blocked_regex = r'\-([^\s]+) ?' + required_regex = r'\+"(\w+)" ?' + blocked_regex = r'\-"(\w+)" ?' def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'): self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl") diff --git a/tests/test_client.py b/tests/test_client.py index a80b2fa1..e9b632be 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -133,7 +133,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig): # Arrange model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False) - user_query = "How to git install application? +Emacs" + user_query = 'How to git install application? 
+"Emacs"' # Act response = client.get(f"/search?q={user_query}&n=1&t=org") @@ -149,7 +149,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_ def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig): # Arrange model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False) - user_query = "How to git install application? -clone" + user_query = 'How to git install application? -"clone"' # Act response = client.get(f"/search?q={user_query}&n=1&t=org") diff --git a/tests/test_explicit_filter.py b/tests/test_explicit_filter.py index 9d4c022a..5f34b0ac 100644 --- a/tests/test_explicit_filter.py +++ b/tests/test_explicit_filter.py @@ -27,7 +27,7 @@ def test_explicit_exclude_filter(tmp_path): # Arrange explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) embeddings, entries = arrange_content() - q_with_exclude_filter = 'head -exclude_word tail' + q_with_exclude_filter = 'head -"exclude_word" tail' # Act can_filter = explicit_filter.can_filter(q_with_exclude_filter) @@ -44,7 +44,7 @@ def test_explicit_include_filter(tmp_path): # Arrange explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) embeddings, entries = arrange_content() - query_with_include_filter = 'head +include_word tail' + query_with_include_filter = 'head +"include_word" tail' # Act can_filter = explicit_filter.can_filter(query_with_include_filter) @@ -61,7 +61,7 @@ def test_explicit_include_and_exclude_filter(tmp_path): # Arrange explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) embeddings, entries = arrange_content() - query_with_include_and_exclude_filter = 'head +include_word -exclude_word tail' + query_with_include_and_exclude_filter = 'head +"include_word" -"exclude_word" tail' # Act can_filter = explicit_filter.can_filter(query_with_include_and_exclude_filter)