Wrap words in quotes to trigger explicit filter from query

- Do not run the more expensive explicit filter until the user has
  finished typing the word to be filtered. This requires an end-of-sequence
  marker (the closing quote) to identify the end of the explicit filter
  word and trigger filtering.

- A space isn't a good enough delimiter, as the explicit filter could be
  at the end of the query, in which case there is no trailing space.
This commit is contained in:
Debanjum Singh Solanky 2022-09-04 02:12:56 +03:00
parent 8d9f507df3
commit cdcee89ae5
3 changed files with 7 additions and 7 deletions

View file

@ -17,8 +17,8 @@ logger = logging.getLogger(__name__)
class ExplicitFilter: class ExplicitFilter:
# Filter Regex # Filter Regex
required_regex = r'\+([^\s]+) ?' required_regex = r'\+"(\w+)" ?'
blocked_regex = r'\-([^\s]+) ?' blocked_regex = r'\-"(\w+)" ?'
def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'): def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'):
self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl") self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl")

View file

@ -133,7 +133,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig): def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange # Arrange
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False) model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
user_query = "How to git install application? +Emacs" user_query = 'How to git install application? +"Emacs"'
# Act # Act
response = client.get(f"/search?q={user_query}&n=1&t=org") response = client.get(f"/search?q={user_query}&n=1&t=org")
@ -149,7 +149,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig): def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
# Arrange # Arrange
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False) model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
user_query = "How to git install application? -clone" user_query = 'How to git install application? -"clone"'
# Act # Act
response = client.get(f"/search?q={user_query}&n=1&t=org") response = client.get(f"/search?q={user_query}&n=1&t=org")

View file

@ -27,7 +27,7 @@ def test_explicit_exclude_filter(tmp_path):
# Arrange # Arrange
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
embeddings, entries = arrange_content() embeddings, entries = arrange_content()
q_with_exclude_filter = 'head -exclude_word tail' q_with_exclude_filter = 'head -"exclude_word" tail'
# Act # Act
can_filter = explicit_filter.can_filter(q_with_exclude_filter) can_filter = explicit_filter.can_filter(q_with_exclude_filter)
@ -44,7 +44,7 @@ def test_explicit_include_filter(tmp_path):
# Arrange # Arrange
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
embeddings, entries = arrange_content() embeddings, entries = arrange_content()
query_with_include_filter = 'head +include_word tail' query_with_include_filter = 'head +"include_word" tail'
# Act # Act
can_filter = explicit_filter.can_filter(query_with_include_filter) can_filter = explicit_filter.can_filter(query_with_include_filter)
@ -61,7 +61,7 @@ def test_explicit_include_and_exclude_filter(tmp_path):
# Arrange # Arrange
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org) explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
embeddings, entries = arrange_content() embeddings, entries = arrange_content()
query_with_include_and_exclude_filter = 'head +include_word -exclude_word tail' query_with_include_and_exclude_filter = 'head +"include_word" -"exclude_word" tail'
# Act # Act
can_filter = explicit_filter.can_filter(query_with_include_and_exclude_filter) can_filter = explicit_filter.can_filter(query_with_include_and_exclude_filter)