mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Wrap words in quotes to trigger explicit filter from query
- Do not run the more expensive explicit filter until the word to be filtered has been completed by the user. This requires an end-sequence marker to identify the end of the explicit filter word and trigger filtering.
- A space isn't a good enough delimiter, as the explicit filter could be at the end of the query, in which case there is no trailing space.
This commit is contained in:
parent
8d9f507df3
commit
cdcee89ae5
3 changed files with 7 additions and 7 deletions
|
@ -17,8 +17,8 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class ExplicitFilter:
|
class ExplicitFilter:
|
||||||
# Filter Regex
|
# Filter Regex
|
||||||
required_regex = r'\+([^\s]+) ?'
|
required_regex = r'\+"(\w+)" ?'
|
||||||
blocked_regex = r'\-([^\s]+) ?'
|
blocked_regex = r'\-"(\w+)" ?'
|
||||||
|
|
||||||
def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'):
|
def __init__(self, filter_directory, search_type: SearchType, entry_key='raw'):
|
||||||
self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl")
|
self.filter_file = resolve_absolute_path(filter_directory / f"{search_type.name.lower()}_explicit_filter_entry_word_sets.pkl")
|
||||||
|
|
|
@ -133,7 +133,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
|
||||||
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
|
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
user_query = "How to git install application? +Emacs"
|
user_query = 'How to git install application? +"Emacs"'
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
response = client.get(f"/search?q={user_query}&n=1&t=org")
|
response = client.get(f"/search?q={user_query}&n=1&t=org")
|
||||||
|
@ -149,7 +149,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_
|
||||||
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
|
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
user_query = "How to git install application? -clone"
|
user_query = 'How to git install application? -"clone"'
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
response = client.get(f"/search?q={user_query}&n=1&t=org")
|
response = client.get(f"/search?q={user_query}&n=1&t=org")
|
||||||
|
|
|
@ -27,7 +27,7 @@ def test_explicit_exclude_filter(tmp_path):
|
||||||
# Arrange
|
# Arrange
|
||||||
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
||||||
embeddings, entries = arrange_content()
|
embeddings, entries = arrange_content()
|
||||||
q_with_exclude_filter = 'head -exclude_word tail'
|
q_with_exclude_filter = 'head -"exclude_word" tail'
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
can_filter = explicit_filter.can_filter(q_with_exclude_filter)
|
can_filter = explicit_filter.can_filter(q_with_exclude_filter)
|
||||||
|
@ -44,7 +44,7 @@ def test_explicit_include_filter(tmp_path):
|
||||||
# Arrange
|
# Arrange
|
||||||
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
||||||
embeddings, entries = arrange_content()
|
embeddings, entries = arrange_content()
|
||||||
query_with_include_filter = 'head +include_word tail'
|
query_with_include_filter = 'head +"include_word" tail'
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
can_filter = explicit_filter.can_filter(query_with_include_filter)
|
can_filter = explicit_filter.can_filter(query_with_include_filter)
|
||||||
|
@ -61,7 +61,7 @@ def test_explicit_include_and_exclude_filter(tmp_path):
|
||||||
# Arrange
|
# Arrange
|
||||||
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
explicit_filter = ExplicitFilter(tmp_path, SearchType.Org)
|
||||||
embeddings, entries = arrange_content()
|
embeddings, entries = arrange_content()
|
||||||
query_with_include_and_exclude_filter = 'head +include_word -exclude_word tail'
|
query_with_include_and_exclude_filter = 'head +"include_word" -"exclude_word" tail'
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
can_filter = explicit_filter.can_filter(query_with_include_and_exclude_filter)
|
can_filter = explicit_filter.can_filter(query_with_include_and_exclude_filter)
|
||||||
|
|
Loading…
Reference in a new issue