mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 19:03:01 +01:00
Update Tests to Configure Filters and Setup Text Search
This commit is contained in:
parent
c7de57b8ea
commit
30c3eb372a
4 changed files with 19 additions and 17 deletions
|
@ -3,9 +3,9 @@ import pytest
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.search_type import image_search, text_search
|
from src.search_type import image_search, text_search
|
||||||
|
from src.utils.config import SearchType
|
||||||
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
|
from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
|
||||||
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
||||||
from src.utils import state
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture(scope='session')
|
@pytest.fixture(scope='session')
|
||||||
|
@ -46,7 +46,7 @@ def model_dir(search_config):
|
||||||
batch_size = 10,
|
batch_size = 10,
|
||||||
use_xmp_metadata = False)
|
use_xmp_metadata = False)
|
||||||
|
|
||||||
image_search.setup(content_config.image, search_config.image, regenerate=False, verbose=True)
|
image_search.setup(content_config.image, search_config.image, regenerate=False)
|
||||||
|
|
||||||
# Generate Notes Embeddings from Test Notes
|
# Generate Notes Embeddings from Test Notes
|
||||||
content_config.org = TextContentConfig(
|
content_config.org = TextContentConfig(
|
||||||
|
@ -55,7 +55,7 @@ def model_dir(search_config):
|
||||||
compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
|
compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
|
||||||
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
|
embeddings_file = model_dir.joinpath('note_embeddings.pt'))
|
||||||
|
|
||||||
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, verbose=True)
|
text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
|
|
||||||
return model_dir
|
return model_dir
|
||||||
|
|
||||||
|
|
|
@ -8,6 +8,7 @@ import pytest
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.main import app
|
from src.main import app
|
||||||
|
from src.utils.config import SearchType
|
||||||
from src.utils.state import model, config
|
from src.utils.state import model, config
|
||||||
from src.search_type import text_search, image_search
|
from src.search_type import text_search, image_search
|
||||||
from src.utils.rawconfig import ContentConfig, SearchConfig
|
from src.utils.rawconfig import ContentConfig, SearchConfig
|
||||||
|
@ -115,7 +116,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
|
def test_notes_search(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
user_query = "How to git install application?"
|
user_query = "How to git install application?"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
@ -131,7 +132,7 @@ def test_notes_search(content_config: ContentConfig, search_config: SearchConfig
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
|
def test_notes_search_with_include_filter(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
user_query = "How to git install application? +Emacs"
|
user_query = "How to git install application? +Emacs"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
@ -147,7 +148,7 @@ def test_notes_search_with_include_filter(content_config: ContentConfig, search_
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
|
def test_notes_search_with_exclude_filter(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
model.orgmode_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
user_query = "How to git install application? -clone"
|
user_query = "How to git install application? -clone"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
|
|
@ -18,37 +18,37 @@ def test_date_filter():
|
||||||
{'compiled': '', 'raw': 'Entry with date:1984-04-02'}]
|
{'compiled': '', 'raw': 'Entry with date:1984-04-02'}]
|
||||||
|
|
||||||
q_with_no_date_filter = 'head tail'
|
q_with_no_date_filter = 'head tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(q_with_no_date_filter, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(q_with_no_date_filter, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert len(ret_emb) == 3
|
assert len(ret_emb) == 3
|
||||||
assert ret_entries == entries
|
assert ret_entries == entries
|
||||||
|
|
||||||
q_with_dtrange_non_overlapping_at_boundary = 'head dt>"1984-04-01" dt<"1984-04-02" tail'
|
q_with_dtrange_non_overlapping_at_boundary = 'head dt>"1984-04-01" dt<"1984-04-02" tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(q_with_dtrange_non_overlapping_at_boundary, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(q_with_dtrange_non_overlapping_at_boundary, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert len(ret_emb) == 0
|
assert len(ret_emb) == 0
|
||||||
assert ret_entries == []
|
assert ret_entries == []
|
||||||
|
|
||||||
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<"1984-04-03" tail'
|
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<"1984-04-03" tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert ret_entries == [entries[2]]
|
assert ret_entries == [entries[2]]
|
||||||
assert len(ret_emb) == 1
|
assert len(ret_emb) == 1
|
||||||
|
|
||||||
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<"1984-04-02" tail'
|
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<"1984-04-02" tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert ret_entries == [entries[1]]
|
assert ret_entries == [entries[1]]
|
||||||
assert len(ret_emb) == 1
|
assert len(ret_emb) == 1
|
||||||
|
|
||||||
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<="1984-04-02" tail'
|
query_with_overlapping_dtrange = 'head dt>"1984-04-01" dt<="1984-04-02" tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert ret_entries == [entries[2]]
|
assert ret_entries == [entries[2]]
|
||||||
assert len(ret_emb) == 1
|
assert len(ret_emb) == 1
|
||||||
|
|
||||||
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<="1984-04-02" tail'
|
query_with_overlapping_dtrange = 'head dt>="1984-04-01" dt<="1984-04-02" tail'
|
||||||
ret_query, ret_entries, ret_emb = DateFilter().filter(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
ret_query, ret_entries, ret_emb = DateFilter().apply(query_with_overlapping_dtrange, entries.copy(), embeddings)
|
||||||
assert ret_query == 'head tail'
|
assert ret_query == 'head tail'
|
||||||
assert ret_entries == [entries[1], entries[2]]
|
assert ret_entries == [entries[1], entries[2]]
|
||||||
assert len(ret_emb) == 2
|
assert len(ret_emb) == 2
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
# System Packages
|
# System Packages
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from src.utils.config import SearchType
|
||||||
|
|
||||||
# Internal Packages
|
# Internal Packages
|
||||||
from src.utils.state import model
|
from src.utils.state import model
|
||||||
|
@ -13,7 +14,7 @@ from src.processor.org_mode.org_to_jsonl import org_to_jsonl
|
||||||
def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig):
|
def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Act
|
# Act
|
||||||
# Regenerate notes embeddings during asymmetric setup
|
# Regenerate notes embeddings during asymmetric setup
|
||||||
notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True)
|
notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=True)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert len(notes_model.entries) == 10
|
assert len(notes_model.entries) == 10
|
||||||
|
@ -23,7 +24,7 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
|
def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
query = "How to git install application?"
|
query = "How to git install application?"
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
|
@ -46,7 +47,7 @@ def test_asymmetric_search(content_config: ContentConfig, search_config: SearchC
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig):
|
def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchConfig):
|
||||||
# Arrange
|
# Arrange
|
||||||
initial_notes_model= text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
initial_notes_model= text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
|
|
||||||
assert len(initial_notes_model.entries) == 10
|
assert len(initial_notes_model.entries) == 10
|
||||||
assert len(initial_notes_model.corpus_embeddings) == 10
|
assert len(initial_notes_model.corpus_embeddings) == 10
|
||||||
|
@ -59,11 +60,11 @@ def test_asymmetric_reload(content_config: ContentConfig, search_config: SearchC
|
||||||
f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")
|
f.write("\n* A Chihuahua doing Tango\n- Saw a super cute video of a chihuahua doing the Tango on Youtube\n")
|
||||||
|
|
||||||
# regenerate notes jsonl, model embeddings and model to include entry from new file
|
# regenerate notes jsonl, model embeddings and model to include entry from new file
|
||||||
regenerated_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True)
|
regenerated_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=True)
|
||||||
|
|
||||||
# Act
|
# Act
|
||||||
# reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
|
# reload embeddings, entries, notes model from previously generated notes jsonl and model embeddings files
|
||||||
initial_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
|
initial_notes_model = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, SearchType.Org, regenerate=False)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert len(regenerated_notes_model.entries) == 11
|
assert len(regenerated_notes_model.entries) == 11
|
||||||
|
|
Loading…
Reference in a new issue