mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Test search uses ancestor headings as context for improved results
- Update test data to add a deeper outline hierarchy, so tests can verify that ancestor headings are used as context
- Update collateral tests whose asserts on the count of entries updated, deleted needed to change
This commit is contained in:
parent
74403e3536
commit
ddb07def0d
6 changed files with 77 additions and 18 deletions
|
@ -386,6 +386,44 @@ def sample_org_data():
|
||||||
def get_sample_data(type):
|
def get_sample_data(type):
|
||||||
sample_data = {
|
sample_data = {
|
||||||
"org": {
|
"org": {
|
||||||
|
"elisp.org": """
|
||||||
|
* Emacs Khoj
|
||||||
|
/An Emacs interface for [[https://github.com/khoj-ai/khoj][khoj]]/
|
||||||
|
|
||||||
|
** Requirements
|
||||||
|
- Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
|
||||||
|
|
||||||
|
** Installation
|
||||||
|
*** Direct
|
||||||
|
- Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp
|
||||||
|
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
|
||||||
|
#+begin_src elisp
|
||||||
|
;; Khoj Package
|
||||||
|
(use-package khoj
|
||||||
|
:load-path "~/.emacs.d/lisp/khoj.el"
|
||||||
|
:bind ("C-c s" . 'khoj))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]]
|
||||||
|
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
|
||||||
|
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
|
||||||
|
#+begin_src elisp
|
||||||
|
;; Khoj Package
|
||||||
|
(use-package khoj
|
||||||
|
:quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el")
|
||||||
|
:bind ("C-c s" . 'khoj))
|
||||||
|
#+end_src
|
||||||
|
|
||||||
|
** Usage
|
||||||
|
1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~
|
||||||
|
2. Enter Query in Natural Language
|
||||||
|
e.g "What is the meaning of life?" "What are my life goals?"
|
||||||
|
3. Wait for results
|
||||||
|
*Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files*
|
||||||
|
4. (Optional) Narrow down results further
|
||||||
|
Include/Exclude specific words from results by adding to query
|
||||||
|
|
||||||
|
""",
|
||||||
"readme.org": """
|
"readme.org": """
|
||||||
* Khoj
|
* Khoj
|
||||||
/Allow natural language search on user content like notes, images using transformer based models/
|
/Allow natural language search on user content like notes, images using transformer based models/
|
||||||
|
@ -401,7 +439,7 @@ def get_sample_data(type):
|
||||||
git clone https://github.com/khoj-ai/khoj && cd khoj
|
git clone https://github.com/khoj-ai/khoj && cd khoj
|
||||||
conda env create -f environment.yml
|
conda env create -f environment.yml
|
||||||
conda activate khoj
|
conda activate khoj
|
||||||
#+end_src"""
|
#+end_src""",
|
||||||
},
|
},
|
||||||
"markdown": {
|
"markdown": {
|
||||||
"readme.markdown": """
|
"readme.markdown": """
|
||||||
|
|
4
tests/data/org/interface_emacs_readme.org
vendored
4
tests/data/org/interface_emacs_readme.org
vendored
|
@ -5,7 +5,7 @@
|
||||||
- Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
|
- Install and Run [[https://github.com/khoj-ai/khoj][khoj]]
|
||||||
|
|
||||||
** Installation
|
** Installation
|
||||||
- Direct Install
|
*** Direct
|
||||||
- Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp
|
- Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp
|
||||||
|
|
||||||
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
|
- Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet
|
||||||
|
@ -16,7 +16,7 @@
|
||||||
:bind ("C-c s" . 'khoj))
|
:bind ("C-c s" . 'khoj))
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
- Use [[https://github.com/quelpa/quelpa#installation][Quelpa]]
|
*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]]
|
||||||
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
|
- Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed
|
||||||
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
|
- Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it.
|
||||||
#+begin_src elisp
|
#+begin_src elisp
|
||||||
|
|
6
tests/data/org/main_readme.org
vendored
6
tests/data/org/main_readme.org
vendored
|
@ -22,16 +22,16 @@
|
||||||
#+end_src
|
#+end_src
|
||||||
|
|
||||||
** Use
|
** Use
|
||||||
- *Khoj via Emacs*
|
*** *Khoj via Emacs*
|
||||||
- [[https://github.com/khoj-ai/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]]
|
- [[https://github.com/khoj-ai/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]]
|
||||||
- Run ~M-x khoj <user-query>~ or Call ~C-c C-s~
|
- Run ~M-x khoj <user-query>~ or Call ~C-c C-s~
|
||||||
|
|
||||||
- *Khoj via API*
|
*** *Khoj via API*
|
||||||
- Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]]
|
- Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]]
|
||||||
- Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]]
|
- Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]]
|
||||||
- [[http://localhost:42110/docs][Khoj API Docs]]
|
- [[http://localhost:42110/docs][Khoj API Docs]]
|
||||||
|
|
||||||
- *Call Khoj via Python Script Directly*
|
*** *Call Khoj via Python Script Directly*
|
||||||
#+begin_src shell
|
#+begin_src shell
|
||||||
python3 search_types/asymmetric.py \
|
python3 search_types/asymmetric.py \
|
||||||
--compressed-jsonl .notes.jsonl.gz \
|
--compressed-jsonl .notes.jsonl.gz \
|
||||||
|
|
|
@ -321,7 +321,7 @@ def test_notes_search_with_include_filter(client, sample_org_data, default_user:
|
||||||
assert response.status_code == 200
|
assert response.status_code == 200
|
||||||
# assert actual_data contains word "Emacs"
|
# assert actual_data contains word "Emacs"
|
||||||
search_result = response.json()[0]["entry"]
|
search_result = response.json()[0]["entry"]
|
||||||
assert "Emacs" in search_result
|
assert "emacs" in search_result
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@ -347,6 +347,27 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user:
|
||||||
assert "clone" not in search_result
|
assert "clone" not in search_result
|
||||||
|
|
||||||
|
|
||||||
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@pytest.mark.django_db(transaction=True)
|
||||||
|
def test_notes_search_requires_parent_context(
|
||||||
|
client, search_config: SearchConfig, sample_org_data, default_user: KhojUser
|
||||||
|
):
|
||||||
|
# Arrange
|
||||||
|
headers = {"Authorization": "Bearer kk-secret"}
|
||||||
|
text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user)
|
||||||
|
user_query = quote("Install Khoj on Emacs")
|
||||||
|
|
||||||
|
# Act
|
||||||
|
response = client.get(f"/api/search?q={user_query}&n=1&t=org&r=true&max_distance=0.18", headers=headers)
|
||||||
|
|
||||||
|
# Assert
|
||||||
|
assert response.status_code == 200
|
||||||
|
|
||||||
|
assert len(response.json()) == 1, "Expected only 1 result"
|
||||||
|
search_result = response.json()[0]["entry"]
|
||||||
|
assert "Emacs load path" in search_result, "Expected 'Emacs load path' in search result"
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
@pytest.mark.django_db(transaction=True)
|
@pytest.mark.django_db(transaction=True)
|
||||||
def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser):
|
def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser):
|
||||||
|
|
|
@ -69,7 +69,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert update_response.status_code == 200
|
assert update_response.status_code == 200
|
||||||
assert len(results) == 4
|
assert len(results) == 5
|
||||||
for result in results:
|
for result in results:
|
||||||
assert result["additional"]["file"] not in source_file_symbol
|
assert result["additional"]["file"] not in source_file_symbol
|
||||||
|
|
||||||
|
|
|
@ -70,7 +70,7 @@ def test_text_search_setup_with_empty_file_creates_no_entries(
|
||||||
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
|
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert "Deleted 3 entries. Created 0 new entries for user " in caplog.records[-1].message
|
assert "Deleted 8 entries. Created 0 new entries for user " in caplog.records[-1].message
|
||||||
verify_embeddings(0, default_user)
|
verify_embeddings(0, default_user)
|
||||||
|
|
||||||
|
|
||||||
|
@ -90,7 +90,7 @@ def test_text_indexer_deletes_embedding_before_regenerate(
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert "Deleting all entries for file type org" in caplog.text
|
assert "Deleting all entries for file type org" in caplog.text
|
||||||
assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message
|
assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@ -106,7 +106,7 @@ def test_text_search_setup_batch_processes(content_config: ContentConfig, defaul
|
||||||
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
|
text_search.setup(OrgToEntries, data, regenerate=True, user=default_user)
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message
|
assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@ -284,9 +284,9 @@ def test_regenerate_index_with_new_entry(
|
||||||
final_logs = caplog.text
|
final_logs = caplog.text
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs
|
assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs
|
||||||
assert "Deleted 10 entries. Created 11 new entries for user " in final_logs
|
assert "Deleted 13 entries. Created 14 new entries for user " in final_logs
|
||||||
verify_embeddings(11, default_user)
|
verify_embeddings(14, default_user)
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
@ -320,7 +320,7 @@ def test_update_index_with_duplicate_entries_in_stable_order(
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
# verify only 1 entry added even if there are multiple duplicate entries
|
# verify only 1 entry added even if there are multiple duplicate entries
|
||||||
assert "Deleted 3 entries. Created 1 new entries for user " in initial_logs
|
assert "Deleted 8 entries. Created 1 new entries for user " in initial_logs
|
||||||
assert "Deleted 0 entries. Created 0 new entries for user " in final_logs
|
assert "Deleted 0 entries. Created 0 new entries for user " in final_logs
|
||||||
|
|
||||||
verify_embeddings(1, default_user)
|
verify_embeddings(1, default_user)
|
||||||
|
@ -357,7 +357,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
# verify only 1 entry added even if there are multiple duplicate entries
|
# verify only 1 entry added even if there are multiple duplicate entries
|
||||||
assert "Deleted 3 entries. Created 2 new entries for user " in initial_logs
|
assert "Deleted 8 entries. Created 2 new entries for user " in initial_logs
|
||||||
assert "Deleted 1 entries. Created 0 new entries for user " in final_logs
|
assert "Deleted 1 entries. Created 0 new entries for user " in final_logs
|
||||||
|
|
||||||
verify_embeddings(1, default_user)
|
verify_embeddings(1, default_user)
|
||||||
|
@ -388,9 +388,9 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file
|
||||||
final_logs = caplog.text
|
final_logs = caplog.text
|
||||||
|
|
||||||
# Assert
|
# Assert
|
||||||
assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs
|
assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs
|
||||||
assert "Deleted 0 entries. Created 1 new entries for user " in final_logs
|
assert "Deleted 0 entries. Created 1 new entries for user " in final_logs
|
||||||
verify_embeddings(11, default_user)
|
verify_embeddings(14, default_user)
|
||||||
|
|
||||||
|
|
||||||
# ----------------------------------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------------------------------
|
||||||
|
|
Loading…
Reference in a new issue