diff --git a/tests/conftest.py b/tests/conftest.py index d90bae95..6e2609bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -386,6 +386,44 @@ def sample_org_data(): def get_sample_data(type): sample_data = { "org": { + "elisp.org": """ +* Emacs Khoj + /An Emacs interface for [[https://github.com/khoj-ai/khoj][khoj]]/ + +** Requirements + - Install and Run [[https://github.com/khoj-ai/khoj][khoj]] + +** Installation +*** Direct + - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp + - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet + #+begin_src elisp + ;; Khoj Package + (use-package khoj + :load-path "~/.emacs.d/lisp/khoj.el" + :bind ("C-c s" . 'khoj)) + #+end_src + +*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]] + - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed + - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. + #+begin_src elisp + ;; Khoj Package + (use-package khoj + :quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el") + :bind ("C-c s" . 'khoj)) + #+end_src + +** Usage + 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~ + 2. Enter Query in Natural Language + e.g "What is the meaning of life?" "What are my life goals?" + 3. Wait for results + *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files* + 4. (Optional) Narrow down results further + Include/Exclude specific words from results by adding to query + +""", "readme.org": """ * Khoj /Allow natural language search on user content like notes, images using transformer based models/ @@ -401,7 +439,7 @@ def get_sample_data(type): git clone https://github.com/khoj-ai/khoj && cd khoj conda env create -f environment.yml conda activate khoj - #+end_src""" + #+end_src""", }, "markdown": { "readme.markdown": """ diff --git a/tests/data/org/interface_emacs_readme.org b/tests/data/org/interface_emacs_readme.org index 300f1013..2b74bff7 100644 --- a/tests/data/org/interface_emacs_readme.org +++ b/tests/data/org/interface_emacs_readme.org @@ -5,7 +5,7 @@ - Install and Run [[https://github.com/khoj-ai/khoj][khoj]] ** Installation - - Direct Install +*** Direct - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet @@ -16,7 +16,7 @@ :bind ("C-c s" . 'khoj)) #+end_src - - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]] +*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]] - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. #+begin_src elisp diff --git a/tests/data/org/main_readme.org b/tests/data/org/main_readme.org index 6495d6ba..d88a2b2b 100644 --- a/tests/data/org/main_readme.org +++ b/tests/data/org/main_readme.org @@ -22,16 +22,16 @@ #+end_src ** Use - - *Khoj via Emacs* +*** *Khoj via Emacs* - [[https://github.com/khoj-ai/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]] - Run ~M-x khoj ~ or Call ~C-c C-s~ - - *Khoj via API* +*** *Khoj via API* - Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]] - Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]] - [[http://localhost:42110/docs][Khoj API Docs]] - - *Call Khoj via Python Script Directly* +*** *Call Khoj via Python Script Directly* #+begin_src shell python3 search_types/asymmetric.py \ --compressed-jsonl .notes.jsonl.gz \ diff --git a/tests/test_client.py b/tests/test_client.py index f642a727..5324e3c1 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -321,7 +321,7 @@ def test_notes_search_with_include_filter(client, sample_org_data, default_user: assert response.status_code == 200 # assert actual_data contains word "Emacs" search_result = response.json()[0]["entry"] - assert "Emacs" in search_result + assert "emacs" in search_result # ---------------------------------------------------------------------------------------------------- @@ -347,6 +347,27 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user: assert "clone" not in search_result +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.django_db(transaction=True) +def test_notes_search_requires_parent_context( + client, search_config: SearchConfig, sample_org_data, default_user: KhojUser +): + # Arrange + headers = {"Authorization": "Bearer kk-secret"} + text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user) + user_query = quote("Install Khoj on Emacs") + + # Act + response = client.get(f"/api/search?q={user_query}&n=1&t=org&r=true&max_distance=0.18", headers=headers) + + # Assert + assert response.status_code == 200 + + assert len(response.json()) == 1, "Expected only 1 result" + search_result = response.json()[0]["entry"] + assert "Emacs load path" in search_result, "Expected 'Emacs load path' in search result" + + # ---------------------------------------------------------------------------------------------------- @pytest.mark.django_db(transaction=True) def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser): diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index 95a2535f..a94c173e 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -69,7 +69,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs # Assert assert update_response.status_code == 200 - assert len(results) == 4 + assert len(results) == 5 for result in results: assert result["additional"]["file"] not in source_file_symbol diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 3d729ab5..b925a9a9 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -70,7 +70,7 @@ def test_text_search_setup_with_empty_file_creates_no_entries( text_search.setup(OrgToEntries, data, regenerate=True, user=default_user) # Assert - assert "Deleted 3 entries. Created 0 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 0 new entries for user " in caplog.records[-1].message verify_embeddings(0, default_user) @@ -90,7 +90,7 @@ def test_text_indexer_deletes_embedding_before_regenerate( # Assert assert "Deleting all entries for file type org" in caplog.text - assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message # ---------------------------------------------------------------------------------------------------- @@ -106,7 +106,7 @@ def test_text_search_setup_batch_processes(content_config: ContentConfig, defaul text_search.setup(OrgToEntries, data, regenerate=True, user=default_user) # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message # ---------------------------------------------------------------------------------------------------- @@ -284,9 +284,9 @@ def test_regenerate_index_with_new_entry( final_logs = caplog.text # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs - assert "Deleted 10 entries. Created 11 new entries for user " in final_logs - verify_embeddings(11, default_user) + assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs + assert "Deleted 13 entries. Created 14 new entries for user " in final_logs + verify_embeddings(14, default_user) # ---------------------------------------------------------------------------------------------------- @@ -320,7 +320,7 @@ def test_update_index_with_duplicate_entries_in_stable_order( # Assert # verify only 1 entry added even if there are multiple duplicate entries - assert "Deleted 3 entries. Created 1 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 1 new entries for user " in initial_logs assert "Deleted 0 entries. Created 0 new entries for user " in final_logs verify_embeddings(1, default_user) @@ -357,7 +357,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg # Assert # verify only 1 entry added even if there are multiple duplicate entries - assert "Deleted 3 entries. Created 2 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 2 new entries for user " in initial_logs assert "Deleted 1 entries. Created 0 new entries for user " in final_logs verify_embeddings(1, default_user) @@ -388,9 +388,9 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file final_logs = caplog.text # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs assert "Deleted 0 entries. Created 1 new entries for user " in final_logs - verify_embeddings(11, default_user) + verify_embeddings(14, default_user) # ----------------------------------------------------------------------------------------------------