From 305c25ae1ade1c334f47b3026d49fe397986eb2b Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 16 Nov 2023 00:13:39 -0800 Subject: [PATCH 1/5] Track ancestor headings for each org-mode entry in org-node parser --- src/khoj/processor/org_mode/orgnode.py | 28 +++++++- tests/test_orgnode.py | 91 ++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/src/khoj/processor/org_mode/orgnode.py b/src/khoj/processor/org_mode/orgnode.py index db660ee7..68508666 100644 --- a/src/khoj/processor/org_mode/orgnode.py +++ b/src/khoj/processor/org_mode/orgnode.py @@ -80,6 +80,7 @@ def makelist(file, filename): } # populated from #+SEQ_TODO line level = "" heading = "" + ancestor_headings = [f"{filename}"] bodytext = "" introtext = "" tags = list() # set of all tags in headline @@ -98,7 +99,7 @@ def makelist(file, filename): heading_search = re.search(r"^(\*+)\s(.*?)\s*$", line) if heading_search: # we are processing a heading line if heading: # if we have are on second heading, append first heading to headings list - thisNode = Orgnode(level, heading, bodytext, tags) + thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings) if closed_date: thisNode.closed = closed_date closed_date = "" @@ -114,6 +115,8 @@ def makelist(file, filename): thisNode.properties = property_map nodelist.append(thisNode) property_map = {"LINE": f"file:{normalize_filename(filename)}::{ctr}"} + previous_level = level + previous_heading = heading level = heading_search.group(1) heading = heading_search.group(2) bodytext = "" @@ -126,6 +129,17 @@ def makelist(file, filename): for parsedtag in parsedtags.split(":"): if parsedtag != "": tags.append(parsedtag) + + # Add previous heading to ancestors if current heading is deeper than previous level + if len(level) > len(previous_level) and previous_heading: + ancestor_headings.append(previous_heading) + # Remove last ancestor(s) if current heading is shallower than previous level + elif len(level) < len(previous_level): + for _ in range(len(level), len(previous_level)): + if not ancestor_headings or len(ancestor_headings) == 0: + break + ancestor_headings.pop() + else: # we are processing a non-heading line if line[:10] == "#+SEQ_TODO": kwlist = re.findall(r"([A-Z]+)\(", line) @@ -216,7 +230,7 @@ def makelist(file, filename): nodelist = [thisNode] + nodelist # write out last heading node if heading: - thisNode = Orgnode(level, heading, bodytext, tags) + thisNode = Orgnode(level, heading, bodytext, tags, ancestor_headings) thisNode.properties = property_map if sched_date: thisNode.scheduled = sched_date @@ -261,7 +275,7 @@ class Orgnode(object): with the headline. """ - def __init__(self, level, headline, body, tags): + def __init__(self, level, headline, body, tags, ancestor_headings=[]): """ Create an Orgnode object given the parameters of level (as the raw asterisks), headline text (including the TODO tag), and @@ -279,9 +293,17 @@ class Orgnode(object): self._closed = "" # Closed date self._properties = dict() self._logbook = list() # List of clock-in, clock-out tuples representing logbook entries + self._ancestor_headings = ancestor_headings.copy() # Look for priority in headline and transfer to prty field + @property + def ancestors(self): + """ + Return the Heading text of the node without the TODO tag + """ + return self._ancestor_headings + @property def heading(self): """ diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index c6ed3447..4ef12661 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -161,6 +161,8 @@ Body Line 1""" assert len(entries) == 1 # parsed heading from entry assert entries[0].heading == "Heading[1]" + # track ancestors of entry + assert entries[0].ancestors == [f"{orgfile}"] # ensure SOURCE link has square brackets in filename, heading escaped in rendered entries escaped_orgfile = f"{orgfile}".replace("[1]", "\\[1\\]") assert f":SOURCE: [[file:{escaped_orgfile}::*Heading\\[1\\]" in f"{entries[0]}" @@ -260,6 +262,7 @@ Body Line 1""" assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" + assert entries[0].ancestors == [] # ---------------------------------------------------------------------------------------------------- @@ -284,6 +287,7 @@ Body Line 1 assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" + assert entries[0].ancestors == [] # ---------------------------------------------------------------------------------------------------- @@ -304,8 +308,10 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title" assert entries[0].body == "intro body\n" + assert entries[0].ancestors == [] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" + assert entries[1].ancestors == [f"{orgfile}"] # ---------------------------------------------------------------------------------------------------- @@ -326,8 +332,93 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title1 Title2" assert entries[0].body == "intro body\n" + assert entries[0].ancestors == [] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" + assert entries[1].ancestors == [f"{orgfile}"] + + +# ---------------------------------------------------------------------------------------------------- +def test_parse_org_with_single_ancestor_heading(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 2 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + + +# ---------------------------------------------------------------------------------------------------- +def test_parse_org_with_multiple_ancestor_headings(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +*** Sub Sub Heading 1 +sub sub body 1 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 3 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[2].heading == "Sub Sub Heading 1" + assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + + +# ---------------------------------------------------------------------------------------------------- +def test_parse_org_with_multiple_ancestor_headings_of_siblings(tmp_path): + "Parse org entries with parent headings context" + # Arrange + body = f""" +* Heading 1 +body 1 +** Sub Heading 1 +*** Sub Sub Heading 1 +sub sub body 1 +*** Sub Sub Heading 2 +** Sub Heading 2 +*** Sub Sub Heading 3 +""" + orgfile = create_file(tmp_path, body) + + # Act + entries = orgnode.makelist_with_filepath(orgfile) + + # Assert + assert len(entries) == 6 + assert entries[0].heading == "Heading 1" + assert entries[0].ancestors == [f"{orgfile}"] + assert entries[1].heading == "Sub Heading 1" + assert entries[1].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[2].heading == "Sub Sub Heading 1" + assert entries[2].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + assert entries[3].heading == "Sub Sub Heading 2" + assert entries[3].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 1"] + assert entries[4].heading == "Sub Heading 2" + assert entries[4].ancestors == [f"{orgfile}", "Heading 1"] + assert entries[5].heading == "Sub Sub Heading 3" + assert entries[5].ancestors == [f"{orgfile}", "Heading 1", "Sub Heading 2"] # Helper Functions From 74403e35365996d9a58799b06be9949976a18f56 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 16 Nov 2023 01:08:51 -0800 Subject: [PATCH 2/5] Add ancestor headings of each org-mode entry to their compiled form Resolves #85 --- src/khoj/processor/org_mode/org_to_entries.py | 9 +++++---- tests/test_org_to_entries.py | 5 +++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/org_mode/org_to_entries.py index 04ce97e4..c84ab48f 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/org_mode/org_to_entries.py @@ -100,12 +100,13 @@ class OrgToEntries(TextToEntries): continue todo_str = f"{parsed_entry.todo} " if parsed_entry.todo else "" - # Prepend filename as top heading to entry - filename = Path(entry_to_file_map[parsed_entry]).stem + + # Prepend ancestor headings, filename as top heading to entry for context + ancestors_trail = " / ".join(parsed_entry.ancestors) or Path(entry_to_file_map[parsed_entry]) if parsed_entry.heading: - heading = f"* {filename}\n** {todo_str}{parsed_entry.heading}." + heading = f"* Path: {ancestors_trail}\n** {todo_str}{parsed_entry.heading}." else: - heading = f"* {filename}." + heading = f"* Path: {ancestors_trail}." compiled = heading if state.verbose > 2: diff --git a/tests/test_org_to_entries.py b/tests/test_org_to_entries.py index 1eddcf95..76e2a8a7 100644 --- a/tests/test_org_to_entries.py +++ b/tests/test_org_to_entries.py @@ -45,9 +45,10 @@ def test_configure_heading_entry_to_jsonl(tmp_path): assert is_none_or_empty(jsonl_data) -def test_entry_split_when_exceeds_max_words(tmp_path): +def test_entry_split_when_exceeds_max_words(): "Ensure entries with compiled words exceeding max_words are split." # Arrange + tmp_path = "/tmp/test.org" entry = f"""*** Heading \t\r Body Line @@ -55,7 +56,7 @@ def test_entry_split_when_exceeds_max_words(tmp_path): data = { f"{tmp_path}": entry, } - expected_heading = f"* {tmp_path.stem}\n** Heading" + expected_heading = f"* Path: {tmp_path}\n** Heading" # Act # Extract Entries from specified Org files From ddb07def0d914be2bec6b09ad0b7e37aed1a7db1 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Thu, 16 Nov 2023 02:47:58 -0800 Subject: [PATCH 3/5] Test search uses ancestor headings as context for improved results - Update test data to add deeper outline hierarchy for testing hierarchy as context - Update collateral tests that need count of entries updated, deleted asserts to be updated --- tests/conftest.py | 40 ++++++++++++++++++++++- tests/data/org/interface_emacs_readme.org | 4 +-- tests/data/org/main_readme.org | 6 ++-- tests/test_client.py | 23 ++++++++++++- tests/test_multiple_users.py | 2 +- tests/test_text_search.py | 20 ++++++------ 6 files changed, 77 insertions(+), 18 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index d90bae95..6e2609bd 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -386,6 +386,44 @@ def sample_org_data(): def get_sample_data(type): sample_data = { "org": { + "elisp.org": """ +* Emacs Khoj + /An Emacs interface for [[https://github.com/khoj-ai/khoj][khoj]]/ + +** Requirements + - Install and Run [[https://github.com/khoj-ai/khoj][khoj]] + +** Installation +*** Direct + - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp + - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet + #+begin_src elisp + ;; Khoj Package + (use-package khoj + :load-path "~/.emacs.d/lisp/khoj.el" + :bind ("C-c s" . 'khoj)) + #+end_src + +*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]] + - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed + - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. + #+begin_src elisp + ;; Khoj Package + (use-package khoj + :quelpa (khoj :fetcher url :url "https://raw.githubusercontent.com/khoj-ai/khoj/master/interface/emacs/khoj.el") + :bind ("C-c s" . 'khoj)) + #+end_src + +** Usage + 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~ + 2. Enter Query in Natural Language + e.g "What is the meaning of life?" "What are my life goals?" + 3. Wait for results + *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files* + 4. (Optional) Narrow down results further + Include/Exclude specific words from results by adding to query + +""", "readme.org": """ * Khoj /Allow natural language search on user content like notes, images using transformer based models/ @@ -401,7 +439,7 @@ def get_sample_data(type): git clone https://github.com/khoj-ai/khoj && cd khoj conda env create -f environment.yml conda activate khoj - #+end_src""" + #+end_src""", }, "markdown": { "readme.markdown": """ diff --git a/tests/data/org/interface_emacs_readme.org b/tests/data/org/interface_emacs_readme.org index 300f1013..2b74bff7 100644 --- a/tests/data/org/interface_emacs_readme.org +++ b/tests/data/org/interface_emacs_readme.org @@ -5,7 +5,7 @@ - Install and Run [[https://github.com/khoj-ai/khoj][khoj]] ** Installation - - Direct Install +*** Direct - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet @@ -16,7 +16,7 @@ :bind ("C-c s" . 'khoj)) #+end_src - - Use [[https://github.com/quelpa/quelpa#installation][Quelpa]] +*** Using [[https://github.com/quelpa/quelpa#installation][Quelpa]] - Ensure [[https://github.com/quelpa/quelpa#installation][Quelpa]], [[https://github.com/quelpa/quelpa-use-package#installation][quelpa-use-package]] are installed - Add below snippet to your ~/.emacs.d/init.el or .emacs config file and execute it. #+begin_src elisp diff --git a/tests/data/org/main_readme.org b/tests/data/org/main_readme.org index 6495d6ba..d88a2b2b 100644 --- a/tests/data/org/main_readme.org +++ b/tests/data/org/main_readme.org @@ -22,16 +22,16 @@ #+end_src ** Use - - *Khoj via Emacs* +*** *Khoj via Emacs* - [[https://github.com/khoj-ai/khoj/tree/master/interface/emacs#installation][Install]] [[./interface/emacs/khoj.el][khoj.el]] - Run ~M-x khoj ~ or Call ~C-c C-s~ - - *Khoj via API* +*** *Khoj via API* - Query: ~GET~ [[http://localhost:42110/api/search?q=%22what%20is%20the%20meaning%20of%20life%22][http://localhost:42110/api/search?q="What is the meaning of life"]] - Update Index: ~GET~ [[http://localhost:42110/api/update][http://localhost:42110/api/update]] - [[http://localhost:42110/docs][Khoj API Docs]] - - *Call Khoj via Python Script Directly* +*** *Call Khoj via Python Script Directly* #+begin_src shell python3 search_types/asymmetric.py \ --compressed-jsonl .notes.jsonl.gz \ diff --git a/tests/test_client.py b/tests/test_client.py index f642a727..5324e3c1 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -321,7 +321,7 @@ def test_notes_search_with_include_filter(client, sample_org_data, default_user: assert response.status_code == 200 # assert actual_data contains word "Emacs" search_result = response.json()[0]["entry"] - assert "Emacs" in search_result + assert "emacs" in search_result # ---------------------------------------------------------------------------------------------------- @@ -347,6 +347,27 @@ def test_notes_search_with_exclude_filter(client, sample_org_data, default_user: assert "clone" not in search_result +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.django_db(transaction=True) +def test_notes_search_requires_parent_context( + client, search_config: SearchConfig, sample_org_data, default_user: KhojUser +): + # Arrange + headers = {"Authorization": "Bearer kk-secret"} + text_search.setup(OrgToEntries, sample_org_data, regenerate=False, user=default_user) + user_query = quote("Install Khoj on Emacs") + + # Act + response = client.get(f"/api/search?q={user_query}&n=1&t=org&r=true&max_distance=0.18", headers=headers) + + # Assert + assert response.status_code == 200 + + assert len(response.json()) == 1, "Expected only 1 result" + search_result = response.json()[0]["entry"] + assert "Emacs load path" in search_result, "Expected 'Emacs load path' in search result" + + # ---------------------------------------------------------------------------------------------------- @pytest.mark.django_db(transaction=True) def test_different_user_data_not_accessed(client, sample_org_data, default_user: KhojUser): diff --git a/tests/test_multiple_users.py b/tests/test_multiple_users.py index 95a2535f..a94c173e 100644 --- a/tests/test_multiple_users.py +++ b/tests/test_multiple_users.py @@ -69,7 +69,7 @@ def test_index_update_with_user2_inaccessible_user1(client, api_user2: KhojApiUs # Assert assert update_response.status_code == 200 - assert len(results) == 4 + assert len(results) == 5 for result in results: assert result["additional"]["file"] not in source_file_symbol diff --git a/tests/test_text_search.py b/tests/test_text_search.py index 3d729ab5..b925a9a9 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -70,7 +70,7 @@ def test_text_search_setup_with_empty_file_creates_no_entries( text_search.setup(OrgToEntries, data, regenerate=True, user=default_user) # Assert - assert "Deleted 3 entries. Created 0 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 0 new entries for user " in caplog.records[-1].message verify_embeddings(0, default_user) @@ -90,7 +90,7 @@ def test_text_indexer_deletes_embedding_before_regenerate( # Assert assert "Deleting all entries for file type org" in caplog.text - assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message # ---------------------------------------------------------------------------------------------------- @@ -106,7 +106,7 @@ def test_text_search_setup_batch_processes(content_config: ContentConfig, defaul text_search.setup(OrgToEntries, data, regenerate=True, user=default_user) # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in caplog.records[-1].message + assert "Deleted 8 entries. Created 13 new entries for user " in caplog.records[-1].message # ---------------------------------------------------------------------------------------------------- @@ -284,9 +284,9 @@ def test_regenerate_index_with_new_entry( final_logs = caplog.text # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs - assert "Deleted 10 entries. Created 11 new entries for user " in final_logs - verify_embeddings(11, default_user) + assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs + assert "Deleted 13 entries. Created 14 new entries for user " in final_logs + verify_embeddings(14, default_user) # ---------------------------------------------------------------------------------------------------- @@ -320,7 +320,7 @@ def test_update_index_with_duplicate_entries_in_stable_order( # Assert # verify only 1 entry added even if there are multiple duplicate entries - assert "Deleted 3 entries. Created 1 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 1 new entries for user " in initial_logs assert "Deleted 0 entries. Created 0 new entries for user " in final_logs verify_embeddings(1, default_user) @@ -357,7 +357,7 @@ def test_update_index_with_deleted_entry(org_config_with_only_new_file: LocalOrg # Assert # verify only 1 entry added even if there are multiple duplicate entries - assert "Deleted 3 entries. Created 2 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 2 new entries for user " in initial_logs assert "Deleted 1 entries. Created 0 new entries for user " in final_logs verify_embeddings(1, default_user) @@ -388,9 +388,9 @@ def test_update_index_with_new_entry(content_config: ContentConfig, new_org_file final_logs = caplog.text # Assert - assert "Deleted 3 entries. Created 10 new entries for user " in initial_logs + assert "Deleted 8 entries. Created 13 new entries for user " in initial_logs assert "Deleted 0 entries. Created 1 new entries for user " in final_logs - verify_embeddings(11, default_user) + verify_embeddings(14, default_user) # ---------------------------------------------------------------------------------------------------- From 55785d50c387cca580e69b1c3c43020b419f852c Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 17 Nov 2023 14:47:06 -0800 Subject: [PATCH 4/5] Use title, when present, as root ancestor of entries instead of file path --- src/khoj/processor/org_mode/org_to_entries.py | 2 +- src/khoj/processor/org_mode/orgnode.py | 18 +++++++++++++----- tests/test_orgnode.py | 12 ++++++------ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/khoj/processor/org_mode/org_to_entries.py b/src/khoj/processor/org_mode/org_to_entries.py index c84ab48f..e42b7498 100644 --- a/src/khoj/processor/org_mode/org_to_entries.py +++ b/src/khoj/processor/org_mode/org_to_entries.py @@ -110,7 +110,7 @@ class OrgToEntries(TextToEntries): compiled = heading if state.verbose > 2: - logger.debug(f"Title: {parsed_entry.heading}") + logger.debug(f"Title: {heading}") if parsed_entry.tags: tags_str = " ".join(parsed_entry.tags) diff --git a/src/khoj/processor/org_mode/orgnode.py b/src/khoj/processor/org_mode/orgnode.py index 68508666..28f55c17 100644 --- a/src/khoj/processor/org_mode/orgnode.py +++ b/src/khoj/processor/org_mode/orgnode.py @@ -80,7 +80,7 @@ def makelist(file, filename): } # populated from #+SEQ_TODO line level = "" heading = "" - ancestor_headings = [f"{filename}"] + ancestor_headings = [] bodytext = "" introtext = "" tags = list() # set of all tags in headline @@ -257,6 +257,9 @@ def makelist(file, filename): n.priority = priority_search.group(1) n.heading = priority_search.group(2) + # Prefix filepath/title to ancestors + n.ancestors = [file_title] + n.ancestors + # Set SOURCE property to a file+heading based org-mode link to the entry if n.level == 0: n.properties["LINE"] = f"file:{normalize_filename(filename)}::0" @@ -295,15 +298,20 @@ class Orgnode(object): self._logbook = list() # List of clock-in, clock-out tuples representing logbook entries self._ancestor_headings = ancestor_headings.copy() - # Look for priority in headline and transfer to prty field - @property - def ancestors(self): + def ancestors(self) -> List[str]: """ - Return the Heading text of the node without the TODO tag + Return the ancestor headings of the node """ return self._ancestor_headings + @ancestors.setter + def ancestors(self, new_ancestors): + """ + Update the ancestor headings of the node + """ + self._ancestor_headings = new_ancestors + @property def heading(self): """ diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py index 4ef12661..157763e2 100644 --- a/tests/test_orgnode.py +++ b/tests/test_orgnode.py @@ -262,7 +262,7 @@ Body Line 1""" assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" - assert entries[0].ancestors == [] + assert entries[0].ancestors == ["test"] # ---------------------------------------------------------------------------------------------------- @@ -287,7 +287,7 @@ Body Line 1 assert entries[0].closed == "" assert entries[0].scheduled == "" assert entries[0].deadline == "" - assert entries[0].ancestors == [] + assert entries[0].ancestors == ["title1 title2"] # ---------------------------------------------------------------------------------------------------- @@ -308,10 +308,10 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title" assert entries[0].body == "intro body\n" - assert entries[0].ancestors == [] + assert entries[0].ancestors == ["Title"] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" - assert entries[1].ancestors == [f"{orgfile}"] + assert entries[1].ancestors == ["Title"] # ---------------------------------------------------------------------------------------------------- @@ -332,10 +332,10 @@ entry body assert len(entries) == 2 assert entries[0].heading == "Title1 Title2" assert entries[0].body == "intro body\n" - assert entries[0].ancestors == [] + assert entries[0].ancestors == ["Title1 Title2"] assert entries[1].heading == "Entry Heading" assert entries[1].body == "entry body\n\n" - assert entries[1].ancestors == [f"{orgfile}"] + assert entries[0].ancestors == ["Title1 Title2"] # ---------------------------------------------------------------------------------------------------- From 33ad9b8e64f3861546f003fe29f929eb7b6c89d8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Fri, 17 Nov 2023 14:49:39 -0800 Subject: [PATCH 5/5] Update text search test since indexing ancestor hierarchy added --- tests/conftest.py | 1 + tests/data/config.yml | 2 +- tests/data/org/interface_emacs_readme.org | 10 +--------- tests/test_text_search.py | 4 ++-- 4 files changed, 5 insertions(+), 12 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6e2609bd..dd684cc8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -422,6 +422,7 @@ def get_sample_data(type): *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files* 4. (Optional) Narrow down results further Include/Exclude specific words from results by adding to query + e.g "What is the meaning of life? -god +none" """, "readme.org": """ diff --git a/tests/data/config.yml b/tests/data/config.yml index 2d642a09..bb6736ab 100644 --- a/tests/data/config.yml +++ b/tests/data/config.yml @@ -14,4 +14,4 @@ search-type: asymmetric: cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2 encoder: sentence-transformers/msmarco-MiniLM-L-6-v3 -version: 0.14.0 +version: 0.15.0 diff --git a/tests/data/org/interface_emacs_readme.org b/tests/data/org/interface_emacs_readme.org index 2b74bff7..ef43b3cc 100644 --- a/tests/data/org/interface_emacs_readme.org +++ b/tests/data/org/interface_emacs_readme.org @@ -4,10 +4,9 @@ ** Requirements - Install and Run [[https://github.com/khoj-ai/khoj][khoj]] -** Installation +** Install *** Direct - Put ~khoj.el~ in your Emacs load path. For e.g ~/.emacs.d/lisp - - Load via ~use-package~ in your ~/.emacs.d/init.el or .emacs file by adding below snippet #+begin_src elisp ;; Khoj Package @@ -28,17 +27,10 @@ ** Usage 1. Call ~khoj~ using keybinding ~C-c s~ or ~M-x khoj~ - 2. Enter Query in Natural Language - e.g "What is the meaning of life?" "What are my life goals?" - 3. Wait for results - *Note: It takes about 15s on a Mac M1 and a ~100K lines corpus of org-mode files* - 4. (Optional) Narrow down results further - Include/Exclude specific words from results by adding to query - e.g "What is the meaning of life? -god +none" diff --git a/tests/test_text_search.py b/tests/test_text_search.py index b925a9a9..b4507feb 100644 --- a/tests/test_text_search.py +++ b/tests/test_text_search.py @@ -161,7 +161,7 @@ async def test_text_search(search_config: SearchConfig): default_user, ) - query = "How to git install application?" + query = "Load Khoj on Emacs?" # Act hits = await text_search.query(default_user, query) @@ -170,7 +170,7 @@ async def test_text_search(search_config: SearchConfig): # Assert search_result = results[0].entry - assert "git clone" in search_result, 'search result did not contain "git clone" entry' + assert "Emacs load path" in search_result, 'Expected "Emacs load path" in entry' # ----------------------------------------------------------------------------------------------------