From 35d81de1a1e813f74f30185bc599baaae634584a Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 7 Sep 2022 13:38:15 +0300
Subject: [PATCH 01/19] Update khoj version to 0.1.7 in setup.py

This should have been done right after the 0.1.6 release. To allow
pre-release versions for 0.1.7 published to pypi from master to be
installable. Currently their being published as 0.1.6 pre-release
versions instead
---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index bf9f3ce9..8d58ea5c 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ this_directory = Path(__file__).parent
 
 setup(
     name='khoj-assistant',
-    version='0.1.6',
+    version='0.1.7',
     description="A natural language search engine for your personal notes, transactions and images",
     long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
     long_description_content_type="text/markdown",

From e4d40e4d4d020a26ef338e39f1e95743bad1c256 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 7 Sep 2022 14:51:03 +0300
Subject: [PATCH 02/19] Update setup.py version, Readme. Remove faulty release
 badge for now

---
 .github/workflows/release.yml | 6 +++---
 Readme.md                     | 6 +++---
 setup.py                      | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a664f12e..9d642f2b 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,15 +1,15 @@
 name: release
 
 on:
+  push:
+    tags:
+      - v*
   workflow_dispatch:
     inputs:
       version:
         description: 'Version Number'
         required: true
         type: string
-  push:
-    tags:
-      - v*
 
 jobs:
   publish:
diff --git a/Readme.md b/Readme.md
index 628ce458..301e2369 100644
--- a/Readme.md
+++ b/Readme.md
@@ -2,7 +2,6 @@
 [![build](https://github.com/debanjum/khoj/actions/workflows/build.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/build.yml)
 [![test](https://github.com/debanjum/khoj/actions/workflows/test.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/test.yml)
 [![publish](https://github.com/debanjum/khoj/actions/workflows/publish.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/publish.yml)
-[![release](https://github.com/debanjum/khoj/actions/workflows/release.yml/badge.svg)](https://github.com/debanjum/khoj/actions/workflows/release.yml)
 
 *A natural language search engine for your personal notes, transactions and images*
 
@@ -107,7 +106,7 @@ pip install --upgrade khoj-assistant
 ## Troubleshoot
 
 - Symptom: Errors out complaining about Tensors mismatch, null etc
-  - Mitigation: Disable `image` search on the desktop GUI
+  - Mitigation: Disable `image` search using the desktop GUI
 - Symptom: Errors out with \"Killed\" in error message in Docker
   - Fix: Increase RAM available to Docker Containers in Docker Settings
   - Refer: [StackOverflow Solution](https://stackoverflow.com/a/50770267), [Configure Resources on Docker for Mac](https://docs.docker.com/desktop/mac/#resources)
@@ -125,11 +124,12 @@ pip install --upgrade khoj-assistant
 
 - Semantic search using the bi-encoder is fairly fast at \<50 ms
 - Reranking using the cross-encoder is slower at \<2s on 15 results. Tweak `top_k` to tradeoff speed for accuracy of results
+- Filters in query (e.g by file, word or date) usually add \<20ms to query latency
 
 ### Indexing performance
 
 - Indexing is more strongly impacted by the size of the source data
-- Indexing 100K+ line corpus of notes takes 6 minutes
+- Indexing 100K+ line corpus of notes takes about 10 minutes
 - Indexing 4000+ images takes about 15 minutes and more than 8Gb of RAM
 - Once <https://github.com/debanjum/khoj/issues/36> is implemented, it should only take this long on first run
 
diff --git a/setup.py b/setup.py
index 8d58ea5c..5f197e13 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ this_directory = Path(__file__).parent
 
 setup(
     name='khoj-assistant',
-    version='0.1.7',
+    version='0.1.8',
     description="A natural language search engine for your personal notes, transactions and images",
     long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
     long_description_content_type="text/markdown",

From 5d3aeba22fec62e988657dc542e7f46a1a3032bc Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Thu, 8 Sep 2022 10:37:39 +0300
Subject: [PATCH 03/19] Use --no-gui flag on starting Khoj from docker-compose

As the GUI wouldn't work when run from a docker container
---
 docker-compose.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker-compose.yml b/docker-compose.yml
index 022463f2..42f2e617 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -26,4 +26,4 @@ services:
       - ./tests/data/embeddings/:/data/embeddings/
       - ./tests/data/models/:/data/models/
     # Use 0.0.0.0 to explicitly set the host ip for the service on the container. https://pythonspeed.com/articles/docker-connection-refused/
-    command: --host="0.0.0.0" --port=8000 -c=config/khoj_docker.yml -vv
+    command: --no-gui --host="0.0.0.0" --port=8000 -c=config/khoj_docker.yml -vv

From 17354aaffd6d2a943a69ecc7824e690280e2bf2c Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Thu, 8 Sep 2022 10:39:11 +0300
Subject: [PATCH 04/19] Install pyqt system package in Docker image to get qt
 dependencies

Otherwise app start fails with pyqt package import related errors.
See #76 for bug
---
 Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Dockerfile b/Dockerfile
index d38a39af..e1cd9321 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -4,7 +4,7 @@ LABEL org.opencontainers.image.source https://github.com/debanjum/khoj
 
 # Install System Dependencies
 RUN apt-get update -y && \
-    apt-get -y install libimage-exiftool-perl
+    apt-get -y install libimage-exiftool-perl python3-pyqt5
 
 # Copy Application to Container
 COPY . /app

From 26ff66f38b459b59befc58a90e13a7ac82ca4dd2 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Thu, 8 Sep 2022 10:42:34 +0300
Subject: [PATCH 05/19] (Re-)Enable image search via Docker image as image
 search issues fixed

---
 config/khoj_docker.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/config/khoj_docker.yml b/config/khoj_docker.yml
index b1ef7d5f..7e3fe8b4 100644
--- a/config/khoj_docker.yml
+++ b/config/khoj_docker.yml
@@ -20,11 +20,11 @@ content-type:
     compressed-jsonl: /data/embeddings/transactions.jsonl.gz
     embeddings-file: /data/embeddings/transaction_embeddings.pt
 
-#  image:
-#    input-directories: ["/data/images/"]
-#    embeddings-file: "/data/embeddings/image_embeddings.pt"
-#    batch-size: 50
-#    use-xmp-metadata: true
+  image:
+    input-directories: ["/data/images/"]
+    embeddings-file: "/data/embeddings/image_embeddings.pt"
+    batch-size: 50
+    use-xmp-metadata: false
 
   music:
     input-files: ["/data/music/music.org"]

From 3ddffdfba4fb20684953fcb41352771e0d46a015 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Fri, 9 Sep 2022 07:15:28 +0300
Subject: [PATCH 06/19] Create config directory before setting up logging to
 file under it

- The logging to file code expects the config directory to already be setup
- But parent directory of config file was being set up later in code
- This resulted in app start failing with ~/.khoj dir does not exist error
---
 src/main.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/main.py b/src/main.py
index 72a05884..6fbcd035 100644
--- a/src/main.py
+++ b/src/main.py
@@ -63,6 +63,9 @@ def run():
     args = cli(state.cli_args)
     set_state(args)
 
+    # Create app directory, if it doesn't exist
+    state.config_file.parent.mkdir(parents=True, exist_ok=True)
+
     # Setup Logger
     if args.verbose == 0:
         logger.setLevel(logging.WARN)

From 588f5989493c72b3d46d46c584bd459f94524804 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Fri, 9 Sep 2022 07:18:05 +0300
Subject: [PATCH 07/19] Pass empty list of `input_files' to FileBrowser on
 first run

- Default config has `input_files' set to None
- This was being passed to `FileBrowser' on Initialization
- But `FileBrowser' expects `content_files' of list type, not None
- This resulted in an unexpected NoneType failure
---
 src/interface/desktop/main_window.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/interface/desktop/main_window.py b/src/interface/desktop/main_window.py
index 97bbb54a..25287d27 100644
--- a/src/interface/desktop/main_window.py
+++ b/src/interface/desktop/main_window.py
@@ -92,7 +92,7 @@ class MainWindow(QtWidgets.QMainWindow):
         search_type_layout = QtWidgets.QVBoxLayout(search_type_settings)
         enable_search_type = SearchCheckBox(f"Search {search_type.name}", search_type)
         # Add file browser to set input files for given search type
-        input_files = FileBrowser(file_input_text, search_type, current_content_files)
+        input_files = FileBrowser(file_input_text, search_type, current_content_files or [])
 
         # Set enabled/disabled based on checkbox state
         enable_search_type.setChecked(current_content_files is not None and len(current_content_files) > 0)

From 4d776d9c7a3630151f6f749d64d43c6811ceedea Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Fri, 9 Sep 2022 07:50:15 +0300
Subject: [PATCH 08/19] Bump khoj version to 0.1.9

---
 setup.py                    | 2 +-
 src/interface/emacs/khoj.el | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 5f197e13..dc0ba0f3 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@ this_directory = Path(__file__).parent
 
 setup(
     name='khoj-assistant',
-    version='0.1.8',
+    version='0.1.9',
     description="A natural language search engine for your personal notes, transactions and images",
     long_description=(this_directory / "Readme.md").read_text(encoding="utf-8"),
     long_description_content_type="text/markdown",
diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el
index 5ab00d0f..c34fb88e 100644
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -5,7 +5,7 @@
 ;; Author: Debanjum Singh Solanky <debanjum@gmail.com>
 ;; Description: Natural, Incremental Search for your Second Brain
 ;; Keywords: search, org-mode, outlines, markdown, beancount, ledger, image
-;; Version: 0.1.6
+;; Version: 0.1.9
 ;; Package-Requires: ((emacs "27.1"))
 ;; URL: http://github.com/debanjum/khoj/interface/emacs
 

From e00bb53336b77f289da203f1e4bec7bbb25b1dd8 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 12:16:53 +0300
Subject: [PATCH 09/19] Init word filter dictionary with default value as set
 to simplify code

---
 src/search_filter/word_filter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/search_filter/word_filter.py b/src/search_filter/word_filter.py
index c7c5d059..a177ba38 100644
--- a/src/search_filter/word_filter.py
+++ b/src/search_filter/word_filter.py
@@ -3,6 +3,7 @@ import re
 import time
 import pickle
 import logging
+from collections import defaultdict
 
 # Internal Packages
 from src.search_filter.base_filter import BaseFilter
@@ -37,19 +38,18 @@ class WordFilter(BaseFilter):
             start = time.time()
             self.cache = {}  # Clear cache on (re-)generating entries_by_word_set
             entry_splitter = r',|\.| |\]|\[\(|\)|\{|\}|\t|\n|\:'
+            self.word_to_entry_index = defaultdict(set)
             # Create map of words to entries they exist in
             for entry_index, entry in enumerate(entries):
                 for word in re.split(entry_splitter, entry[self.entry_key].lower()):
                     if word == '':
                         continue
-                    if word not in self.word_to_entry_index:
-                        self.word_to_entry_index[word] = set()
                     self.word_to_entry_index[word].add(entry_index)
 
             with self.filter_file.open('wb') as f:
                 pickle.dump(self.word_to_entry_index, f)
             end = time.time()
-            logger.debug(f"Index {self.search_type} for word filter to {self.filter_file}: {end - start} seconds")
+            logger.debug(f"Indexed {len(self.word_to_entry_index)} words of {self.search_type} type for word filter to {self.filter_file}: {end - start} seconds")
 
         return self.word_to_entry_index
 

From d835467f2c8964c4bd439d7b09db5ec79a97c35b Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 13:05:21 +0300
Subject: [PATCH 10/19] Throw exception if no valid entries found in specified
 content files

- Previously we were failing if no valid entries while computing
  embeddings. This was obscuring the actual issue of no valid entries
  found in the specified content files
- Throwing an exception early with clear message when no entries found
  should make clarify the issue to be fixed
- See issue #83 for details
---
 src/search_type/text_search.py |  4 +++-
 tests/test_text_search.py      | 25 ++++++++++++++++++++++++-
 2 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/src/search_type/text_search.py b/src/search_type/text_search.py
index 8666056c..7a80c296 100644
--- a/src/search_type/text_search.py
+++ b/src/search_type/text_search.py
@@ -11,7 +11,7 @@ from src.search_filter.base_filter import BaseFilter
 
 # Internal Packages
 from src.utils import state
-from src.utils.helpers import get_absolute_path, resolve_absolute_path, load_model
+from src.utils.helpers import get_absolute_path, is_none_or_empty, resolve_absolute_path, load_model
 from src.utils.config import TextSearchModel
 from src.utils.rawconfig import TextSearchConfig, TextContentConfig
 from src.utils.jsonl import load_jsonl
@@ -174,6 +174,8 @@ def setup(text_to_jsonl, config: TextContentConfig, search_config: TextSearchCon
 
     # Extract Entries
     entries = extract_entries(config.compressed_jsonl)
+    if is_none_or_empty(entries):
+        raise ValueError(f"No valid entries found in specified files: {config.input_files} or {config.input_filter}")
     top_k = min(len(entries), top_k)  # top_k hits can't be more than the total entries in corpus
 
     # Compute or Load Embeddings
diff --git a/tests/test_text_search.py b/tests/test_text_search.py
index 39fed92e..6a1471c9 100644
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -1,6 +1,10 @@
 # System Packages
+from copy import deepcopy
 from pathlib import Path
 
+# External Packages
+import pytest
+
 # Internal Packages
 from src.utils.state import model
 from src.search_type import text_search
@@ -9,6 +13,25 @@ from src.processor.org_mode.org_to_jsonl import org_to_jsonl
 
 
 # Test
+# ----------------------------------------------------------------------------------------------------
+def test_asymmetric_setup_with_empty_file_raises_error(content_config: ContentConfig, search_config: SearchConfig):
+    # Arrange
+    file_to_index = Path(content_config.org.input_filter).parent / "new_file_to_index.org"
+    file_to_index.touch()
+    new_org_content_config = deepcopy(content_config.org)
+    new_org_content_config.input_files = [f'{file_to_index}']
+    new_org_content_config.input_filter = None
+
+    # Act
+    # Generate notes embeddings during asymmetric setup
+    with pytest.raises(ValueError, match=r'^No valid entries found*'):
+        text_search.setup(org_to_jsonl, new_org_content_config, search_config.asymmetric, regenerate=True)
+
+    # Cleanup
+    # delete created test file
+    file_to_index.unlink()
+
+
 # ----------------------------------------------------------------------------------------------------
 def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchConfig):
     # Act
@@ -23,7 +46,7 @@ def test_asymmetric_setup(content_config: ContentConfig, search_config: SearchCo
 # ----------------------------------------------------------------------------------------------------
 def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
-    model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False)
+    model.notes_search = text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=True)
     query = "How to git install application?"
 
     # Act

From d6bd7bf3e17b7b11e7d98df37e303b4fe14cebdb Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 13:11:58 +0300
Subject: [PATCH 11/19] Fix initializing OrgNode level to string to parse org
 files

- Parsed `level` argument passed to OrgNode during init is expected to
  be a string, not an integer
- This was resulting in app failure only when parsing org files with
  no headings, like in issue #83, as level is set to string of `*`s
  the moment a heading is found in the current file
---
 src/processor/org_mode/orgnode.py |  2 +-
 tests/test_org_to_jsonl.py        | 26 ++++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/src/processor/org_mode/orgnode.py b/src/processor/org_mode/orgnode.py
index 39c67731..f2bc84e6 100644
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -61,7 +61,7 @@ def makelist(filename):
 
    todos         = { "TODO": "", "WAITING": "", "ACTIVE": "",
                      "DONE": "", "CANCELLED": "", "FAILED": ""} # populated from #+SEQ_TODO line
-   level         = 0
+   level         = ""
    heading       = ""
    bodytext      = ""
    tags          = set()      # set of all tags in headline
diff --git a/tests/test_org_to_jsonl.py b/tests/test_org_to_jsonl.py
index 6a626299..f9539b26 100644
--- a/tests/test_org_to_jsonl.py
+++ b/tests/test_org_to_jsonl.py
@@ -15,7 +15,7 @@ def test_entry_with_empty_body_line_to_jsonl(tmp_path):
     :PROPERTIES:
     :ID:       42-42-42
     :END:
-    \t\r\n 
+    \t\r 
     '''
     orgfile = create_file(tmp_path, entry)
 
@@ -37,7 +37,29 @@ def test_entry_with_body_to_jsonl(tmp_path):
     :PROPERTIES:
     :ID:       42-42-42
     :END:
-    \t\r\nBody Line 1\n
+    \t\r
+    Body Line 1
+    '''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    # Extract Entries from specified Org files
+    entries, entry_to_file_map = extract_org_entries(org_files=[orgfile])
+
+    # Process Each Entry from All Notes Files
+    jsonl_string = convert_org_entries_to_jsonl(entries, entry_to_file_map)
+    jsonl_data = [json.loads(json_string) for json_string in jsonl_string.splitlines()]
+
+    # Assert
+    assert len(jsonl_data) == 1
+
+
+def test_file_with_no_headings_to_jsonl(tmp_path):
+    "Ensure files with no heading, only body text are loaded."
+    # Arrange
+    entry = f'''
+    - Bullet point 1
+    - Bullet point 2
     '''
     orgfile = create_file(tmp_path, entry)
 

From 07b98d35f1d5b4665ff3d0fee8eb532a6e682f7a Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 13:18:39 +0300
Subject: [PATCH 12/19] Use filename or #+TITLE as heading for 0th level
 content in org files

- Set LINE, SOURCE link properties in property drawer correctly for
  content which falls under no heading
- See Issue #83 for more details
---
 src/processor/org_mode/orgnode.py | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/src/processor/org_mode/orgnode.py b/src/processor/org_mode/orgnode.py
index f2bc84e6..17c7a9e9 100644
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -73,6 +73,7 @@ def makelist(filename):
    propdict      = dict()
    in_properties_drawer = False
    in_logbook_drawer = False
+   file_title = f'{filename}'
 
    for line in f:
        ctr += 1
@@ -111,6 +112,10 @@ def makelist(filename):
               kwlist = re.findall(r'([A-Z]+)\(', line)
               for kw in kwlist: todos[kw] = ""
 
+           # Set file title to TITLE property, if it exists
+           if line[:8] == "#+TITLE:":
+               file_title = line[8:].strip()
+
            # Ignore Properties Drawers Completely
            if re.search(':PROPERTIES:', line):
               in_properties_drawer=True
@@ -167,7 +172,7 @@ def makelist(filename):
                bodytext = bodytext + line
 
    # write out last node
-   thisNode = Orgnode(level, heading, bodytext, tags)
+   thisNode = Orgnode(level, heading or file_title, bodytext, tags)
    thisNode.setProperties(propdict)
    if sched_date:
       thisNode.setScheduled(sched_date)
@@ -196,8 +201,12 @@ def makelist(filename):
           n.setHeading(prtysrch.group(2))
 
        # Set SOURCE property to a file+heading based org-mode link to the entry
-       escaped_heading = n.Heading().replace("[","\\[").replace("]","\\]")
-       n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}::*{escaped_heading}]]'
+       if n.Level() == 0:
+         n.properties['LINE'] = f'file:{normalize_filename(filename)}::0'
+         n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}]]'
+       else:
+         escaped_heading = n.Heading().replace("[","\\[").replace("]","\\]")
+         n.properties['SOURCE'] = f'[[file:{normalize_filename(filename)}::*{escaped_heading}]]'
 
    return nodelist
 

From 11917c6dddcec6ac74d4caa7dbb3952ca1194e4a Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 13:26:03 +0300
Subject: [PATCH 13/19] Do not normalize absolute filenames for creating links
 in OrgNode

---
 src/processor/org_mode/orgnode.py | 9 +++++++--
 tests/test_orgnode.py             | 8 +++-----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/processor/org_mode/orgnode.py b/src/processor/org_mode/orgnode.py
index 17c7a9e9..8d88bf18 100644
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -41,8 +41,13 @@ from os.path import relpath
 indent_regex = re.compile(r'^\s*')
 
 def normalize_filename(filename):
-   file_relative_to_home = f'~/{relpath(filename, start=Path.home())}'
-   escaped_filename = f'{file_relative_to_home}'.replace("[","\[").replace("]","\]")
+   "Normalize and escape filename for rendering"
+   if not Path(filename).is_absolute():
+      # Normalize relative filename to be relative to current directory
+      normalized_filename = f'~/{relpath(filename, start=Path.home())}'
+   else:
+      normalized_filename = filename
+   escaped_filename = f'{normalized_filename}'.replace("[","\[").replace("]","\]")
    return escaped_filename
 
 def makelist(filename):
diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py
index 186eaaec..2db68a0b 100644
--- a/tests/test_orgnode.py
+++ b/tests/test_orgnode.py
@@ -81,18 +81,17 @@ Body Line 1
 Body Line 2
 '''
     orgfile = create_file(tmp_path, entry)
-    normalized_orgfile = f'~/{relpath(orgfile, start=Path.home())}'
 
     # Act
     entries = orgnode.makelist(orgfile)
 
     # Assert
     # SOURCE link rendered with Heading
-    assert f':SOURCE: [[file:{normalized_orgfile}::*{entries[0].Heading()}]]' in f'{entries[0]}'
+    assert f':SOURCE: [[file:{orgfile}::*{entries[0].Heading()}]]' in f'{entries[0]}'
     # ID link rendered with ID
     assert f':ID: id:123-456-789-4234-1231' in f'{entries[0]}'
     # LINE link rendered with line number
-    assert f':LINE: file:{normalized_orgfile}::2' in f'{entries[0]}'
+    assert f':LINE: file:{orgfile}::2' in f'{entries[0]}'
 
 
 # ----------------------------------------------------------------------------------------------------
@@ -115,8 +114,7 @@ Body Line 1'''
     # parsed heading from entry
     assert entries[0].Heading() == "Heading[1]"
     # ensure SOURCE link has square brackets in filename, heading escaped in rendered entries
-    normalized_orgfile = f'~/{relpath(orgfile, start=Path.home())}'
-    escaped_orgfile = f'{normalized_orgfile}'.replace("[1]", "\\[1\\]")
+    escaped_orgfile = f'{orgfile}'.replace("[1]", "\\[1\\]")
     assert f':SOURCE: [[file:{escaped_orgfile}::*Heading\[1\]' in f'{entries[0]}'
 
 

From 2b58218b5637c22c44bf8152eb4dc11b5aef038b Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 14:15:43 +0300
Subject: [PATCH 14/19] Reuse search models across sessions. Merge unused
 pytest fixtures

- Remove unused model_dir pytest fixture. It was only being used by
  the content_config fixture, not by any tests
- Reuse existing search models downloaded to khoj directory.
  Downloading search models for each pytest sessions seems excessive and
  slows down tests quite a bit
---
 tests/conftest.py | 45 ++++++++++++++-------------------------------
 1 file changed, 14 insertions(+), 31 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 7545527f..fdb26557 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,6 +4,7 @@ import pytest
 # Internal Packages
 from src.search_type import image_search, text_search
 from src.utils.config import SearchType
+from src.utils.helpers import resolve_absolute_path
 from src.utils.rawconfig import ContentConfig, TextContentConfig, ImageContentConfig, SearchConfig, TextSearchConfig, ImageSearchConfig
 from src.processor.org_mode.org_to_jsonl import org_to_jsonl
 from src.search_filter.date_filter import DateFilter
@@ -12,41 +13,41 @@ from src.search_filter.file_filter import FileFilter
 
 
 @pytest.fixture(scope='session')
-def search_config(tmp_path_factory) -> SearchConfig:
-    model_dir = tmp_path_factory.mktemp('data')
-
+def search_config() -> SearchConfig:
+    model_dir = resolve_absolute_path('~/.khoj/search')
+    model_dir.mkdir(parents=True, exist_ok=True)
     search_config = SearchConfig()
 
     search_config.symmetric = TextSearchConfig(
         encoder = "sentence-transformers/all-MiniLM-L6-v2",
         cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
-        model_directory = model_dir
+        model_directory = model_dir / 'symmetric/'
     )
 
     search_config.asymmetric = TextSearchConfig(
         encoder = "sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
         cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2",
-        model_directory = model_dir
+        model_directory = model_dir / 'asymmetric/'
     )
 
     search_config.image = ImageSearchConfig(
         encoder = "sentence-transformers/clip-ViT-B-32",
-        model_directory = model_dir
+        model_directory = model_dir / 'image/'
     )
 
     return search_config
 
 
 @pytest.fixture(scope='session')
-def model_dir(search_config: SearchConfig):
-    model_dir = search_config.asymmetric.model_directory
+def content_config(tmp_path_factory, search_config: SearchConfig):
+    content_dir = tmp_path_factory.mktemp('content')
 
     # Generate Image Embeddings from Test Images
     content_config = ContentConfig()
     content_config.image = ImageContentConfig(
         input_directories = ['tests/data/images'],
-        embeddings_file = model_dir.joinpath('image_embeddings.pt'),
-        batch_size = 10,
+        embeddings_file = content_dir.joinpath('image_embeddings.pt'),
+        batch_size = 1,
         use_xmp_metadata = False)
 
     image_search.setup(content_config.image, search_config.image, regenerate=False)
@@ -55,28 +56,10 @@ def model_dir(search_config: SearchConfig):
     content_config.org = TextContentConfig(
         input_files = None,
         input_filter = 'tests/data/org/*.org',
-        compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
-        embeddings_file = model_dir.joinpath('note_embeddings.pt'))
+        compressed_jsonl = content_dir.joinpath('notes.jsonl.gz'),
+        embeddings_file = content_dir.joinpath('note_embeddings.pt'))
 
-    filters = [DateFilter(), WordFilter(model_dir, search_type=SearchType.Org), FileFilter()]
+    filters = [DateFilter(), WordFilter(content_dir, search_type=SearchType.Org), FileFilter()]
     text_search.setup(org_to_jsonl, content_config.org, search_config.asymmetric, regenerate=False, filters=filters)
 
-    return model_dir
-
-
-@pytest.fixture(scope='session')
-def content_config(model_dir) -> ContentConfig:
-    content_config = ContentConfig()
-    content_config.org = TextContentConfig(
-        input_files = None,
-        input_filter = 'tests/data/org/*.org',
-        compressed_jsonl = model_dir.joinpath('notes.jsonl.gz'),
-        embeddings_file = model_dir.joinpath('note_embeddings.pt'))
-
-    content_config.image = ImageContentConfig(
-        input_directories = ['tests/data/images'],
-        embeddings_file = model_dir.joinpath('image_embeddings.pt'),
-        batch_size = 1,
-        use_xmp_metadata = False)
-
     return content_config
\ No newline at end of file

From 976397bd82fde0852ed6509624312297ce053fb5 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 15:22:26 +0300
Subject: [PATCH 15/19] Ignore empty #+TITLE, merge multiple #+TITLE for 0th
 level headings

---
 src/processor/org_mode/orgnode.py | 10 +++-
 tests/test_orgnode.py             | 94 ++++++++++++++++++++++++++++++-
 2 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/src/processor/org_mode/orgnode.py b/src/processor/org_mode/orgnode.py
index 8d88bf18..f5c55e7f 100644
--- a/src/processor/org_mode/orgnode.py
+++ b/src/processor/org_mode/orgnode.py
@@ -118,8 +118,14 @@ def makelist(filename):
               for kw in kwlist: todos[kw] = ""
 
            # Set file title to TITLE property, if it exists
-           if line[:8] == "#+TITLE:":
-               file_title = line[8:].strip()
+           title_search = re.search(r'^#\+TITLE:\s*(.*)$', line)
+           if title_search and title_search.group(1).strip() != '':
+               title_text = title_search.group(1).strip()
+               if file_title == f'{filename}':
+                  file_title = title_text
+               else:
+                  file_title += f' {title_text}'
+               continue
 
            # Ignore Properties Drawers Completely
            if re.search(':PROPERTIES:', line):
diff --git a/tests/test_orgnode.py b/tests/test_orgnode.py
index 2db68a0b..eebf7f8a 100644
--- a/tests/test_orgnode.py
+++ b/tests/test_orgnode.py
@@ -8,6 +8,28 @@ from src.processor.org_mode import orgnode
 
 
 # Test
+# ----------------------------------------------------------------------------------------------------
+def test_parse_entry_with_no_headings(tmp_path):
+    "Test parsing of entry with minimal fields"
+    # Arrange
+    entry = f'''Body Line 1'''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    entries = orgnode.makelist(orgfile)
+
+    # Assert
+    assert len(entries) == 1
+    assert entries[0].Heading() == f'{orgfile}'
+    assert entries[0].Tags() == set()
+    assert entries[0].Body() == "Body Line 1"
+    assert entries[0].Priority() == ""
+    assert entries[0].Property("ID") == ""
+    assert entries[0].Closed() == ""
+    assert entries[0].Scheduled() == ""
+    assert entries[0].Deadline() == ""
+
+
 # ----------------------------------------------------------------------------------------------------
 def test_parse_minimal_entry(tmp_path):
     "Test parsing of entry with minimal fields"
@@ -166,10 +188,80 @@ Body 2
         assert entry.Logbook() == [(datetime.datetime(1984,4,index+1,9,0,0), datetime.datetime(1984,4,index+1,12,0,0))]
 
 
+# ----------------------------------------------------------------------------------------------------
+def test_parse_entry_with_empty_title(tmp_path):
+    "Test parsing of entry with minimal fields"
+    # Arrange
+    entry = f'''#+TITLE: 
+Body Line 1'''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    entries = orgnode.makelist(orgfile)
+
+    # Assert
+    assert len(entries) == 1
+    assert entries[0].Heading() == f'{orgfile}'
+    assert entries[0].Tags() == set()
+    assert entries[0].Body() == "Body Line 1"
+    assert entries[0].Priority() == ""
+    assert entries[0].Property("ID") == ""
+    assert entries[0].Closed() == ""
+    assert entries[0].Scheduled() == ""
+    assert entries[0].Deadline() == ""
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_parse_entry_with_title_and_no_headings(tmp_path):
+    "Test parsing of entry with minimal fields"
+    # Arrange
+    entry = f'''#+TITLE: test
+Body Line 1'''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    entries = orgnode.makelist(orgfile)
+
+    # Assert
+    assert len(entries) == 1
+    assert entries[0].Heading() == 'test'
+    assert entries[0].Tags() == set()
+    assert entries[0].Body() == "Body Line 1"
+    assert entries[0].Priority() == ""
+    assert entries[0].Property("ID") == ""
+    assert entries[0].Closed() == ""
+    assert entries[0].Scheduled() == ""
+    assert entries[0].Deadline() == ""
+
+
+# ----------------------------------------------------------------------------------------------------
+def test_parse_entry_with_multiple_titles_and_no_headings(tmp_path):
+    "Test parsing of entry with minimal fields"
+    # Arrange
+    entry = f'''#+TITLE: title1 
+Body Line 1
+#+TITLE:  title2  '''
+    orgfile = create_file(tmp_path, entry)
+
+    # Act
+    entries = orgnode.makelist(orgfile)
+
+    # Assert
+    assert len(entries) == 1
+    assert entries[0].Heading() == 'title1 title2'
+    assert entries[0].Tags() == set()
+    assert entries[0].Body() == "Body Line 1\n"
+    assert entries[0].Priority() == ""
+    assert entries[0].Property("ID") == ""
+    assert entries[0].Closed() == ""
+    assert entries[0].Scheduled() == ""
+    assert entries[0].Deadline() == ""
+
+
 # Helper Functions
 def create_file(tmp_path, entry, filename="test.org"):
     org_file = tmp_path / f"notes/{filename}"
     org_file.parent.mkdir()
     org_file.touch()
     org_file.write_text(entry)
-    return org_file
\ No newline at end of file
+    return org_file

From 1ac6a71ff0180f1369ef2b2e1bb6706ddd96d4ee Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 16:30:58 +0300
Subject: [PATCH 16/19] Add --version flag to show installed version of khoj

---
 src/utils/cli.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/utils/cli.py b/src/utils/cli.py
index a140bb46..1f66a757 100644
--- a/src/utils/cli.py
+++ b/src/utils/cli.py
@@ -1,6 +1,7 @@
 # Standard Packages
 import argparse
 import pathlib
+from importlib.metadata import version
 
 # Internal Packages
 from src.utils.helpers import resolve_absolute_path
@@ -17,9 +18,15 @@ def cli(args=None):
     parser.add_argument('--host', type=str, default='127.0.0.1', help="Host address of the server. Default: 127.0.0.1")
     parser.add_argument('--port', '-p', type=int, default=8000, help="Port of the server. Default: 8000")
     parser.add_argument('--socket', type=pathlib.Path, help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock")
+    parser.add_argument('--version', '-V', action='store_true', help="Print the installed Khoj version and exit")
 
     args = parser.parse_args(args)
 
+    if args.version:
+        # Show version of khoj installed and exit
+        print(version('khoj-assistant'))
+        exit(0)
+
     # Normalize config_file path to absolute path
     args.config_file = resolve_absolute_path(args.config_file)
 

From ec675d27d384a8d5090c7caebcaad306318d8a4c Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 16:40:22 +0300
Subject: [PATCH 17/19] Suppress non-actionable HuggingFace FutureWarning shown
 on app start

---
 src/main.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/main.py b/src/main.py
index 6fbcd035..e7dbba5d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -3,8 +3,12 @@ import os
 import signal
 import sys
 import logging
+import warnings
 from platform import system
 
+# Ignore non-actionable warnings
+warnings.filterwarnings("ignore", message=r'snapshot_download.py has been made private', category=FutureWarning)
+
 # External Packages
 import uvicorn
 from fastapi import FastAPI

From f97308bef210e0a3a71ff6c97d6466d0f3f6a70c Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Sat, 10 Sep 2022 21:40:08 +0300
Subject: [PATCH 18/19] Fix log message on writing JSONL data to file

---
 src/utils/jsonl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/utils/jsonl.py b/src/utils/jsonl.py
index 91eac555..77b5af11 100644
--- a/src/utils/jsonl.py
+++ b/src/utils/jsonl.py
@@ -44,7 +44,7 @@ def dump_jsonl(jsonl_data, output_path):
     with open(output_path, 'w', encoding='utf-8') as f:
         f.write(jsonl_data)
 
-    logger.info(f'Wrote {len(jsonl_data)} lines to jsonl at {output_path}')
+    logger.info(f'Wrote jsonl data to {output_path}')
 
 
 def compress_jsonl_data(jsonl_data, output_path):

From ed8d432fdd4a6a55d48a30e5b179a101798a1d27 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 7 Sep 2022 03:07:30 +0300
Subject: [PATCH 19/19] Clean-up generated file after image search test run

- Clean-up unused imports in test files
---
 tests/test_cli.py          | 3 ---
 tests/test_image_search.py | 7 ++++++-
 tests/test_org_to_jsonl.py | 1 -
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 4cbf1209..3c99f424 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -2,9 +2,6 @@
 from pathlib import Path
 from random import random
 
-# External Modules
-import pytest
-
 # Internal Packages
 from src.utils.cli import cli
 from src.utils.helpers import resolve_absolute_path
diff --git a/tests/test_image_search.py b/tests/test_image_search.py
index 80c4fdf6..ad374da1 100644
--- a/tests/test_image_search.py
+++ b/tests/test_image_search.py
@@ -48,8 +48,13 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
             image_files_url='/static/images',
             count=1)
 
-        actual_image = Image.open(output_directory.joinpath(Path(results[0]["entry"]).name))
+        actual_image_path = output_directory.joinpath(Path(results[0]["entry"]).name)
+        actual_image = Image.open(actual_image_path)
         expected_image = Image.open(content_config.image.input_directories[0].joinpath(expected_image_name))
 
         # Assert
         assert expected_image == actual_image
+
+    # Cleanup
+    # Delete the image files copied to results directory
+    actual_image_path.unlink()
diff --git a/tests/test_org_to_jsonl.py b/tests/test_org_to_jsonl.py
index f9539b26..39b3e91f 100644
--- a/tests/test_org_to_jsonl.py
+++ b/tests/test_org_to_jsonl.py
@@ -1,6 +1,5 @@
 # Standard Packages
 import json
-from posixpath import split
 
 # Internal Packages
 from src.processor.org_mode.org_to_jsonl import convert_org_entries_to_jsonl, extract_org_entries