From 285d17af2add96208e2aa39c6053bd2e301736f1 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 6 Jun 2023 19:28:54 +0530
Subject: [PATCH 01/21] Search in parallel across all enabled content types
 requested via API

- Update API to return content from all enabled content types when type
  is not set to a specific type in the HTTP request param
- To do this efficiently, run the search queries in parallel threads
---
 src/khoj/routers/api.py | 211 ++++++++++++++++++++++------------------
 1 file changed, 114 insertions(+), 97 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index f7658caa..93fa0fda 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -1,4 +1,6 @@
 # Standard Packages
+from collections import defaultdict
+import concurrent.futures
 import math
 import yaml
 import logging
@@ -121,6 +123,7 @@ def search(
     user_query = q.strip()
     results_count = n
     score_threshold = score_threshold if score_threshold is not None else -math.inf
+    search_futures = defaultdict(list)
 
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}-{score_threshold}-{dedupe}"
@@ -128,105 +131,119 @@ def search(
         logger.debug(f"Return response from query cache")
         return state.query_cache[query_cache_key]
 
-    if (t == SearchType.Org or t == None) and state.model.org_search:
-        # query org-mode notes
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        if (t == SearchType.Org or t == None) and state.model.org_search:
+            # query org-mode notes
+            search_futures[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    state.model.org_search,
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        if (t == SearchType.Markdown or t == None) and state.model.markdown_search:
+            # query markdown notes
+            search_futures[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    state.model.markdown_search,
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        if (t == SearchType.Pdf or t == None) and state.model.pdf_search:
+            # query pdf files
+            search_futures[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    state.model.pdf_search,
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        if (t == SearchType.Ledger or t == None) and state.model.ledger_search:
+            # query transactions
+            search_futures[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    state.model.ledger_search,
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        if (t == SearchType.Music or t == None) and state.model.music_search:
+            # query music library
+            search_futures[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    state.model.music_search,
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        if (t == SearchType.Image) and state.model.image_search:
+            # query images
+            search_futures[t] += [
+                executor.submit(
+                    image_search.query,
+                    user_query,
+                    results_count,
+                    state.model.image_search,
+                    score_threshold=score_threshold,
+                )
+            ]
+
+        if (t is None or t in SearchType) and state.model.plugin_search:
+            # query specified plugin type
+            search_future[t] += [
+                executor.submit(
+                    text_search.query,
+                    user_query,
+                    # Get plugin search model for specified search type, or the first one if none specified
+                    state.model.plugin_search.get(t.value) or next(iter(state.model.plugin_search.values())),
+                    rank_results=r,
+                    score_threshold=score_threshold,
+                    dedupe=dedupe,
+                )
+            ]
+
+        # Query across each requested content types in parallel
         with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.org_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
+            for search_future in search_futures[t]:
+                if t == SearchType.Image:
+                    hits = search_futures.result()
+                    output_directory = constants.web_directory / "images"
+                    # Collate results
+                    results += image_search.collate_results(
+                        hits,
+                        image_names=state.model.image_search.image_names,
+                        output_directory=output_directory,
+                        image_files_url="/static/images",
+                        count=results_count,
+                    )
+                else:
+                    hits, entries = search_future.result()
+                    # Collate results
+                    results += text_search.collate_results(hits, entries, results_count)
 
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Markdown or t == None) and state.model.markdown_search:
-        # query markdown files
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.markdown_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Pdf or t == None) and state.model.pdf_search:
-        # query pdf files
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.pdf_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Github or t == None) and state.model.github_search:
-        # query github embeddings
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.github_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Ledger or t == None) and state.model.ledger_search:
-        # query transactions
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.ledger_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Music or t == None) and state.model.music_search:
-        # query music library
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query, state.model.music_search, rank_results=r, score_threshold=score_threshold, dedupe=dedupe
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
-
-    elif (t == SearchType.Image or t == None) and state.model.image_search:
-        # query images
-        with timer("Query took", logger):
-            hits = image_search.query(
-                user_query, results_count, state.model.image_search, score_threshold=score_threshold
-            )
-            output_directory = constants.web_directory / "images"
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = image_search.collate_results(
-                hits,
-                image_names=state.model.image_search.image_names,
-                output_directory=output_directory,
-                image_files_url="/static/images",
-                count=results_count,
-            )
-
-    elif (t in SearchType or t == None) and state.model.plugin_search:
-        # query specified plugin type
-        with timer("Query took", logger):
-            hits, entries = text_search.query(
-                user_query,
-                # Get plugin search model for specified search type, or the first one if none specified
-                state.model.plugin_search.get(t.value) or next(iter(state.model.plugin_search.values())),
-                rank_results=r,
-                score_threshold=score_threshold,
-                dedupe=dedupe,
-            )
-
-        # collate and return results
-        with timer("Collating results took", logger):
-            results = text_search.collate_results(hits, entries, results_count)
+            # Sort results across all content types
+            results.sort(key=lambda x: float(x.score), reverse=True)
 
     # Cache results
     state.query_cache[query_cache_key] = results

From db07362ca31be3d3dbf72a7b9ac3196c65b42d06 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Thu, 8 Jun 2023 13:37:19 +0530
Subject: [PATCH 02/21] Reuse the same encoded user query across search types
 to speed up query time

- Add new filter abstract method to remove filter terms from query
- Use the defilter method to remove filter terms, encode this defiltered
  query and pass it to the query method of each search type

TODO: Encoding the query still takes 100-200 ms, unlike before. Need to
investigate why.
---
 src/khoj/routers/api.py               | 24 ++++++++++++++++++++++++
 src/khoj/search_filter/base_filter.py |  4 ++++
 src/khoj/search_filter/date_filter.py | 10 +++++++---
 src/khoj/search_filter/file_filter.py |  7 ++++++-
 src/khoj/search_filter/word_filter.py |  5 ++++-
 src/khoj/search_type/text_search.py   |  8 +++++---
 6 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 93fa0fda..35216343 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -10,12 +10,16 @@ from typing import List, Optional, Union
 # External Packages
 from fastapi import APIRouter
 from fastapi import HTTPException
+from sentence_transformers import util
 
 # Internal Packages
 from khoj.configure import configure_processor, configure_search
 from khoj.processor.conversation.gpt import converse, extract_questions
 from khoj.processor.conversation.utils import message_to_log, message_to_prompt
 from khoj.search_type import image_search, text_search
+from khoj.search_filter.date_filter import DateFilter
+from khoj.search_filter.file_filter import FileFilter
+from khoj.search_filter.word_filter import WordFilter
 from khoj.utils.helpers import log_telemetry, timer
 from khoj.utils.rawconfig import (
     FullConfig,
@@ -131,6 +135,20 @@ def search(
         logger.debug(f"Return response from query cache")
         return state.query_cache[query_cache_key]
 
+    # Encode query with filter terms removed
+    for filter in [DateFilter(), WordFilter(), FileFilter()]:
+        defiltered_query = filter.defilter(user_query)
+
+    encoded_asymmetric_query = state.model.org_search.bi_encoder.encode(
+        [defiltered_query], convert_to_tensor=True, device=state.device
+    )
+    encoded_asymmetric_query = util.normalize_embeddings(encoded_asymmetric_query)
+
+    encoded_symmetric_query = state.model.org_search.bi_encoder.encode(
+        [defiltered_query], convert_to_tensor=True, device=state.device
+    )
+    encoded_symmetric_query = util.normalize_embeddings(encoded_symmetric_query)
+
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if (t == SearchType.Org or t == None) and state.model.org_search:
             # query org-mode notes
@@ -139,6 +157,7 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.org_search,
+                    question_embedding=encoded_asymmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -152,6 +171,7 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.markdown_search,
+                    question_embedding=encoded_asymmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -165,6 +185,7 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.pdf_search,
+                    question_embedding=encoded_asymmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -178,6 +199,7 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.ledger_search,
+                    question_embedding=encoded_symmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -191,6 +213,7 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.music_search,
+                    question_embedding=encoded_asymmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -217,6 +240,7 @@ def search(
                     user_query,
                     # Get plugin search model for specified search type, or the first one if none specified
                     state.model.plugin_search.get(t.value) or next(iter(state.model.plugin_search.values())),
+                    question_embedding=encoded_asymmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
diff --git a/src/khoj/search_filter/base_filter.py b/src/khoj/search_filter/base_filter.py
index c273f9b8..aa4fa2e4 100644
--- a/src/khoj/search_filter/base_filter.py
+++ b/src/khoj/search_filter/base_filter.py
@@ -18,3 +18,7 @@ class BaseFilter(ABC):
     @abstractmethod
     def apply(self, query: str, entries: List[Entry]) -> Tuple[str, Set[int]]:
         ...
+
+    @abstractmethod
+    def defilter(self, query: str) -> str:
+        ...
diff --git a/src/khoj/search_filter/date_filter.py b/src/khoj/search_filter/date_filter.py
index 36dc7974..be07eefd 100644
--- a/src/khoj/search_filter/date_filter.py
+++ b/src/khoj/search_filter/date_filter.py
@@ -49,6 +49,12 @@ class DateFilter(BaseFilter):
         "Check if query contains date filters"
         return self.extract_date_range(raw_query) is not None
 
+    def defilter(self, query):
+        # remove date range filter from query
+        query = re.sub(rf"\s+{self.date_regex}", " ", query)
+        query = re.sub(r"\s{2,}", " ", query).strip()  # remove multiple spaces
+        return query
+
     def apply(self, query, entries):
         "Find entries containing any dates that fall within date range specified in query"
         # extract date range specified in date filter of query
@@ -59,9 +65,7 @@ class DateFilter(BaseFilter):
         if query_daterange is None:
             return query, set(range(len(entries)))
 
-        # remove date range filter from query
-        query = re.sub(rf"\s+{self.date_regex}", " ", query)
-        query = re.sub(r"\s{2,}", " ", query).strip()  # remove multiple spaces
+        query = self.defilter(query)
 
         # return results from cache if exists
         cache_key = tuple(query_daterange)
diff --git a/src/khoj/search_filter/file_filter.py b/src/khoj/search_filter/file_filter.py
index 28610796..26f416fe 100644
--- a/src/khoj/search_filter/file_filter.py
+++ b/src/khoj/search_filter/file_filter.py
@@ -28,6 +28,9 @@ class FileFilter(BaseFilter):
     def can_filter(self, raw_query):
         return re.search(self.file_filter_regex, raw_query) is not None
 
+    def defilter(self, query: str) -> str:
+        return re.sub(self.file_filter_regex, "", query).strip()
+
     def apply(self, query, entries):
         # Extract file filters from raw query
         with timer("Extract files_to_search from query", logger):
@@ -44,8 +47,10 @@ class FileFilter(BaseFilter):
                 else:
                     files_to_search += [file]
 
+        # Remove filter terms from original query
+        query = self.defilter(query)
+
         # Return item from cache if exists
-        query = re.sub(self.file_filter_regex, "", query).strip()
         cache_key = tuple(files_to_search)
         if cache_key in self.cache:
             logger.debug(f"Return file filter results from cache")
diff --git a/src/khoj/search_filter/word_filter.py b/src/khoj/search_filter/word_filter.py
index 9ee81b21..9c98e848 100644
--- a/src/khoj/search_filter/word_filter.py
+++ b/src/khoj/search_filter/word_filter.py
@@ -43,13 +43,16 @@ class WordFilter(BaseFilter):
 
         return len(required_words) != 0 or len(blocked_words) != 0
 
+    def defilter(self, query: str) -> str:
+        return re.sub(self.blocked_regex, "", re.sub(self.required_regex, "", query)).strip()
+
     def apply(self, query, entries):
         "Find entries containing required and not blocked words specified in query"
         # Separate natural query from required, blocked words filters
         with timer("Extract required, blocked filters from query", logger):
             required_words = set([word.lower() for word in re.findall(self.required_regex, query)])
             blocked_words = set([word.lower() for word in re.findall(self.blocked_regex, query)])
-            query = re.sub(self.blocked_regex, "", re.sub(self.required_regex, "", query)).strip()
+            query = self.defilter(query)
 
         if len(required_words) == 0 and len(blocked_words) == 0:
             return query, set(range(len(entries)))
diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 9d8d5c3a..96ffac7a 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -105,6 +105,7 @@ def compute_embeddings(
 def query(
     raw_query: str,
     model: TextSearchModel,
+    question_embedding: torch.Tensor = None,
     rank_results: bool = False,
     score_threshold: float = -math.inf,
     dedupe: bool = True,
@@ -124,9 +125,10 @@ def query(
         return hits, entries
 
     # Encode the query using the bi-encoder
-    with timer("Query Encode Time", logger, state.device):
-        question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
-        question_embedding = util.normalize_embeddings(question_embedding)
+    if question_embedding is None:
+        with timer("Query Encode Time", logger, state.device):
+            question_embedding = model.bi_encoder.encode([query], convert_to_tensor=True, device=state.device)
+            question_embedding = util.normalize_embeddings(question_embedding)
 
     # Find relevant entries for the query
     with timer("Search Time", logger, state.device):

From 6d94d6e75a28a7becf2c947f81a8255db0cf67dc Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 01:17:21 -0700
Subject: [PATCH 03/21] Encode the asymmetric, symmetric search queries in
 parallel for speed

Use timer to measure time to encode queries and total search time
---
 src/khoj/routers/api.py | 33 +++++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 8 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 35216343..a1aef1a1 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -2,6 +2,7 @@
 from collections import defaultdict
 import concurrent.futures
 import math
+import time
 import yaml
 import logging
 from datetime import datetime
@@ -118,6 +119,8 @@ def search(
     dedupe: Optional[bool] = True,
     client: Optional[str] = None,
 ):
+    start_time = time.time()
+
     results: List[SearchResponse] = []
     if q is None or q == "":
         logger.warn(f"No query param (q) passed in API call to initiate search")
@@ -139,15 +142,26 @@ def search(
     for filter in [DateFilter(), WordFilter(), FileFilter()]:
         defiltered_query = filter.defilter(user_query)
 
-    encoded_asymmetric_query = state.model.org_search.bi_encoder.encode(
-        [defiltered_query], convert_to_tensor=True, device=state.device
-    )
-    encoded_asymmetric_query = util.normalize_embeddings(encoded_asymmetric_query)
+    with concurrent.futures.ThreadPoolExecutor() as executor:
+        with timer("Encoding query for asymmetric search took", logger=logger):
+            encode_asymmetric_futures = executor.submit(
+                state.model.org_search.bi_encoder.encode,
+                [defiltered_query],
+                convert_to_tensor=True,
+                device=state.device,
+            )
 
-    encoded_symmetric_query = state.model.org_search.bi_encoder.encode(
-        [defiltered_query], convert_to_tensor=True, device=state.device
-    )
-    encoded_symmetric_query = util.normalize_embeddings(encoded_symmetric_query)
+        with timer("Encoding query for symmetric search took", logger=logger):
+            encode_symmetric_futures = executor.submit(
+                state.model.org_search.bi_encoder.encode,
+                [defiltered_query],
+                convert_to_tensor=True,
+                device=state.device,
+            )
+
+        with timer("Normalizing query embeddings took", logger=logger):
+            encoded_asymmetric_query = util.normalize_embeddings(encode_asymmetric_futures.result())
+            encoded_symmetric_query = util.normalize_embeddings(encode_symmetric_futures.result())
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if (t == SearchType.Org or t == None) and state.model.org_search:
@@ -279,6 +293,9 @@ def search(
         ]
     state.previous_query = user_query
 
+    end_time = time.time()
+    logger.debug(f"🔍 Search took {end_time - start_time:.2f} seconds")
+
     return results
 
 

From 0144e610d619292fd3e1f101f1927cd7cde73e22 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 02:28:51 -0700
Subject: [PATCH 04/21] Only search across content types that work with
 asymmetric search

---
 src/khoj/routers/api.py | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index a1aef1a1..7217df9c 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -142,27 +142,17 @@ def search(
     for filter in [DateFilter(), WordFilter(), FileFilter()]:
         defiltered_query = filter.defilter(user_query)
 
-    with concurrent.futures.ThreadPoolExecutor() as executor:
-        with timer("Encoding query for asymmetric search took", logger=logger):
-            encode_asymmetric_futures = executor.submit(
-                state.model.org_search.bi_encoder.encode,
-                [defiltered_query],
-                convert_to_tensor=True,
-                device=state.device,
+    encoded_asymmetric_query = None
+    if t == None or (t != SearchType.Ledger and t != SearchType.Image):
+        with timer("Encoding query took", logger=logger):
+            encoded_asymmetric_query = util.normalize_embeddings(
+                state.model.org_search.bi_encoder.encode(
+                    [defiltered_query],
+                    convert_to_tensor=True,
+                    device=state.device,
+                )
             )
 
-        with timer("Encoding query for symmetric search took", logger=logger):
-            encode_symmetric_futures = executor.submit(
-                state.model.org_search.bi_encoder.encode,
-                [defiltered_query],
-                convert_to_tensor=True,
-                device=state.device,
-            )
-
-        with timer("Normalizing query embeddings took", logger=logger):
-            encoded_asymmetric_query = util.normalize_embeddings(encode_asymmetric_futures.result())
-            encoded_symmetric_query = util.normalize_embeddings(encode_symmetric_futures.result())
-
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if (t == SearchType.Org or t == None) and state.model.org_search:
             # query org-mode notes
@@ -206,14 +196,13 @@ def search(
                 )
             ]
 
-        if (t == SearchType.Ledger or t == None) and state.model.ledger_search:
+        if (t == SearchType.Ledger) and state.model.ledger_search:
             # query transactions
             search_futures[t] += [
                 executor.submit(
                     text_search.query,
                     user_query,
                     state.model.ledger_search,
-                    question_embedding=encoded_symmetric_query,
                     rank_results=r,
                     score_threshold=score_threshold,
                     dedupe=dedupe,
@@ -294,7 +283,7 @@ def search(
     state.previous_query = user_query
 
     end_time = time.time()
-    logger.debug(f"🔍 Search took {end_time - start_time:.2f} seconds")
+    logger.debug(f"🔍 Search took: {end_time - start_time:.2f} seconds")
 
     return results
 

From 1192e49307e0b2f1172f88a57f0466812a08d762 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 19:51:33 -0700
Subject: [PATCH 05/21] Pass default value matching argument types expected by
 text_search methods

---
 src/khoj/routers/api.py             | 37 ++++++++++++++++-------------
 src/khoj/search_type/text_search.py |  2 +-
 2 files changed, 22 insertions(+), 17 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 7217df9c..18487a59 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -24,6 +24,7 @@ from khoj.search_filter.word_filter import WordFilter
 from khoj.utils.helpers import log_telemetry, timer
 from khoj.utils.rawconfig import (
     FullConfig,
+    ProcessorConfig,
     SearchResponse,
     TextContentConfig,
     ConversationProcessorConfig,
@@ -101,7 +102,10 @@ async def set_content_config_data(content_type: str, updated_config: TextContent
 
 @api.post("/config/data/processor/conversation", status_code=200)
 async def set_processor_conversation_config_data(updated_config: ConversationProcessorConfig):
-    state.config.processor.conversation = updated_config
+    if state.config.processor is None:
+        state.config.processor = ProcessorConfig(conversation=updated_config)
+    else:
+        state.config.processor.conversation = updated_config
     try:
         save_config_to_file_updated_state()
         return {"status": "ok"}
@@ -139,6 +143,7 @@ def search(
         return state.query_cache[query_cache_key]
 
     # Encode query with filter terms removed
+    defiltered_query = user_query
     for filter in [DateFilter(), WordFilter(), FileFilter()]:
         defiltered_query = filter.defilter(user_query)
 
@@ -162,9 +167,9 @@ def search(
                     user_query,
                     state.model.org_search,
                     question_embedding=encoded_asymmetric_query,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -176,9 +181,9 @@ def search(
                     user_query,
                     state.model.markdown_search,
                     question_embedding=encoded_asymmetric_query,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -190,9 +195,9 @@ def search(
                     user_query,
                     state.model.pdf_search,
                     question_embedding=encoded_asymmetric_query,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -203,9 +208,9 @@ def search(
                     text_search.query,
                     user_query,
                     state.model.ledger_search,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -217,9 +222,9 @@ def search(
                     user_query,
                     state.model.music_search,
                     question_embedding=encoded_asymmetric_query,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -237,16 +242,16 @@ def search(
 
         if (t is None or t in SearchType) and state.model.plugin_search:
             # query specified plugin type
-            search_future[t] += [
+            search_futures[t] += [
                 executor.submit(
                     text_search.query,
                     user_query,
                     # Get plugin search model for specified search type, or the first one if none specified
                     state.model.plugin_search.get(t.value) or next(iter(state.model.plugin_search.values())),
                     question_embedding=encoded_asymmetric_query,
-                    rank_results=r,
+                    rank_results=r or False,
                     score_threshold=score_threshold,
-                    dedupe=dedupe,
+                    dedupe=dedupe or True,
                 )
             ]
 
@@ -262,12 +267,12 @@ def search(
                         image_names=state.model.image_search.image_names,
                         output_directory=output_directory,
                         image_files_url="/static/images",
-                        count=results_count,
+                        count=results_count or 5,
                     )
                 else:
                     hits, entries = search_future.result()
                     # Collate results
-                    results += text_search.collate_results(hits, entries, results_count)
+                    results += text_search.collate_results(hits, entries, results_count or 5)
 
             # Sort results across all content types
             results.sort(key=lambda x: float(x.score), reverse=True)
diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 96ffac7a..c85857bb 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -105,7 +105,7 @@ def compute_embeddings(
 def query(
     raw_query: str,
     model: TextSearchModel,
-    question_embedding: torch.Tensor = None,
+    question_embedding: torch.Tensor | None = None,
     rank_results: bool = False,
     score_threshold: float = -math.inf,
     dedupe: bool = True,

From 5c7c8d1f465d62c330a7e2174f60c1cb609ecc55 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 19:52:57 -0700
Subject: [PATCH 06/21] Use async/await to fix parallelization of search across
 content types

---
 src/khoj/routers/api.py             | 8 ++++----
 src/khoj/search_type/text_search.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 18487a59..785b08c0 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -114,7 +114,7 @@ async def set_processor_conversation_config_data(updated_config: ConversationPro
 
 
 @api.get("/search", response_model=List[SearchResponse])
-def search(
+async def search(
     q: str,
     n: Optional[int] = 5,
     t: Optional[SearchType] = None,
@@ -257,9 +257,9 @@ def search(
 
         # Query across each requested content types in parallel
         with timer("Query took", logger):
-            for search_future in search_futures[t]:
+            for search_future in concurrent.futures.as_completed(search_futures[t]):
                 if t == SearchType.Image:
-                    hits = search_futures.result()
+                    hits = await search_future.result()
                     output_directory = constants.web_directory / "images"
                     # Collate results
                     results += image_search.collate_results(
@@ -270,7 +270,7 @@ def search(
                         count=results_count or 5,
                     )
                 else:
-                    hits, entries = search_future.result()
+                    hits, entries = await search_future.result()
                     # Collate results
                     results += text_search.collate_results(hits, entries, results_count or 5)
 
diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index c85857bb..14e2015f 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -102,7 +102,7 @@ def compute_embeddings(
     return corpus_embeddings
 
 
-def query(
+async def query(
     raw_query: str,
     model: TextSearchModel,
     question_embedding: torch.Tensor | None = None,

From d5fb4196de3afefd04c72c68d2d7b121d5e88005 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 20:19:15 -0700
Subject: [PATCH 07/21] Update web interface to allow querying all content
 types at once

---
 src/khoj/interface/web/index.html | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html
index 51412d75..78b24d56 100644
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -94,9 +94,12 @@
                 setQueryFieldInUrl(query);
 
             // Generate Backend API URL to execute Search
-            url = type === "image"
-                ? `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&client=web`
-                : `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&r=${rerank}&client=web`;
+            if (type == 'all')
+                url = `/api/search?q=${encodeURIComponent(query)}&n=${results_count}&client=web`;
+            else if (type === "image")
+                url = `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&client=web`;
+            else
+                url = `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&r=${rerank}&client=web`;
 
             // Execute Search and Render Results
             fetch(url)
@@ -138,6 +141,7 @@
             fetch("/api/config/types")
                 .then(response => response.json())
                 .then(enabled_types => {
+                    enabled_types.push("all");
                     document.getElementById("type").innerHTML =
                     enabled_types
                     .map(type => `<option value="${type}">${type.slice(0,1).toUpperCase() + type.slice(1)}</option>`)

From 2cd3e799d3692dac9184bf682c72283afa0514ef Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 22:22:43 -0700
Subject: [PATCH 08/21] Improve null and type checks

---
 src/khoj/configure.py               | 23 ++++++++++++++---------
 src/khoj/routers/api.py             | 22 +++++++++++-----------
 src/khoj/search_type/text_search.py |  2 +-
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index 3aa39f10..df031dfa 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -3,6 +3,7 @@ import sys
 import logging
 import json
 from enum import Enum
+from typing import Optional
 import requests
 
 # External Packages
@@ -78,16 +79,20 @@ def configure_search_types(config: FullConfig):
     core_search_types = {e.name: e.value for e in SearchType}
     # Extract configured plugin search types
     plugin_search_types = {}
-    if config.content_type.plugins:
+    if config.content_type and config.content_type.plugins:
         plugin_search_types = {plugin_type: plugin_type for plugin_type in config.content_type.plugins.keys()}
 
     # Dynamically generate search type enum by merging core search types with configured plugin search types
     return Enum("SearchType", merge_dicts(core_search_types, plugin_search_types))
 
 
-def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: state.SearchType = None):
+def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: Optional[state.SearchType] = None):
+    if config.content_type is None or config.search_type is None:
+        logger.error("🚨 Content Type or Search Type not configured.")
+        return
+
     # Initialize Org Notes Search
-    if (t == state.SearchType.Org or t == None) and config.content_type.org:
+    if (t == state.SearchType.Org or t == None) and config.content_type.org and config.search_type.asymmetric:
         logger.info("🦄 Setting up search for orgmode notes")
         # Extract Entries, Generate Notes Embeddings
         model.org_search = text_search.setup(
@@ -99,7 +104,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
         )
 
     # Initialize Org Music Search
-    if (t == state.SearchType.Music or t == None) and config.content_type.music:
+    if (t == state.SearchType.Music or t == None) and config.content_type.music and config.search_type.asymmetric:
         logger.info("🎺 Setting up search for org-music")
         # Extract Entries, Generate Music Embeddings
         model.music_search = text_search.setup(
@@ -111,7 +116,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
         )
 
     # Initialize Markdown Search
-    if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown:
+    if (t == state.SearchType.Markdown or t == None) and config.content_type.markdown and config.search_type.asymmetric:
         logger.info("💎 Setting up search for markdown notes")
         # Extract Entries, Generate Markdown Embeddings
         model.markdown_search = text_search.setup(
@@ -123,7 +128,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
         )
 
     # Initialize Ledger Search
-    if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger:
+    if (t == state.SearchType.Ledger or t == None) and config.content_type.ledger and config.search_type.symmetric:
         logger.info("💸 Setting up search for ledger")
         # Extract Entries, Generate Ledger Embeddings
         model.ledger_search = text_search.setup(
@@ -135,7 +140,7 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
         )
 
     # Initialize PDF Search
-    if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf:
+    if (t == state.SearchType.Pdf or t == None) and config.content_type.pdf and config.search_type.asymmetric:
         logger.info("🖨️ Setting up search for pdf")
         # Extract Entries, Generate PDF Embeddings
         model.pdf_search = text_search.setup(
@@ -147,14 +152,14 @@ def configure_search(model: SearchModels, config: FullConfig, regenerate: bool,
         )
 
     # Initialize Image Search
-    if (t == state.SearchType.Image or t == None) and config.content_type.image:
+    if (t == state.SearchType.Image or t == None) and config.content_type.image and config.search_type.image:
         logger.info("🌄 Setting up search for images")
         # Extract Entries, Generate Image Embeddings
         model.image_search = image_search.setup(
             config.content_type.image, search_config=config.search_type.image, regenerate=regenerate
         )
 
-    if (t == state.SearchType.Github or t == None) and config.content_type.github:
+    if (t == state.SearchType.Github or t == None) and config.content_type.github and config.search_type.asymmetric:
         logger.info("🐙 Setting up search for github")
         # Extract Entries, Generate Github Embeddings
         model.github_search = text_search.setup(
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 785b08c0..fc8ff7ce 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -134,7 +134,7 @@ async def search(
     user_query = q.strip()
     results_count = n
     score_threshold = score_threshold if score_threshold is not None else -math.inf
-    search_futures = defaultdict(list)
+    search_futures: list[concurrent.futures.Future] = []
 
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}-{score_threshold}-{dedupe}"
@@ -161,7 +161,7 @@ async def search(
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if (t == SearchType.Org or t == None) and state.model.org_search:
             # query org-mode notes
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -175,7 +175,7 @@ async def search(
 
         if (t == SearchType.Markdown or t == None) and state.model.markdown_search:
             # query markdown notes
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -189,7 +189,7 @@ async def search(
 
         if (t == SearchType.Pdf or t == None) and state.model.pdf_search:
             # query pdf files
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -203,7 +203,7 @@ async def search(
 
         if (t == SearchType.Ledger) and state.model.ledger_search:
             # query transactions
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -216,7 +216,7 @@ async def search(
 
         if (t == SearchType.Music or t == None) and state.model.music_search:
             # query music library
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -230,7 +230,7 @@ async def search(
 
         if (t == SearchType.Image) and state.model.image_search:
             # query images
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     image_search.query,
                     user_query,
@@ -242,7 +242,7 @@ async def search(
 
         if (t is None or t in SearchType) and state.model.plugin_search:
             # query specified plugin type
-            search_futures[t] += [
+            search_futures += [
                 executor.submit(
                     text_search.query,
                     user_query,
@@ -257,7 +257,7 @@ async def search(
 
         # Query across each requested content types in parallel
         with timer("Query took", logger):
-            for search_future in concurrent.futures.as_completed(search_futures[t]):
+            for search_future in concurrent.futures.as_completed(search_futures):
                 if t == SearchType.Image:
                     hits = await search_future.result()
                     output_directory = constants.web_directory / "images"
@@ -288,7 +288,7 @@ async def search(
     state.previous_query = user_query
 
     end_time = time.time()
-    logger.debug(f"🔍 Search took: {end_time - start_time:.2f} seconds")
+    logger.debug(f"🔍 Search took: {end_time - start_time:.3f} seconds")
 
     return results
 
@@ -297,7 +297,7 @@ async def search(
 def update(t: Optional[SearchType] = None, force: Optional[bool] = False, client: Optional[str] = None):
     try:
         state.search_index_lock.acquire()
-        state.model = configure_search(state.model, state.config, regenerate=force, t=t)
+        state.model = configure_search(state.model, state.config, regenerate=force or False, t=t)
         state.search_index_lock.release()
     except ValueError as e:
         logger.error(e)
diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 14e2015f..83f15918 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -181,7 +181,7 @@ def setup(
     previous_entries = (
         extract_entries(config.compressed_jsonl) if config.compressed_jsonl.exists() and not regenerate else None
     )
-    entries_with_indices = text_to_jsonl(config).process(previous_entries)
+    entries_with_indices = text_to_jsonl(config).process(previous_entries or [])
 
     # Extract Updated Entries
     entries = extract_entries(config.compressed_jsonl)

From 5c4eb950d53a723f124cac9ebf1a6f352a360645 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 20 Jun 2023 23:39:19 -0700
Subject: [PATCH 09/21] Search across all content types via khoj.el on Emacs

If no content-type is selected in the transient menu, khoj.el queries
the khoj server without the content-type parameter (t) set.

This results in a search across all enabled text content types
that use asymmetric search
---
 src/interface/emacs/khoj.el | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el
index a397d460..91cdff66 100644
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -651,7 +651,9 @@ CONFIG is json obtained from Khoj config API."
 Use QUERY, CONTENT-TYPE and (optional) RERANK as query params"
   (let ((rerank (or rerank "false"))
         (encoded-query (url-hexify-string query)))
-    (format "%s/api/search?q=%s&t=%s&r=%s&n=%s&client=emacs" khoj-server-url encoded-query content-type rerank khoj-results-count)))
+    (if content-type
+      (format "%s/api/search?q=%s&t=%s&r=%s&n=%s&client=emacs" khoj-server-url encoded-query content-type rerank khoj-results-count)
+    (format "%s/api/search?q=%s&r=%s&n=%s&client=emacs" khoj-server-url encoded-query rerank khoj-results-count))))
 
 (defun khoj--query-search-api-and-render-results (query-url content-type query buffer-name)
   "Query Khoj Search with QUERY-URL.

From 09f739b8cc2e319b6baf06e936c4e5eead426c31 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 27 Jun 2023 15:48:48 -0700
Subject: [PATCH 10/21] Null check config, log warning instead of error when
 configuring search

---
 src/khoj/configure.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index df031dfa..482e6b28 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -87,8 +87,8 @@ def configure_search_types(config: FullConfig):
 
 
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: Optional[state.SearchType] = None):
-    if config.content_type is None or config.search_type is None:
-        logger.error("🚨 Content Type or Search Type not configured.")
+    if config is None or config.content_type is None or config.search_type is None:
+        logger.warn("🚨 No Content or Search type is configured.")
         return
 
     # Initialize Org Notes Search

From 1b11d5723d474fb86cb05a1876a97dab6b4ea063 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 27 Jun 2023 15:50:41 -0700
Subject: [PATCH 11/21] Extract search request URL builder into js function in
 web interface

---
 src/khoj/interface/web/index.html | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html
index 78b24d56..906f3912 100644
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -93,15 +93,8 @@
             if (rerank)
                 setQueryFieldInUrl(query);
 
-            // Generate Backend API URL to execute Search
-            if (type == 'all')
-                url = `/api/search?q=${encodeURIComponent(query)}&n=${results_count}&client=web`;
-            else if (type === "image")
-                url = `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&client=web`;
-            else
-                url = `/api/search?q=${encodeURIComponent(query)}&t=${type}&n=${results_count}&r=${rerank}&client=web`;
-
             // Execute Search and Render Results
+            url = createRequestUrl(query, type, results_count, rerank);
             fetch(url)
                 .then(response => response.json())
                 .then(data => {
@@ -157,6 +150,18 @@
                 });
         }
 
+        function createRequestUrl(query, results_count, type, rerank) {
+            // Generate Backend API URL to execute Search
+            let url = `/api/search?q=${encodeURIComponent(query)}&n=${results_count}&client=web`;
+            // If type is not 'all', append type to URL
+            if (type !== 'all')
+                url += `&t=${type}`;
+            // Rerank is only supported by text types
+            if (type !== "image")
+                url += `&r=${rerank}`;
+            return url;
+        }
+
         function setTypeFieldInUrl(type) {
             var url = new URL(window.location.href);
             url.searchParams.set("t", type.value);

From 510bb7e684a14cadcd8990932e15bc7a7f80690d Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Tue, 27 Jun 2023 15:59:50 -0700
Subject: [PATCH 12/21] Use typing union in text_search for python 3.8
 compatible type hinting

---
 src/khoj/search_type/text_search.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/khoj/search_type/text_search.py b/src/khoj/search_type/text_search.py
index 83f15918..0af5b0fc 100644
--- a/src/khoj/search_type/text_search.py
+++ b/src/khoj/search_type/text_search.py
@@ -2,7 +2,7 @@
 import logging
 import math
 from pathlib import Path
-from typing import List, Tuple, Type
+from typing import List, Tuple, Type, Union
 
 # External Packages
 import torch
@@ -105,7 +105,7 @@ def compute_embeddings(
 async def query(
     raw_query: str,
     model: TextSearchModel,
-    question_embedding: torch.Tensor | None = None,
+    question_embedding: Union[torch.Tensor, None] = None,
     rank_results: bool = False,
     score_threshold: float = -math.inf,
     dedupe: bool = True,

From 212b1a96c8dfef47af5d2727d0604cf406efc232 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 11:34:26 -0700
Subject: [PATCH 13/21] Create "all" search type for search across all content
 types on khoj server

Allows moving the logic for searching across all content types
from the clients to the server
---
 src/khoj/routers/api.py  | 15 ++++++++-------
 src/khoj/utils/config.py |  1 +
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 4e305704..266eaed0 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -65,6 +65,7 @@ def get_config_types():
             and getattr(state.model, f"{search_type.value}_search") is not None
         )
         or ("plugins" in configured_content_types and search_type.name in configured_content_types["plugins"])
+        or search_type == SearchType.All
     ]
 
 
@@ -135,7 +136,7 @@ async def set_processor_conversation_config_data(updated_config: ConversationPro
 async def search(
     q: str,
     n: Optional[int] = 5,
-    t: Optional[SearchType] = None,
+    t: Optional[SearchType] = SearchType.All,
     r: Optional[bool] = False,
     score_threshold: Optional[Union[float, None]] = None,
     dedupe: Optional[bool] = True,
@@ -166,7 +167,7 @@ async def search(
         defiltered_query = filter.defilter(user_query)
 
     encoded_asymmetric_query = None
-    if t == None or (t != SearchType.Ledger and t != SearchType.Image):
+    if t == SearchType.All or (t != SearchType.Ledger and t != SearchType.Image):
         with timer("Encoding query took", logger=logger):
             encoded_asymmetric_query = util.normalize_embeddings(
                 state.model.org_search.bi_encoder.encode(
@@ -177,7 +178,7 @@ async def search(
             )
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
-        if (t == SearchType.Org or t == None) and state.model.org_search:
+        if (t == SearchType.Org or t == SearchType.All) and state.model.org_search:
             # query org-mode notes
             search_futures += [
                 executor.submit(
@@ -191,7 +192,7 @@ async def search(
                 )
             ]
 
-        if (t == SearchType.Markdown or t == None) and state.model.markdown_search:
+        if (t == SearchType.Markdown or t == SearchType.All) and state.model.markdown_search:
             # query markdown notes
             search_futures += [
                 executor.submit(
@@ -205,7 +206,7 @@ async def search(
                 )
             ]
 
-        if (t == SearchType.Pdf or t == None) and state.model.pdf_search:
+        if (t == SearchType.Pdf or t == SearchType.All) and state.model.pdf_search:
             # query pdf files
             search_futures += [
                 executor.submit(
@@ -232,7 +233,7 @@ async def search(
                 )
             ]
 
-        if (t == SearchType.Music or t == None) and state.model.music_search:
+        if (t == SearchType.Music or t == SearchType.All) and state.model.music_search:
             # query music library
             search_futures += [
                 executor.submit(
@@ -258,7 +259,7 @@ async def search(
                 )
             ]
 
-        if (t is None or t in SearchType) and state.model.plugin_search:
+        if (t == SearchType.All or t in SearchType) and state.model.plugin_search:
             # query specified plugin type
             search_futures += [
                 executor.submit(
diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py
index a83f7814..e3bea7b9 100644
--- a/src/khoj/utils/config.py
+++ b/src/khoj/utils/config.py
@@ -17,6 +17,7 @@ if TYPE_CHECKING:
 
 
 class SearchType(str, Enum):
+    All = "all"
     Org = "org"
     Ledger = "ledger"
     Music = "music"

From 1773a783398ea24ba10cda47de722da183171b98 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 12:10:45 -0700
Subject: [PATCH 14/21] Fix createRequestUrl method signature to fetch results
 from khoj web

---
 src/khoj/interface/web/index.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html
index e24ad33c..2adfeda2 100644
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -172,7 +172,7 @@
                 });
         }
 
-        function createRequestUrl(query, results_count, type, rerank) {
+        function createRequestUrl(query, type, results_count, rerank) {
             // Generate Backend API URL to execute Search
             let url = `/api/search?q=${encodeURIComponent(query)}&n=${results_count}&client=web`;
             // If type is not 'all', append type to URL

From 630bf995f1316b717eac4787fe3b97c56ce6907e Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 12:12:38 -0700
Subject: [PATCH 15/21] Style each result based on its content type in same
 view on Khoj web

- So when searching across content types (with content-type = "all")
  org-mode results get rendered differently than markdown, PDF etc. results

- Set div class for each result separately instead of a single uber div
  for styling. This allows styling div of each result based on the
  content-type of that result

- No need to create a placeholder "all" content type on the web
  interface, as the server now provides an "all" content type itself
---
 src/khoj/interface/web/index.html | 110 ++++++++++++++----------------
 1 file changed, 51 insertions(+), 59 deletions(-)

diff --git a/src/khoj/interface/web/index.html b/src/khoj/interface/web/index.html
index 2adfeda2..5dc6b0b1 100644
--- a/src/khoj/interface/web/index.html
+++ b/src/khoj/interface/web/index.html
@@ -14,11 +14,13 @@
     <script>
         function render_image(item) {
             return `
+            <div class="results-image">
             <a href="${item.entry}" class="image-link">
                 <img id=${item.score} src="${item.entry}?${Math.random()}"
                     title="Effective Score: ${item.score}, Meta: ${item.additional.metadata_score}, Image: ${item.additional.image_score}"
                     class="image">
-            </a>`
+            </a>
+            </div>`;
         }
 
         function render_org(query, data, classPrefix="") {
@@ -28,76 +30,69 @@
             var orgParser = new Org.Parser();
             var orgDocument = orgParser.parse(orgCode);
             var orgHTMLDocument = orgDocument.convert(Org.ConverterHTML, { htmlClassPrefix: classPrefix });
-            return orgHTMLDocument.toString();
+            return `<div class="results-org">` + orgHTMLDocument.toString() + `</div>`;
         }
 
         function render_markdown(query, data) {
             var md = window.markdownit();
-            return md.render(data.map(function (item) {
+            return data.map(function (item) {
                 if (item.additional.file.startsWith("http")) {
                     lines = item.entry.split("\n");
-                    return `${lines[0]}\t[*](${item.additional.file})\n${lines.slice(1).join("\n")}`;
+                    return md.render(`${lines[0]}\t[*](${item.additional.file})\n${lines.slice(1).join("\n")}`);
                 }
-                return `${item.entry}`;
-            }).join("\n"));
+                return `<div class="results-markdown">` + md.render(`${item.entry}`) + `</div>`;
+            }).join("\n");
         }
 
         function render_ledger(query, data) {
-            return `<div id="results-ledger">` + data.map(function (item) {
-                return `<p>${item.entry}</p>`
-            }).join("\n") + `</div>`;
+            return data.map(function (item) {
+                return `<div class="results-ledger">` + `<p>${item.entry}</p>` + `</div>`;
+            }).join("\n");
         }
 
         function render_pdf(query, data) {
-            return `<div id="results-pdf">` + data.map(function (item) {
+            return data.map(function (item) {
                 let compiled_lines = item.additional.compiled.split("\n");
                 let filename = compiled_lines.shift();
                 let text_match = compiled_lines.join("\n")
-                return `<h2>${filename}</h2>\n<p>${text_match}</p>`
-            }).join("\n") + `</div>`;
+                return `<div class="results-pdf">` + `<h2>${filename}</h2>\n<p>${text_match}</p>` + `</div>`;
+            }).join("\n");
         }
 
         function render_mutliple(query, data, type) {
-            let org_files = data.filter((item) => item.additional.file.endsWith(".org"));
-            let md_files = data.filter((item) => item.additional.file.endsWith(".md"));
-            let pdf_files = data.filter((item) => item.additional.file.endsWith(".pdf"));
-
             let html = "";
-            if (org_files.length > 0) {
-                html += render_org(query, org_files, type);
-            }
-
-            if (md_files.length > 0) {
-                html += render_markdown(query, md_files);
-            }
-
-            if (pdf_files.length > 0) {
-                html += render_pdf(query, pdf_files);
-            }
-
+            data.forEach(item => {
+               if (item.additional.file.endsWith(".org")) {
+                html += render_org(query, [item], "org-");
+               } else if (item.additional.file.endsWith(".md")) {
+                html += render_markdown(query, [item]);
+               } else if (item.additional.file.endsWith(".pdf")) {
+                html += render_pdf(query, [item]);
+               }
+            });
             return html;
         }
 
-        function render_json(data, query, type) {
+        function render_results(data, query, type) {
+            let results = "";
             if (type === "markdown") {
-                return render_markdown(query, data);
+                results = render_markdown(query, data);
             } else if (type === "org") {
-                return render_org(query, data);
+                results = render_org(query, data, "org-");
             } else if (type === "music") {
-                return render_org(query, data, "music-");
+                results = render_org(query, data, "music-");
             } else if (type === "image") {
-                return data.map(render_image).join('');
+                results = data.map(render_image).join('');
             } else if (type === "ledger") {
-                return render_ledger(query, data);
+                results = render_ledger(query, data);
             } else if (type === "pdf") {
-                return render_pdf(query, data);
-            } else if (type == "github") {
-                return render_mutliple(query, data, type);
+                results = render_pdf(query, data);
+            } else if (type === "github" || type === "all") {
+                results = render_mutliple(query, data, type);
             } else {
-                return `<div id="results-plugin">`
-                    + data.map((item) => `<p>${item.entry}</p>`).join("\n")
-                    + `</div>`;
+                results = data.map((item) => `<div class="results-plugin">` + `<p>${item.entry}</p>` + `</div>`).join("\n")
             }
+            return `<div id="results-${type}">${results}</div>`;
         }
 
         function search(rerank=false) {
@@ -121,10 +116,7 @@
                 .then(response => response.json())
                 .then(data => {
                     console.log(data);
-                    document.getElementById("results").innerHTML =
-                        `<div id=results-${type}>`
-                        + render_json(data, query, type)
-                        + `</div>`;
+                    document.getElementById("results").innerHTML = render_results(data, query, type);
                 });
         }
 
@@ -135,7 +127,7 @@
                 .then(data => {
                     console.log(data);
                     document.getElementById("results").innerHTML =
-                        render_json(data);
+                        render_results(data);
                 });
         }
 
@@ -156,7 +148,6 @@
             fetch("/api/config/types")
                 .then(response => response.json())
                 .then(enabled_types => {
-                    enabled_types.push("all");
                     document.getElementById("type").innerHTML =
                     enabled_types
                     .map(type => `<option value="${type}">${type.slice(0,1).toUpperCase() + type.slice(1)}</option>`)
@@ -313,7 +304,7 @@
             margin: 0px;
             line-height: 20px;
         }
-        #results-image {
+        .results-image {
             display: grid;
             grid-template-columns: repeat(3, 1fr);
         }
@@ -328,27 +319,28 @@
         #json {
             white-space: pre-wrap;
         }
-        #results-pdf,
-        #results-plugin,
-        #results-ledger {
+        .results-pdf,
+        .results-plugin,
+        .results-ledger {
             text-align: left;
             white-space: pre-line;
         }
-        #results-markdown, #results-github {
+        .results-markdown,
+        .results-github {
             text-align: left;
         }
-        #results-music,
-        #results-org {
+        .results-music,
+        .results-org {
             text-align: left;
             white-space: pre-line;
         }
-        #results-music h3,
-        #results-org h3 {
+        .results-music h3,
+        .results-org h3 {
             margin: 20px 0 0 0;
             font-size: larger;
         }
         span.music-task-status,
-        span.task-status {
+        span.org-task-status {
             color: white;
             padding: 3.5px 3.5px 0;
             margin-right: 5px;
@@ -357,15 +349,15 @@
             font-size: medium;
         }
         span.music-task-status.todo,
-        span.task-status.todo {
+        span.org-task-status.todo {
             background-color: #3b82f6
         }
         span.music-task-status.done,
-        span.task-status.done {
+        span.org-task-status.done {
             background-color: #22c55e;
         }
         span.music-task-tag,
-        span.task-tag {
+        span.org-task-tag {
             color: white;
             padding: 3.5px 3.5px 0;
             margin-right: 5px;

From 8eae7c898c67220c3054cc82b991cd63a4f974be Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 12:52:28 -0700
Subject: [PATCH 16/21] Put each result under org heading when querying for "all"
 content type in khoj.el

- Add "all" as default content type when no content type retrieved
  from server
---
 src/interface/emacs/khoj.el | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/interface/emacs/khoj.el b/src/interface/emacs/khoj.el
index f1f08261..57f177cd 100644
--- a/src/interface/emacs/khoj.el
+++ b/src/interface/emacs/khoj.el
@@ -598,9 +598,19 @@ CONFIG is json obtained from Khoj config API."
   "Convert JSON-RESPONSE, QUERY from API to text entries."
   (thread-last json-response
                ;; extract and render entries from API response
-               (mapcar (lambda (args) (format "%s\n\n" (cdr (assoc 'entry args)))))
+               (mapcar (lambda (json-response-item)
+                         (thread-last
+                           ;; Extract entry text from each item in json response
+                           (cdr (assoc 'entry json-response-item))
+                           (format "%s\n\n")
+                           ;; Strip leading org heading markers (*) before re-adding a 2nd level heading
+                           (replace-regexp-in-string "^\*+" "")
+                           ;; Strip leading markdown heading markers (#) before re-adding a 2nd level heading
+                           (replace-regexp-in-string "^\#+" "")
+                           ;; Format entries as org entry string
+                           (format "** %s"))))
                ;; Set query as heading in rendered results buffer
-               (format "# Query: %s\n\n%s\n" query)
+               (format "* %s\n%s\n" query)
                ;; remove leading (, ) or SPC from extracted entries string
                (replace-regexp-in-string "^[\(\) ]" "")
                ;; remove trailing (, ) or SPC from extracted entries string
@@ -651,9 +661,7 @@ CONFIG is json obtained from Khoj config API."
 Use QUERY, CONTENT-TYPE and (optional) RERANK as query params"
   (let ((rerank (or rerank "false"))
         (encoded-query (url-hexify-string query)))
-    (if content-type
-      (format "%s/api/search?q=%s&r=%s&n=%s&client=emacs" khoj-server-url encoded-query rerank khoj-results-count)
-    (format "%s/api/search?q=%s&t=%s&r=%s&n=%s&client=emacs&t=%s" khoj-server-url content-type encoded-query rerank khoj-results-count))))
+    (format "%s/api/search?q=%s&t=%s&r=%s&n=%s&client=emacs" khoj-server-url encoded-query content-type rerank khoj-results-count)))
 
 (defun khoj--query-search-api-and-render-results (query-url content-type query buffer-name)
   "Query Khoj Search with QUERY-URL.
@@ -676,7 +684,8 @@ Render results in BUFFER-NAME using QUERY, CONTENT-TYPE."
              ((equal content-type "ledger") (khoj--extract-entries-as-ledger json-response query))
              ((equal content-type "image") (khoj--extract-entries-as-images json-response query))
              (t (khoj--extract-entries json-response query))))
-      (cond ((or (equal content-type "pdf")
+      (cond ((or (equal content-type "all")
+                 (equal content-type "pdf")
                  (equal content-type "org"))
              (progn (visual-line-mode)
                     (org-mode)
@@ -1005,7 +1014,7 @@ Paragraph only starts at first text after blank line."
   ;; set content type to: last used > based on current buffer > default type
   :init-value (lambda (obj) (oset obj value (format "--content-type=%s" (or khoj--content-type (khoj--buffer-name-to-content-type (buffer-name))))))
   ;; dynamically set choices to content types enabled on khoj backend
-  :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("org" "markdown" "pdf" "ledger" "music" "image")))
+  :choices (or (ignore-errors (mapcar #'symbol-name (khoj--get-enabled-content-types))) '("all" "org" "markdown" "pdf" "ledger" "music" "image")))
 
 (transient-define-suffix khoj--search-command (&optional args)
   (interactive (list (transient-args transient-current-command)))

From b1767f93d65052573892605a93fd5cd72e12d734 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 19:53:20 -0700
Subject: [PATCH 17/21] Get any configured asymmetric search model to encode
 query for search

- Set image_search.query to async to use it with multi-threading.
  This is the same as text_search.query being set to an async method
- Exit search early if no search_model is defined in state.model
---
 src/khoj/routers/api.py              | 26 ++++++++++++++++++--------
 src/khoj/search_type/image_search.py |  2 +-
 2 files changed, 19 insertions(+), 9 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 266eaed0..822dd278 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -1,5 +1,4 @@
 # Standard Packages
-from collections import defaultdict
 import concurrent.futures
 import math
 import time
@@ -21,6 +20,7 @@ from khoj.search_type import image_search, text_search
 from khoj.search_filter.date_filter import DateFilter
 from khoj.search_filter.file_filter import FileFilter
 from khoj.search_filter.word_filter import WordFilter
+from khoj.utils.config import TextSearchModel
 from khoj.utils.helpers import log_telemetry, timer
 from khoj.utils.rawconfig import (
     ContentConfig,
@@ -144,10 +144,14 @@ async def search(
 ):
     start_time = time.time()
 
+    # Run validation checks
     results: List[SearchResponse] = []
     if q is None or q == "":
         logger.warn(f"No query param (q) passed in API call to initiate search")
         return results
+    if not state.model or not any(state.model.__dict__.values()):
+        logger.warn(f"No search models loaded. Configure a search model before initiating search")
+        return results
 
     # initialize variables
     user_query = q.strip()
@@ -168,14 +172,20 @@ async def search(
 
     encoded_asymmetric_query = None
     if t == SearchType.All or (t != SearchType.Ledger and t != SearchType.Image):
-        with timer("Encoding query took", logger=logger):
-            encoded_asymmetric_query = util.normalize_embeddings(
-                state.model.org_search.bi_encoder.encode(
-                    [defiltered_query],
-                    convert_to_tensor=True,
-                    device=state.device,
+        text_search_models: List[TextSearchModel] = [
+            model
+            for model_name, model in state.model.__dict__.items()
+            if isinstance(model, TextSearchModel) and model_name != "ledger_search"
+        ]
+        if text_search_models:
+            with timer("Encoding query took", logger=logger):
+                encoded_asymmetric_query = util.normalize_embeddings(
+                    text_search_models[0].bi_encoder.encode(
+                        [defiltered_query],
+                        convert_to_tensor=True,
+                        device=state.device,
+                    )
                 )
-            )
 
     with concurrent.futures.ThreadPoolExecutor() as executor:
         if (t == SearchType.Org or t == SearchType.All) and state.model.org_search:
diff --git a/src/khoj/search_type/image_search.py b/src/khoj/search_type/image_search.py
index 092353c7..d6cc33d6 100644
--- a/src/khoj/search_type/image_search.py
+++ b/src/khoj/search_type/image_search.py
@@ -143,7 +143,7 @@ def extract_metadata(image_name):
     return image_processed_metadata
 
 
-def query(raw_query, count, model: ImageSearchModel, score_threshold: float = -math.inf):
+async def query(raw_query, count, model: ImageSearchModel, score_threshold: float = -math.inf):
     # Set query to image content if query is of form file:/path/to/file.png
     if raw_query.startswith("file:") and pathlib.Path(raw_query[5:]).is_file():
         query_imagepath = resolve_absolute_path(pathlib.Path(raw_query[5:]), strict=True)

From f516d127c861b119655b838c86e6753356c3e377 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 20:10:57 -0700
Subject: [PATCH 18/21] Update client tests to expect "all" as a valid new
 content type

---
 tests/test_client.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_client.py b/tests/test_client.py
index d74b4f2d..57ea08de 100644
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -34,7 +34,7 @@ def test_search_with_invalid_content_type(client):
 
 # ----------------------------------------------------------------------------------------------------
 def test_search_with_valid_content_type(client):
-    for content_type in ["org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]:
+    for content_type in ["all", "org", "markdown", "ledger", "image", "music", "pdf", "plugin1"]:
         # Act
         response = client.get(f"/api/search?q=random&t={content_type}")
         # Assert
@@ -84,7 +84,7 @@ def test_get_configured_types_via_api(client):
 
     # Assert
     assert response.status_code == 200
-    assert response.json() == ["org", "image", "plugin1"]
+    assert response.json() == ["all", "org", "image", "plugin1"]
 
 
 # ----------------------------------------------------------------------------------------------------
@@ -102,7 +102,7 @@ def test_get_configured_types_with_only_plugin_content_config(content_config):
 
     # Assert
     assert response.status_code == 200
-    assert response.json() == ["plugin1"]
+    assert response.json() == ["all", "plugin1"]
 
 
 # ----------------------------------------------------------------------------------------------------
@@ -137,7 +137,7 @@ def test_get_configured_types_with_no_content_config():
 
     # Assert
     assert response.status_code == 200
-    assert response.json() == []
+    assert response.json() == ["all"]
 
 
 # ----------------------------------------------------------------------------------------------------

From 56ce97ef9e2fdf417015bb5129a3462db359d97d Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 20:11:26 -0700
Subject: [PATCH 19/21] Use async/await in tests for query method of text and
 image search

The text and image search query methods have become async, so async/await
is required to get results correctly in tests etc.
---
 src/khoj/routers/api.py    |  2 +-
 tests/test_image_search.py | 18 ++++++++++++------
 tests/test_text_search.py  |  5 +++--
 3 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index 822dd278..a9397127 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -157,7 +157,7 @@ async def search(
     user_query = q.strip()
     results_count = n
     score_threshold = score_threshold if score_threshold is not None else -math.inf
-    search_futures: list[concurrent.futures.Future] = []
+    search_futures: List[concurrent.futures.Future] = []
 
     # return cached results, if available
     query_cache_key = f"{user_query}-{n}-{t}-{r}-{score_threshold}-{dedupe}"
diff --git a/tests/test_image_search.py b/tests/test_image_search.py
index c29e93a1..e4f08d35 100644
--- a/tests/test_image_search.py
+++ b/tests/test_image_search.py
@@ -3,6 +3,9 @@ import logging
 from pathlib import Path
 from PIL import Image
 
+# External Packages
+import pytest
+
 # Internal Packages
 from khoj.utils.state import model
 from khoj.utils.constants import web_directory
@@ -48,7 +51,8 @@ def test_image_metadata(content_config: ContentConfig):
 
 
 # ----------------------------------------------------------------------------------------------------
-def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
+@pytest.mark.anyio
+async def test_image_search(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     output_directory = resolve_absolute_path(web_directory)
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
@@ -60,7 +64,7 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
 
     # Act
     for query, expected_image_name in query_expected_image_pairs:
-        hits = image_search.query(query, count=1, model=model.image_search)
+        hits = await image_search.query(query, count=1, model=model.image_search)
 
         results = image_search.collate_results(
             hits,
@@ -83,7 +87,8 @@ def test_image_search(content_config: ContentConfig, search_config: SearchConfig
 
 
 # ----------------------------------------------------------------------------------------------------
-def test_image_search_query_truncated(content_config: ContentConfig, search_config: SearchConfig, caplog):
+@pytest.mark.anyio
+async def test_image_search_query_truncated(content_config: ContentConfig, search_config: SearchConfig, caplog):
     # Arrange
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
     max_words_supported = 10
@@ -93,7 +98,7 @@ def test_image_search_query_truncated(content_config: ContentConfig, search_conf
     # Act
     try:
         with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-            image_search.query(query, count=1, model=model.image_search)
+            await image_search.query(query, count=1, model=model.image_search)
     # Assert
     except RuntimeError as e:
         if "The size of tensor a (102) must match the size of tensor b (77)" in str(e):
@@ -102,7 +107,8 @@ def test_image_search_query_truncated(content_config: ContentConfig, search_conf
 
 
 # ----------------------------------------------------------------------------------------------------
-def test_image_search_by_filepath(content_config: ContentConfig, search_config: SearchConfig, caplog):
+@pytest.mark.anyio
+async def test_image_search_by_filepath(content_config: ContentConfig, search_config: SearchConfig, caplog):
     # Arrange
     output_directory = resolve_absolute_path(web_directory)
     model.image_search = image_search.setup(content_config.image, search_config.image, regenerate=False)
@@ -113,7 +119,7 @@ def test_image_search_by_filepath(content_config: ContentConfig, search_config:
 
     # Act
     with caplog.at_level(logging.INFO, logger="khoj.search_type.image_search"):
-        hits = image_search.query(query, count=1, model=model.image_search)
+        hits = await image_search.query(query, count=1, model=model.image_search)
 
         results = image_search.collate_results(
             hits,
diff --git a/tests/test_text_search.py b/tests/test_text_search.py
index 6634a671..69f58645 100644
--- a/tests/test_text_search.py
+++ b/tests/test_text_search.py
@@ -72,13 +72,14 @@ def test_text_content_index_only_updates_on_changes(content_config: ContentConfi
 
 
 # ----------------------------------------------------------------------------------------------------
-def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
+@pytest.mark.anyio
+async def test_asymmetric_search(content_config: ContentConfig, search_config: SearchConfig):
     # Arrange
     model.notes_search = text_search.setup(OrgToJsonl, content_config.org, search_config.asymmetric, regenerate=True)
     query = "How to git install application?"
 
     # Act
-    hits, entries = text_search.query(query, model=model.notes_search, rank_results=True)
+    hits, entries = await text_search.query(query, model=model.notes_search, rank_results=True)
 
     results = text_search.collate_results(hits, entries, count=1)
 

From 5f7eaa7ded840faf463e12d0a912068140673dd1 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 22:04:34 -0700
Subject: [PATCH 20/21] Add trio, move freezegun, factory-boy to project test
 dependencies

---
 pyproject.toml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c374934a..bc5b04a2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,15 +73,15 @@ khoj = "khoj.main:run"
 [project.optional-dependencies]
 test = [
     "pytest >= 7.1.2",
+    "freezegun >= 1.2.0",
+    "factory-boy >= 3.2.1",
+    "trio >= 0.22.0",
 ]
 dev = [
     "khoj-assistant[test]",
     "mypy >= 1.0.1",
     "black >= 23.1.0",
     "pre-commit >= 3.0.4",
-    "freezegun >= 1.2.0",
-    "factory-boy==3.2.1",
-    "Faker==18.10.1",
 ]
 
 [tool.hatch.version]

From 5f2717cc4b17dcb252ea7a39f5b50ad5290e96d3 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky <debanjum@gmail.com>
Date: Wed, 28 Jun 2023 22:07:52 -0700
Subject: [PATCH 21/21] Use logger.warning since logger.warn is deprecated

---
 src/khoj/configure.py                            | 4 ++--
 src/khoj/processor/conversation/gpt.py           | 2 +-
 src/khoj/processor/markdown/markdown_to_jsonl.py | 2 +-
 src/khoj/processor/org_mode/org_to_jsonl.py      | 2 +-
 src/khoj/processor/pdf/pdf_to_jsonl.py           | 4 +++-
 src/khoj/routers/api.py                          | 4 ++--
 6 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/khoj/configure.py b/src/khoj/configure.py
index da46c78c..050cf069 100644
--- a/src/khoj/configure.py
+++ b/src/khoj/configure.py
@@ -37,7 +37,7 @@ def configure_server(args, required=False):
             logger.error(f"Exiting as Khoj is not configured.\nConfigure it via GUI or by editing {state.config_file}.")
             sys.exit(1)
         else:
-            logger.warn(
+            logger.warning(
                 f"Khoj is not configured.\nConfigure it via khoj GUI, plugins or by editing {state.config_file}."
             )
             return
@@ -88,7 +88,7 @@ def configure_search_types(config: FullConfig):
 
 def configure_search(model: SearchModels, config: FullConfig, regenerate: bool, t: Optional[state.SearchType] = None):
     if config is None or config.content_type is None or config.search_type is None:
-        logger.warn("🚨 No Content or Search type is configured.")
+        logger.warning("🚨 No Content or Search type is configured.")
         return
 
     # Initialize Org Notes Search
diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py
index 2c0a4cee..2f29b2ef 100644
--- a/src/khoj/processor/conversation/gpt.py
+++ b/src/khoj/processor/conversation/gpt.py
@@ -113,7 +113,7 @@ def extract_questions(text, model="text-davinci-003", conversation_log={}, api_k
             .replace("', '", '", "')
         )
     except json.decoder.JSONDecodeError:
-        logger.warn(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response}")
+        logger.warning(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response}")
         questions = [text]
     logger.debug(f"Extracted Questions by GPT: {questions}")
     return questions
diff --git a/src/khoj/processor/markdown/markdown_to_jsonl.py b/src/khoj/processor/markdown/markdown_to_jsonl.py
index 6c2beb45..ae0c20df 100644
--- a/src/khoj/processor/markdown/markdown_to_jsonl.py
+++ b/src/khoj/processor/markdown/markdown_to_jsonl.py
@@ -92,7 +92,7 @@ class MarkdownToJsonl(TextToJsonl):
         }
 
         if any(files_with_non_markdown_extensions):
-            logger.warn(
+            logger.warning(
                 f"[Warning] There maybe non markdown-mode files in the input set: {files_with_non_markdown_extensions}"
             )
 
diff --git a/src/khoj/processor/org_mode/org_to_jsonl.py b/src/khoj/processor/org_mode/org_to_jsonl.py
index 5f29ddc9..664427d9 100644
--- a/src/khoj/processor/org_mode/org_to_jsonl.py
+++ b/src/khoj/processor/org_mode/org_to_jsonl.py
@@ -88,7 +88,7 @@ class OrgToJsonl(TextToJsonl):
 
         files_with_non_org_extensions = {org_file for org_file in all_org_files if not org_file.endswith(".org")}
         if any(files_with_non_org_extensions):
-            logger.warn(f"There maybe non org-mode files in the input set: {files_with_non_org_extensions}")
+            logger.warning(f"There maybe non org-mode files in the input set: {files_with_non_org_extensions}")
 
         logger.debug(f"Processing files: {all_org_files}")
 
diff --git a/src/khoj/processor/pdf/pdf_to_jsonl.py b/src/khoj/processor/pdf/pdf_to_jsonl.py
index d8092cc8..3c90fdc1 100644
--- a/src/khoj/processor/pdf/pdf_to_jsonl.py
+++ b/src/khoj/processor/pdf/pdf_to_jsonl.py
@@ -83,7 +83,9 @@ class PdfToJsonl(TextToJsonl):
         files_with_non_pdf_extensions = {pdf_file for pdf_file in all_pdf_files if not pdf_file.endswith(".pdf")}
 
         if any(files_with_non_pdf_extensions):
-            logger.warn(f"[Warning] There maybe non pdf-mode files in the input set: {files_with_non_pdf_extensions}")
+            logger.warning(
+                f"[Warning] There maybe non pdf-mode files in the input set: {files_with_non_pdf_extensions}"
+            )
 
         logger.debug(f"Processing files: {all_pdf_files}")
 
diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py
index a9397127..9069418b 100644
--- a/src/khoj/routers/api.py
+++ b/src/khoj/routers/api.py
@@ -147,10 +147,10 @@ async def search(
     # Run validation checks
     results: List[SearchResponse] = []
     if q is None or q == "":
-        logger.warn(f"No query param (q) passed in API call to initiate search")
+        logger.warning(f"No query param (q) passed in API call to initiate search")
         return results
     if not state.model or not any(state.model.__dict__.values()):
-        logger.warn(f"No search models loaded. Configure a search model before initiating search")
+        logger.warning(f"No search models loaded. Configure a search model before initiating search")
         return results
 
     # initialize variables