From 7ac241b766b22e95da8087d17573dab8d5863431 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 00:34:49 -0700
Subject: [PATCH 01/10] Improve format of notes, online context passed to chat
 models in prompt

Improve separation of note snippets and show each snippet's origin file
in the notes prompt to share more readable, contextualized text with the
model. Previously the references dict was passed directly as a string,
so the documents didn't look well formatted and were less intelligible.

- Passing the file path along with the note snippets will help
  contextualize the notes better.
- Better formatting should make the notes more readable for the chat
  model.
---
 src/khoj/processor/conversation/anthropic/anthropic_chat.py | 3 +--
 src/khoj/processor/conversation/google/gemini_chat.py | 3 +--
 src/khoj/processor/conversation/openai/gpt.py | 3 +--
 src/khoj/processor/conversation/prompts.py | 4 ++++
 src/khoj/processor/conversation/utils.py | 6 +++++-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
index cb51abb4..5fb900c9 100644
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@@ -142,9 +142,8 @@ def converse_anthropic(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py
index 7359b3eb..f7cfad31 100644
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@@ -139,9 +139,8 @@ def converse_gemini(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py
index ad02b10e..293bdacd 100644
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@@ -143,9 +143,8 @@ def converse(
     """
     # Initialize Variables
     current_date = datetime.now()
-    compiled_references = "\n\n".join({f"# {item['compiled']}" for item in references})
-
     conversation_primer = prompts.query_prompt.format(query=user_query)
+    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
 
     if agent and agent.personality:
         system_prompt = prompts.custom_personality.format(
diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py
index ad164c8d..fb6a105b 100644
--- a/src/khoj/processor/conversation/prompts.py
+++ b/src/khoj/processor/conversation/prompts.py
@@ -118,6 +118,7 @@ Use my personal notes and our past conversations to inform your response.
 Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the provided notes or past conversations.
 
 User's Notes:
+-----
 {references}
 """.strip()
 )
@@ -127,6 +128,7 @@ notes_conversation_offline = PromptTemplate.from_template(
 Use my personal notes and our past conversations to inform your response.
 
 User's Notes:
+-----
 {references}
 """.strip()
 )
@@ -184,6 +186,7 @@ Use this up-to-date information from the internet to inform your response.
 Ask crisp follow-up questions to get additional context, when a helpful response cannot be provided from the online data or past conversations.
 
 Information from the internet:
+-----
 {online_results}
 """.strip()
 )
@@ -193,6 +196,7 @@ online_search_conversation_offline = PromptTemplate.from_template(
 Use this up-to-date information from the internet to inform your response.
 
 Information from the internet:
+-----
 {online_results}
 """.strip()
 )
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index e841c484..56e9e9db 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -178,7 +178,11 @@ def generate_chatml_messages_with_context(
     # Extract Chat History for Context
     chatml_messages: List[ChatMessage] = []
     for chat in conversation_log.get("chat", []):
-        message_notes = f'\n\n Notes:\n{chat.get("context")}' if chat.get("context") else "\n"
+        references = "\n\n".join(
+            {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []}
+        )
+        message_notes = f"\n\n Notes:\n{references}" if chat.get("context") else "\n"
+
         role = "user" if chat["by"] == "you" else "assistant"
 
         message_content = chat["message"] + message_notes

From 0c52a1169a3c9691f8aeac0fd73e1592975a7df4 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 01:06:00 -0700
Subject: [PATCH 02/10] Put context into separate user message before sending
 to chat model

The document and online search context are now passed as separate user
messages to the chat model, instead of being added to the final user
message. This will improve:

- The model's ability to differentiate data from the user query. That
  should improve response quality and reduce the probability of prompt
  injection
- Truncation logic, by making it simpler and more robust. When the
  context window is hit, messages can simply be popped to auto-truncate
  context in order of context, user, assistant message for each
  conversation turn in history until the current user query is reached.
  The complex, brittle logic to extract the user query from the context
  in the last user message isn't required.

Marking the context message with the assistant role doesn't translate
well across chat models. E.g.
- Gemini can't handle consecutive messages by role = model well
- Claude will merge consecutive messages by the same role. With the
  current message ordering, the context message would get merged into
  the previous assistant response. And if the context message were
  moved after the user query, the truncation logic would have to hop
  and skip while doing deletions
- GPT seems to handle consecutive roles of any type fine

Using context role = user generalizes better across chat models for now
and aligns with previous behavior.
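
For illustration, the simpler truncation this enables could look roughly
like the sketch below. This is a minimal sketch, assuming messages are
ordered newest first and a token counter is supplied; the helper names
are illustrative, not the actual khoj implementation:

    from typing import Callable

    from langchain.schema import ChatMessage

    def truncate_history(
        messages: list[ChatMessage], max_tokens: int, count_tokens: Callable[[str], int]
    ) -> list[ChatMessage]:
        # messages ordered newest first: current user query, its context
        # message, then assistant/user/context messages from history
        while len(messages) > 1 and sum(count_tokens(m.content) for m in messages) > max_tokens:
            # Drop the oldest message, whether context, user or assistant.
            # No user query needs to be dug out of a combined context message.
            messages.pop()
        return messages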
--- .../conversation/anthropic/anthropic_chat.py | 13 ++++----- .../conversation/google/gemini_chat.py | 13 ++++----- src/khoj/processor/conversation/openai/gpt.py | 13 ++++----- src/khoj/processor/conversation/utils.py | 29 ++++++++++++------- 4 files changed, 36 insertions(+), 32 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 5fb900c9..826f0fa4 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -142,7 +142,6 @@ def converse_anthropic( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -174,16 +173,16 @@ def converse_anthropic( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, + context_message=context_message, conversation_log=conversation_log, model_name=model, max_prompt_size=max_prompt_size, diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index f7cfad31..4221aeb3 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -139,7 +139,6 @@ def converse_gemini( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -172,16 +171,16 @@ def converse_gemini( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in 
conversation_commands: + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, + context_message=context_message, conversation_log=conversation_log, model_name=model, max_prompt_size=max_prompt_size, diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index 293bdacd..2f5045c2 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -143,7 +143,6 @@ def converse( """ # Initialize Variables current_date = datetime.now() - conversation_primer = prompts.query_prompt.format(query=user_query) compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) if agent and agent.personality: @@ -175,18 +174,18 @@ def converse( completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if not is_none_or_empty(online_results): - conversation_primer = ( - f"{prompts.online_search_conversation.format(online_results=str(online_results))}\n{conversation_primer}" - ) + context_message = "" if not is_none_or_empty(compiled_references): - conversation_primer = f"{prompts.notes_conversation.format(query=user_query, references=compiled_references)}\n\n{conversation_primer}" + context_message = f"{prompts.notes_conversation.format(references=compiled_references)}\n\n" + if not is_none_or_empty(online_results): + context_message += f"{prompts.online_search_conversation.format(online_results=str(online_results))}" # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, system_prompt, conversation_log, + context_message=context_message, model_name=model, max_prompt_size=max_prompt_size, tokenizer_name=tokenizer_name, diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 56e9e9db..75f17963 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -12,6 +12,7 @@ from transformers import AutoTokenizer from khoj.database.adapters import ConversationAdapters from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser +from khoj.processor.conversation import prompts from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens from khoj.utils import state from khoj.utils.helpers import is_none_or_empty, merge_dicts @@ -163,6 +164,7 @@ def generate_chatml_messages_with_context( uploaded_image_url=None, vision_enabled=False, model_type="", + context_message="", ): """Generate messages for ChatGPT with context from previous conversation""" # Set max prompt size from user config or based on pre-configured for model and machine specs @@ -178,24 +180,22 @@ def generate_chatml_messages_with_context( # Extract Chat History for Context chatml_messages: List[ChatMessage] = [] for chat in conversation_log.get("chat", []): - references = "\n\n".join( - {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} - ) - message_notes = f"\n\n Notes:\n{references}" if chat.get("context") else "\n" + if not is_none_or_empty(chat.get("context")): + references = "\n\n".join( + {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} + ) + 
message_context = f"{prompts.notes_conversation.format(references=references)}\n\n"
+            reconstructed_context_message = ChatMessage(content=message_context, role="context")
+            chatml_messages.insert(0, reconstructed_context_message)
 
         role = "user" if chat["by"] == "you" else "assistant"
-
-        message_content = chat["message"] + message_notes
         message_content = construct_structured_message(
-            message_content, chat.get("uploadedImageData"), model_type, vision_enabled
+            chat["message"], chat.get("uploadedImageData"), model_type, vision_enabled
         )
 
-        reconstructed_message = ChatMessage(content=message_content, role=role)
-        chatml_messages.insert(0, reconstructed_message)
+        reconstructed_message = ChatMessage(content=message_content, role=role)
+        chatml_messages.insert(0, reconstructed_message)
 
-        if len(chatml_messages) >= 2 * lookback_turns:
+        if len(chatml_messages) >= 3 * lookback_turns:
             break
 
     messages = []
@@ -206,6 +206,8 @@ def generate_chatml_messages_with_context(
             role="user",
         )
     )
+    if not is_none_or_empty(context_message):
+        messages.append(ChatMessage(content=context_message, role="context"))
     if len(chatml_messages) > 0:
         messages += chatml_messages
     if not is_none_or_empty(system_message):
@@ -214,6 +216,11 @@ def generate_chatml_messages_with_context(
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
+    # Reset context message role to assistant
+    for message in messages:
+        if message.role == "context":
+            message.role = "user"
+
     # Return message in chronological order
     return messages[::-1]

From 0847fb010247ef44dbfb6c7dcf81430c98cc5381 Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 22 Oct 2024 02:32:34 -0700
Subject: [PATCH 03/10] Pass online context from chat history to chat model
 for response

Previously only notes context from chat history was included. This
change includes online context from chat history as well, for the model
to use when generating its response.

This can reduce the need for online lookups by reusing previous online
context, giving faster responses. But it will increase overall response
time when past online context isn't reused, as context builds up faster
per conversation.

Unsure if inclusion of this context is preferable. If not, both notes
and online context should be removed.
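
For reference, a conversation log turn with both context types has
roughly the shape below. The by, message, context, onlineContext and
webpages keys mirror the ones read by the code; the nested webpage
fields and all values are made up for illustration:

    chat_turn = {
        "by": "you",
        "message": "What's the weather forecast for tomorrow?",
        # notes context, reformatted into the notes_conversation prompt
        "context": [{"file": "weather.org", "compiled": "Prefer cycling when it is sunny."}],
        # online context, reformatted into the online_search_conversation prompt
        "onlineContext": {"weather tomorrow": {"webpages": [{"link": "https://example.com/forecast", "snippet": "Sunny, 24C"}]}},
    }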
--- src/khoj/processor/conversation/utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 75f17963..3aee61c9 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -180,11 +180,15 @@ def generate_chatml_messages_with_context( # Extract Chat History for Context chatml_messages: List[ChatMessage] = [] for chat in conversation_log.get("chat", []): + message_context = "" if not is_none_or_empty(chat.get("context")): references = "\n\n".join( {f"# File: {item['file']}\n## {item['compiled']}\n" for item in chat.get("context") or []} ) message_context = f"{prompts.notes_conversation.format(references=references)}\n\n" + if not is_none_or_empty(chat.get("onlineContext")): + message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}" + if not is_none_or_empty(chat.get("context")) or not is_none_or_empty(chat.get("onlineContext")): reconstructed_context_message = ChatMessage(content=message_context, role="context") chatml_messages.insert(0, reconstructed_context_message) From 39a613d3bcef85522f99c8625203a1feb40bb062 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Tue, 22 Oct 2024 02:58:34 -0700 Subject: [PATCH 04/10] Fix up openai chat actor tests --- tests/test_openai_chat_actors.py | 23 ++++++++++++++--------- tests/test_openai_chat_director.py | 16 ++++++++-------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/tests/test_openai_chat_actors.py b/tests/test_openai_chat_actors.py index fc253b50..b2ae2d34 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_openai_chat_actors.py @@ -214,7 +214,7 @@ def test_answer_from_chat_history_and_previously_retrieved_content(): ( "When was I born?", "You were born on 1st April 1984.", - ["Testatron was born on 1st April 1984 in Testville."], + [{"compiled": "Testatron was born on 1st April 1984 in Testville.", "file": "birth.org"}], ), ] @@ -415,15 +415,18 @@ def test_ask_for_clarification_if_not_enough_context_in_question(): context = [ { "compiled": f"""# Ramya -My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""" +My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""", + "file": "Family.md", }, { "compiled": f"""# Fang -My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""" +My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""", + "file": "Family.md", }, { "compiled": f"""# Aiyla -My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""" +My sister, Aiyla is married to Tolga. 
They have 3 kids, Yildiz, Ali and Ahmet.""", + "file": "Family.md", }, ] @@ -608,9 +611,11 @@ async def test_infer_webpage_urls_actor_extracts_correct_links(chat_client, defa ), ], ) -async def test_infer_task_scheduling_request(chat_client, user_query, expected_crontime, expected_qs, unexpected_qs): +async def test_infer_task_scheduling_request( + chat_client, user_query, expected_crontime, expected_qs, unexpected_qs, default_user2 +): # Act - crontime, inferred_query, _ = await schedule_query(user_query, {}) + crontime, inferred_query, _ = await schedule_query(user_query, {}, default_user2) inferred_query = inferred_query.lower() # Assert @@ -630,7 +635,7 @@ async def test_infer_task_scheduling_request(chat_client, user_query, expected_c "scheduling_query, executing_query, generated_response, expected_should_notify", [ ( - "Notify me if it is going to rain tomorrow?", + "Notify me only if it is going to rain tomorrow?", "What's the weather forecast for tomorrow?", "It is sunny and warm tomorrow.", False, @@ -656,10 +661,10 @@ async def test_infer_task_scheduling_request(chat_client, user_query, expected_c ], ) def test_decision_on_when_to_notify_scheduled_task_results( - chat_client, scheduling_query, executing_query, generated_response, expected_should_notify + chat_client, default_user2, scheduling_query, executing_query, generated_response, expected_should_notify ): # Act - generated_should_notify = should_notify(scheduling_query, executing_query, generated_response) + generated_should_notify = should_notify(scheduling_query, executing_query, generated_response, default_user2) # Assert assert generated_should_notify == expected_should_notify diff --git a/tests/test_openai_chat_director.py b/tests/test_openai_chat_director.py index 279d6e37..7d460408 100644 --- a/tests/test_openai_chat_director.py +++ b/tests/test_openai_chat_director.py @@ -307,7 +307,7 @@ def test_summarize_one_file(chat_client, default_user2: KhojUser): json={"filename": summarization_file, "conversation_id": str(conversation.id)}, ) query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message != "" @@ -339,7 +339,7 @@ def test_summarize_extra_text(chat_client, default_user2: KhojUser): json={"filename": summarization_file, "conversation_id": str(conversation.id)}, ) query = "/summarize tell me about Xiu" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message != "" @@ -367,7 +367,7 @@ def test_summarize_multiple_files(chat_client, default_user2: KhojUser): ) query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert @@ -383,7 +383,7 @@ def test_summarize_no_files(chat_client, default_user2: KhojUser): # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) 
response_message = response.json()["response"] # Assert @@ -418,11 +418,11 @@ def test_summarize_different_conversation(chat_client, default_user2: KhojUser): # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation2.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation2.id)}) response_message_conv2 = response.json()["response"] # now make sure that the file filter is still in conversation 1 - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation1.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation1.id)}) response_message_conv1 = response.json()["response"] # Assert @@ -449,7 +449,7 @@ def test_summarize_nonexistant_file(chat_client, default_user2: KhojUser): json={"filename": "imaginary.markdown", "conversation_id": str(conversation.id)}, ) query = urllib.parse.quote("/summarize") - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert assert response_message == "No files selected for summarization. Please add files using the section on the left." @@ -481,7 +481,7 @@ def test_summarize_diff_user_file(chat_client, default_user: KhojUser, pdf_confi # Act query = "/summarize" - response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": conversation.id}) + response = chat_client.post(f"/api/chat", json={"q": query, "conversation_id": str(conversation.id)}) response_message = response.json()["response"] # Assert From 3b978b9b6765fee055b80da0631924e0173dddd2 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Wed, 23 Oct 2024 03:40:01 -0700 Subject: [PATCH 05/10] Fix chat history construction when generating chatml msgs with context --- src/khoj/processor/conversation/utils.py | 2 +- src/khoj/routers/api_chat.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index bc5b2c2e..1ccc7594 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -203,7 +203,7 @@ def generate_chatml_messages_with_context( chatml_messages.insert(0, reconstructed_context_message) role = "user" if chat["by"] == "you" else "assistant" - message_content = construct_structured_message(message_content, chat.get("images"), model_type, vision_enabled) + message_content = construct_structured_message(chat["message"], chat.get("images"), model_type, vision_enabled) reconstructed_message = ChatMessage(content=message_content, role=role) chatml_messages.insert(0, reconstructed_message) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 09ea9eea..3cc541b1 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -112,7 +112,7 @@ def add_files_filter(request: Request, filter: FilesFilterRequest): file_filters = ConversationAdapters.add_files_to_filter(request.user.object, conversation_id, files_filter) return Response(content=json.dumps(file_filters), media_type="application/json", status_code=200) except Exception as e: - logger.error(f"Error adding file filter {filter.filename}: {e}", exc_info=True) + logger.error(f"Error adding file filter {filter.filenames}: {e}", exc_info=True) raise 
HTTPException(status_code=422, detail=str(e)) From a691ce4aa68225f1c300a50938ea00300d810aa3 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 27 Oct 2024 20:43:41 -0700 Subject: [PATCH 06/10] Batch entries into smaller groups to process --- .../commands/change_default_model.py | 32 +++++++++++-------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/src/khoj/database/management/commands/change_default_model.py b/src/khoj/database/management/commands/change_default_model.py index cfa78581..d9a6359f 100644 --- a/src/khoj/database/management/commands/change_default_model.py +++ b/src/khoj/database/management/commands/change_default_model.py @@ -19,6 +19,8 @@ from khoj.processor.embeddings import EmbeddingsModel logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) +BATCH_SIZE = 1000 # Define an appropriate batch size + class Command(BaseCommand): help = "Convert all existing Entry objects to use a new default Search model." @@ -42,22 +44,24 @@ class Command(BaseCommand): def handle(self, *args, **options): @transaction.atomic def regenerate_entries(entry_filter: Q, embeddings_model: EmbeddingsModel, search_model: SearchModelConfig): - entries = Entry.objects.filter(entry_filter).all() - compiled_entries = [entry.compiled for entry in entries] - updated_entries: List[Entry] = [] - try: - embeddings = embeddings_model.embed_documents(compiled_entries) + total_entries = Entry.objects.filter(entry_filter).count() + for start in tqdm(range(0, total_entries, BATCH_SIZE)): + end = start + BATCH_SIZE + entries = Entry.objects.filter(entry_filter)[start:end] + compiled_entries = [entry.compiled for entry in entries] + updated_entries: List[Entry] = [] + try: + embeddings = embeddings_model.embed_documents(compiled_entries) + except Exception as e: + logger.error(f"Error embedding documents: {e}") + return - except Exception as e: - logger.error(f"Error embedding documents: {e}") - return + for i, entry in enumerate(entries): + entry.embeddings = embeddings[i] + entry.search_model_id = search_model.id + updated_entries.append(entry) - for i, entry in enumerate(tqdm(entries)): - entry.embeddings = embeddings[i] - entry.search_model_id = search_model.id - updated_entries.append(entry) - - Entry.objects.bulk_update(updated_entries, ["embeddings", "search_model_id", "file_path"]) + Entry.objects.bulk_update(updated_entries, ["embeddings", "search_model_id", "file_path"]) search_model_config_id = options.get("search_model_id") apply = options.get("apply") From 4e39088f5b312475d9dbc58f1fd7c1d07408c10f Mon Sep 17 00:00:00 2001 From: Debanjum Date: Sun, 27 Oct 2024 23:02:27 -0700 Subject: [PATCH 07/10] Make agent name in home page carousel not text wrap on mobile --- src/interface/web/app/page.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/interface/web/app/page.tsx b/src/interface/web/app/page.tsx index b570e86c..eafd5373 100644 --- a/src/interface/web/app/page.tsx +++ b/src/interface/web/app/page.tsx @@ -361,7 +361,7 @@ function ChatBodyData(props: ChatBodyDataProps) { className={`${selectedAgent === agents[index].slug ? 
convertColorToBorderClass(agents[index].color) : "border-muted text-muted-foreground"} hover:cursor-pointer`} > openAgentEditCard(agents[index].slug) }

From ee0789eb3dfa80060c21fb11b8ffa3d0390581c4 Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Sun, 27 Oct 2024 23:55:47 -0700
Subject: [PATCH 08/10] Mark context messages with user role as context role
 isn't being used

Context role was added to allow changing the message truncation order
based on the context role as well. Revert it for now since this is not
currently being done.
---
 src/khoj/processor/conversation/utils.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index 1ccc7594..9946b4e9 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -174,7 +174,7 @@ def generate_chatml_messages_with_context(
     model_type="",
     context_message="",
 ):
-    """Generate messages for ChatGPT with context from previous conversation"""
+    """Generate chat messages with appropriate context from previous conversation to send to the chat model"""
     # Set max prompt size from user config or based on pre-configured for model and machine specs
     if not max_prompt_size:
         if loaded_model:
@@ -199,7 +199,7 @@ def generate_chatml_messages_with_context(
             if not is_none_or_empty(chat.get("onlineContext")):
                 message_context += f"{prompts.online_search_conversation.format(online_results=chat.get('onlineContext'))}"
             if not is_none_or_empty(message_context):
-                reconstructed_context_message = ChatMessage(content=message_context, role="context")
+                reconstructed_context_message = ChatMessage(content=message_context, role="user")
                 chatml_messages.insert(0, reconstructed_context_message)
 
             role = "user" if chat["by"] == "you" else "assistant"
@@ -220,7 +220,7 @@ def generate_chatml_messages_with_context(
         )
     )
     if not is_none_or_empty(context_message):
-        messages.append(ChatMessage(content=context_message, role="context"))
+        messages.append(ChatMessage(content=context_message, role="user"))
     if len(chatml_messages) > 0:
         messages += chatml_messages
     if not is_none_or_empty(system_message):
@@ -229,11 +229,6 @@ def generate_chatml_messages_with_context(
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, loaded_model, tokenizer_name)
 
-    # Reset context message role to assistant
-    for message in messages:
-        if message.role == "context":
-            message.role = "user"
-
     # Return message in chronological order
     return messages[::-1]

From 8ddd70f3a9be25693021f778fb76be3d5082089e Mon Sep 17 00:00:00 2001
From: Debanjum
Date: Mon, 28 Oct 2024 00:22:21 -0700
Subject: [PATCH 09/10] Put context into separate message before sending to
 offline chat model

Align the context passed to the offline chat model with the other chat
models:
- Pass context in a separate message for better separation between the
  user query and the shared context
- Pass the filename in the context
- Add online results for the webpage conversation command
---
 .../conversation/offline/chat_model.py | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py
index 4eafae00..d9cbd507 100644
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -153,7 +153,7 @@ def converse_offline(
     # Initialize 
Variables assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured" offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size) - compiled_references_message = "\n\n".join({f"{item['compiled']}" for item in references}) + compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) current_date = datetime.now() @@ -170,8 +170,6 @@ def converse_offline( day_of_week=current_date.strftime("%A"), ) - conversation_primer = prompts.query_prompt.format(query=user_query) - if location_data: location_prompt = prompts.user_location.format(location=f"{location_data}") system_prompt = f"{system_prompt}\n{location_prompt}" @@ -181,27 +179,31 @@ def converse_offline( system_prompt = f"{system_prompt}\n{user_name_prompt}" # Get Conversation Primer appropriate to Conversation Type - if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references_message): + if conversation_commands == [ConversationCommand.Notes] and is_none_or_empty(compiled_references): return iter([prompts.no_notes_found.format()]) elif conversation_commands == [ConversationCommand.Online] and is_none_or_empty(online_results): completion_func(chat_response=prompts.no_online_results_found.format()) return iter([prompts.no_online_results_found.format()]) - if ConversationCommand.Online in conversation_commands: + context_message = "" + if not is_none_or_empty(compiled_references): + context_message += f"{prompts.notes_conversation_offline.format(references=compiled_references)}\n\n" + if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: simplified_online_results = online_results.copy() for result in online_results: if online_results[result].get("webpages"): simplified_online_results[result] = online_results[result]["webpages"] - conversation_primer = f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}\n{conversation_primer}" - if not is_none_or_empty(compiled_references_message): - conversation_primer = f"{prompts.notes_conversation_offline.format(references=compiled_references_message)}\n\n{conversation_primer}" + context_message += ( + f"{prompts.online_search_conversation_offline.format(online_results=str(simplified_online_results))}" + ) # Setup Prompt with Primer or Conversation History messages = generate_chatml_messages_with_context( - conversation_primer, + user_query, system_prompt, conversation_log, + context_message=context_message, model_name=model, loaded_model=offline_chat_model, max_prompt_size=max_prompt_size, From aad7528d1bfe99cc3d1f3c89aaa769322431efab Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 28 Oct 2024 01:55:24 -0700 Subject: [PATCH 10/10] Render slash commands popup below chat input text area on home page --- .../web/app/components/chatInputArea/chatInputArea.tsx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx index 7f2baf1d..92a3b3ae 100644 --- a/src/interface/web/app/components/chatInputArea/chatInputArea.tsx +++ b/src/interface/web/app/components/chatInputArea/chatInputArea.tsx @@ -367,6 +367,11 @@ export const ChatInputArea = forwardRef((pr e.preventDefault()} className={`${props.isMobileWidth ? 
"w-[100vw]" : "w-full"} rounded-md`} + side="top" + align="center" + /* Offset below text area on home page (i.e where conversationId is unset) */ + sideOffset={props.conversationId ? 0 : 80} + alignOffset={0} >