From 9935d4db0b04b379ed20ccbde7936b03c5bea765 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 28 Oct 2024 17:48:45 -0700 Subject: [PATCH 1/7] Do not use a message branch if no msg id provided to prompt tracer --- src/khoj/processor/conversation/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 7c46b3b3..184de372 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -423,7 +423,8 @@ def commit_conversation_trace( msg_branch = f"m_{mid}" if mid else None if msg_branch and msg_branch not in repo.branches: repo.create_head(msg_branch) - repo.heads[msg_branch].checkout() + if msg_branch: + repo.heads[msg_branch].checkout() # Include file with content to commit files_to_commit = {"query": session_yaml, "response": response_yaml, "system_prompt": system_message_yaml} From 0b0cfb35e6f283393cdddb15dd3253dcc8c915ed Mon Sep 17 00:00:00 2001 From: Debanjum Date: Fri, 1 Nov 2024 11:55:13 -0700 Subject: [PATCH 2/7] Simplify in research mode check in api_chat. - Dedent code for readability - Use better name for in research mode check - Continue to remove inferred summarize command when multiple files in file filter even when not in research mode - Continue to show select information source train of thought. 
It was removed by mistake earlier --- src/khoj/routers/api_chat.py | 196 +++++++++++++++++------------------ 1 file changed, 97 insertions(+), 99 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index e9d60a1b..55fd0c31 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -693,7 +693,7 @@ async def chat( meta_log = conversation.conversation_log is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] - pending_research = True + in_research_mode = False researched_results = "" online_results: Dict = dict() code_results: Dict = dict() @@ -712,6 +712,11 @@ async def chat( agent=agent, tracer=tracer, ) + conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) + async for result in send_event( + ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}" + ): + yield result mode = await aget_relevant_output_modes( q, meta_log, is_automated_task, user, uploaded_images, agent, tracer=tracer @@ -738,7 +743,7 @@ async def chat( ): if isinstance(research_result, InformationCollectionIteration): if research_result.summarizedResult: - pending_research = False + in_research_mode = True if research_result.onlineContext: online_results.update(research_result.onlineContext) if research_result.codeContext: @@ -752,11 +757,9 @@ async def chat( yield research_result # researched_results = await extract_relevant_info(q, researched_results, agent) - + in_research_mode = False logger.info(f"Researched Results: {researched_results}") - pending_research = False - for cmd in conversation_commands: await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd) q = q.replace(f"/{cmd.value}", "").strip() @@ -771,11 +774,9 @@ async def chat( and not used_slash_summarize # but we can't actually summarize and len(file_filters) != 1 - # not pending research - and not pending_research ): 
conversation_commands.remove(ConversationCommand.Summarize) - elif ConversationCommand.Summarize in conversation_commands and pending_research: + elif ConversationCommand.Summarize in conversation_commands and not in_research_mode: response_log = "" agent_has_entries = await EntryAdapters.aagent_has_entries(agent) if len(file_filters) == 0 and not agent_has_entries: @@ -869,7 +870,7 @@ async def chat( # Gather Context ## Extract Document References - if pending_research: + if not in_research_mode: try: async for result in extract_references_and_questions( request, @@ -916,99 +917,96 @@ async def chat( if ConversationCommand.Notes in conversation_commands and is_none_or_empty(compiled_references): conversation_commands.remove(ConversationCommand.Notes) - if pending_research: - ## Gather Online References - if ConversationCommand.Online in conversation_commands: - try: - async for result in search_online( - defiltered_query, - meta_log, - location, - user, - partial(send_event, ChatEvent.STATUS), - custom_filters, - query_images=uploaded_images, - agent=agent, - tracer=tracer, - ): - if isinstance(result, dict) and ChatEvent.STATUS in result: - yield result[ChatEvent.STATUS] - else: - online_results = result - except Exception as e: - error_message = f"Error searching online: {e}. Attempting to respond without online results" - logger.warning(error_message) - async for result in send_event( - ChatEvent.STATUS, "Online search failed. 
I'll try respond without online references" - ): - yield result + ## Gather Online References + if ConversationCommand.Online in conversation_commands and not in_research_mode: + try: + async for result in search_online( + defiltered_query, + meta_log, + location, + user, + partial(send_event, ChatEvent.STATUS), + custom_filters, + query_images=uploaded_images, + agent=agent, + tracer=tracer, + ): + if isinstance(result, dict) and ChatEvent.STATUS in result: + yield result[ChatEvent.STATUS] + else: + online_results = result + except Exception as e: + error_message = f"Error searching online: {e}. Attempting to respond without online results" + logger.warning(error_message) + async for result in send_event( + ChatEvent.STATUS, "Online search failed. I'll try respond without online references" + ): + yield result - if pending_research: - ## Gather Webpage References - if ConversationCommand.Webpage in conversation_commands: - try: - async for result in read_webpages( - defiltered_query, - meta_log, - location, - user, - partial(send_event, ChatEvent.STATUS), - query_images=uploaded_images, - agent=agent, - tracer=tracer, - ): - if isinstance(result, dict) and ChatEvent.STATUS in result: - yield result[ChatEvent.STATUS] - else: - direct_web_pages = result - webpages = [] - for query in direct_web_pages: - if online_results.get(query): - online_results[query]["webpages"] = direct_web_pages[query]["webpages"] - else: - online_results[query] = {"webpages": direct_web_pages[query]["webpages"]} + ## Gather Webpage References + if ConversationCommand.Webpage in conversation_commands and not in_research_mode: + try: + async for result in read_webpages( + defiltered_query, + meta_log, + location, + user, + partial(send_event, ChatEvent.STATUS), + query_images=uploaded_images, + agent=agent, + tracer=tracer, + ): + if isinstance(result, dict) and ChatEvent.STATUS in result: + yield result[ChatEvent.STATUS] + else: + direct_web_pages = result + webpages = [] + for query in 
direct_web_pages: + if online_results.get(query): + online_results[query]["webpages"] = direct_web_pages[query]["webpages"] + else: + online_results[query] = {"webpages": direct_web_pages[query]["webpages"]} - for webpage in direct_web_pages[query]["webpages"]: - webpages.append(webpage["link"]) - async for result in send_event(ChatEvent.STATUS, f"**Read web pages**: {webpages}"): - yield result - except Exception as e: - logger.warning( - f"Error reading webpages: {e}. Attempting to respond without webpage results", - exc_info=True, - ) - async for result in send_event( - ChatEvent.STATUS, "Webpage read failed. I'll try respond without webpage references" - ): - yield result + for webpage in direct_web_pages[query]["webpages"]: + webpages.append(webpage["link"]) + async for result in send_event(ChatEvent.STATUS, f"**Read web pages**: {webpages}"): + yield result + except Exception as e: + logger.warning( + f"Error reading webpages: {e}. Attempting to respond without webpage results", + exc_info=True, + ) + async for result in send_event( + ChatEvent.STATUS, "Webpage read failed. I'll try respond without webpage references" + ): + yield result - if pending_research: - ## Gather Code Results - if ConversationCommand.Code in conversation_commands and pending_research: - try: - context = f"# Iteration 1:\n#---\nNotes:\n{compiled_references}\n\nOnline Results:{online_results}" - async for result in run_code( - defiltered_query, - meta_log, - context, - location, - user, - partial(send_event, ChatEvent.STATUS), - query_images=uploaded_images, - agent=agent, - tracer=tracer, - ): - if isinstance(result, dict) and ChatEvent.STATUS in result: - yield result[ChatEvent.STATUS] - else: - code_results = result - async for result in send_event(ChatEvent.STATUS, f"**Ran code snippets**: {len(code_results)}"): - yield result - except ValueError as e: - logger.warning( - f"Failed to use code tool: {e}. 
Attempting to respond without code results", - exc_info=True, - ) + ## Gather Code Results + if ConversationCommand.Code in conversation_commands and not in_research_mode: + try: + context = f"# Iteration 1:\n#---\nNotes:\n{compiled_references}\n\nOnline Results:{online_results}" + async for result in run_code( + defiltered_query, + meta_log, + context, + location, + user, + partial(send_event, ChatEvent.STATUS), + query_images=uploaded_images, + agent=agent, + tracer=tracer, + ): + if isinstance(result, dict) and ChatEvent.STATUS in result: + yield result[ChatEvent.STATUS] + else: + code_results = result + async for result in send_event(ChatEvent.STATUS, f"**Ran code snippets**: {len(code_results)}"): + yield result + except ValueError as e: + logger.warning( + f"Failed to use code tool: {e}. Attempting to respond without code results", + exc_info=True, + ) ## Send Gathered References async for result in send_event( From cd75151431b3583c8aaa535ff922a366edde41e1 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Fri, 1 Nov 2024 11:58:44 -0700 Subject: [PATCH 3/7] Do not allow auto selecting research mode as tool for now. You are required to manually turn it on. This takes longer and should be a high intent activity initiated by user --- src/khoj/utils/helpers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 664bbde9..c98016fa 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -355,7 +355,6 @@ tool_descriptions_for_llm = { ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.", ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. 
are available.", ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.", - ConversationCommand.Research: "To use when you need to do DEEP research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.", } function_calling_description_for_llm = { From c1c779a7ef2d02a93a49b1fa8ca86b4b846fe8b4 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Fri, 1 Nov 2024 12:04:38 -0700 Subject: [PATCH 4/7] Do not yaml format raw code results in context for LLM. It's confusing --- src/khoj/processor/conversation/anthropic/anthropic_chat.py | 2 +- src/khoj/processor/conversation/google/gemini_chat.py | 2 +- src/khoj/processor/conversation/offline/chat_model.py | 4 +--- src/khoj/processor/conversation/openai/gpt.py | 2 +- src/khoj/processor/conversation/utils.py | 2 +- 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index 1ca2095d..e2fd0c74 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -189,7 +189,7 @@ def converse_anthropic( if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n" if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): - context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n" + context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n" context_message = context_message.strip() # Setup Prompt with Primer or Conversation History diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index 
7a3ffe4d..f543bc6b 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -193,7 +193,7 @@ def converse_gemini( if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands: context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n" if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): - context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n" + context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n" context_message = context_message.strip() # Setup Prompt with Primer or Conversation History diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index a7afaca4..b3e1523c 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -160,8 +160,6 @@ def converse_offline( assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured" offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size) tracer["chat_model"] = model - - compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references}) current_date = datetime.now() if agent and agent.personality: @@ -203,7 +201,7 @@ def converse_offline( context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n" if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results): - context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n" + context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n" 
context_message = context_message.strip() # Setup Prompt with Primer or Conversation History diff --git a/src/khoj/processor/conversation/openai/gpt.py b/src/khoj/processor/conversation/openai/gpt.py index ee7bac83..c376a90e 100644 --- a/src/khoj/processor/conversation/openai/gpt.py +++ b/src/khoj/processor/conversation/openai/gpt.py @@ -191,7 +191,7 @@ def converse( if not is_none_or_empty(online_results): context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n" if not is_none_or_empty(code_results): - context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n" + context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n" context_message = context_message.strip() # Setup Prompt with Primer or Conversation History diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index b4db71d9..9970aefd 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -20,7 +20,7 @@ from langchain.schema import ChatMessage from llama_cpp.llama import Llama from transformers import AutoTokenizer -from khoj.database.adapters import ConversationAdapters, ais_user_subscribed +from khoj.database.adapters import ConversationAdapters from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens From cead1598b919c83f731d88a4c60d192acdb4480d Mon Sep 17 00:00:00 2001 From: sabaimran Date: Fri, 1 Nov 2024 13:00:11 -0700 Subject: [PATCH 5/7] Don't reset research mode after completing research execution --- src/khoj/routers/api_chat.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 55fd0c31..12583815 100644 --- a/src/khoj/routers/api_chat.py 
+++ b/src/khoj/routers/api_chat.py @@ -757,7 +757,6 @@ async def chat( yield research_result # researched_results = await extract_relevant_info(q, researched_results, agent) - in_research_mode = False logger.info(f"Researched Results: {researched_results}") for cmd in conversation_commands: @@ -1022,7 +1021,7 @@ async def chat( # Generate Output ## Generate Image Output - if ConversationCommand.Image in conversation_commands: + if ConversationCommand.Image in conversation_commands and not in_research_mode: async for result in text_to_image( defiltered_query, user, From 8fd2fe162feeb31d464b043fa8a16ce6ba5ecdef Mon Sep 17 00:00:00 2001 From: sabaimran Date: Fri, 1 Nov 2024 13:12:34 -0700 Subject: [PATCH 6/7] Determine if research mode is enabled by checking the conversation commands and 'linting' them in the selection phase --- src/khoj/routers/api_chat.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index 12583815..cc9185be 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -693,7 +693,6 @@ async def chat( meta_log = conversation.conversation_log is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask] - in_research_mode = False researched_results = "" online_results: Dict = dict() code_results: Dict = dict() @@ -712,6 +711,11 @@ async def chat( agent=agent, tracer=tracer, ) + + # If we're doing research, we don't want to do anything else + if ConversationCommand.Research in conversation_commands: + conversation_commands = [ConversationCommand.Research] + conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands]) async for result in send_event( ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}" @@ -743,7 +747,6 @@ async def chat( ): if isinstance(research_result, InformationCollectionIteration): if research_result.summarizedResult: - in_research_mode = True 
if research_result.onlineContext: online_results.update(research_result.onlineContext) if research_result.codeContext: @@ -775,7 +778,7 @@ async def chat( and len(file_filters) != 1 ): conversation_commands.remove(ConversationCommand.Summarize) - elif ConversationCommand.Summarize in conversation_commands and not in_research_mode: + elif ConversationCommand.Summarize in conversation_commands: response_log = "" agent_has_entries = await EntryAdapters.aagent_has_entries(agent) if len(file_filters) == 0 and not agent_has_entries: @@ -869,7 +872,7 @@ async def chat( # Gather Context ## Extract Document References - if not in_research_mode: + if not ConversationCommand.Research in conversation_commands: try: async for result in extract_references_and_questions( request, @@ -917,7 +920,7 @@ async def chat( conversation_commands.remove(ConversationCommand.Notes) ## Gather Online References - if ConversationCommand.Online in conversation_commands and not in_research_mode: + if ConversationCommand.Online in conversation_commands: try: async for result in search_online( defiltered_query, @@ -943,7 +946,7 @@ async def chat( yield result ## Gather Webpage References - if ConversationCommand.Webpage in conversation_commands and not in_research_mode: + if ConversationCommand.Webpage in conversation_commands: try: async for result in read_webpages( defiltered_query, @@ -981,7 +984,7 @@ async def chat( yield result ## Gather Code Results - if ConversationCommand.Code in conversation_commands and not in_research_mode: + if ConversationCommand.Code in conversation_commands: try: context = f"# Iteration 1:\n#---\nNotes:\n{compiled_references}\n\nOnline Results:{online_results}" async for result in run_code( @@ -1021,7 +1024,7 @@ async def chat( # Generate Output ## Generate Image Output - if ConversationCommand.Image in conversation_commands and not in_research_mode: + if ConversationCommand.Image in conversation_commands: async for result in text_to_image( defiltered_query, user, 
From baa939f4ce41cd45f731dfcc29f75b391c7a4560 Mon Sep 17 00:00:00 2001 From: sabaimran Date: Fri, 1 Nov 2024 13:47:39 -0700 Subject: [PATCH 7/7] When running code, strip any code delimiters. Disable application json type specification in Gemini request. --- .../processor/conversation/google/gemini_chat.py | 6 ++++-- src/khoj/processor/conversation/utils.py | 5 +++++ src/khoj/processor/tools/run_code.py | 12 +++++++++--- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index f543bc6b..aebda1a8 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -116,8 +116,10 @@ def gemini_send_message_to_model( messages, system_prompt = format_messages_for_gemini(messages) model_kwargs = {} - if response_type == "json_object": - model_kwargs["response_mime_type"] = "application/json" + + # Sometimes, this causes unwanted behavior and terminates response early. Disable for now while it's flaky. + # if response_type == "json_object": + # model_kwargs["response_mime_type"] = "application/json" # Get Response from Gemini return gemini_completion_with_backoff( diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index 9970aefd..edef014f 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -447,6 +447,11 @@ def clean_json(response: str): return response.strip().replace("\n", "").removeprefix("```json").removesuffix("```") +def clean_code_python(code: str): + """Remove any markdown codeblock and newline formatting if present. 
Useful for non schema enforceable models""" + return code.strip().removeprefix("```python").removesuffix("```") + + def defilter_query(query: str): """Remove any query filters in query""" defiltered_query = query diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py index 9bdbfc13..d4ba9af1 100644 --- a/src/khoj/processor/tools/run_code.py +++ b/src/khoj/processor/tools/run_code.py @@ -12,6 +12,7 @@ from khoj.database.models import Agent, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.utils import ( ChatEvent, + clean_code_python, clean_json, construct_chat_history, ) @@ -126,13 +127,18 @@ async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) -> Returns the result of the code execution as a dictionary. """ headers = {"Content-Type": "application/json"} - data = {"code": code} + cleaned_code = clean_code_python(code) + data = {"code": cleaned_code} async with aiohttp.ClientSession() as session: async with session.post(sandbox_url, json=data, headers=headers) as response: if response.status == 200: result: dict[str, Any] = await response.json() - result["code"] = code + result["code"] = cleaned_code return result else: - return {"code": code, "success": False, "std_err": f"Failed to execute code with {response.status}"} + return { + "code": cleaned_code, + "success": False, + "std_err": f"Failed to execute code with {response.status}", + }