From baa939f4ce41cd45f731dfcc29f75b391c7a4560 Mon Sep 17 00:00:00 2001
From: sabaimran <narmiabas@gmail.com>
Date: Fri, 1 Nov 2024 13:47:39 -0700
Subject: [PATCH] When running code, strip any markdown code delimiters. Disable
 the application/json response MIME type in Gemini requests.

---
 .../processor/conversation/google/gemini_chat.py     |  6 ++++--
 src/khoj/processor/conversation/utils.py             |  5 +++++
 src/khoj/processor/tools/run_code.py                 | 12 +++++++++---
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py
index f543bc6b..aebda1a8 100644
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@@ -116,8 +116,10 @@ def gemini_send_message_to_model(
     messages, system_prompt = format_messages_for_gemini(messages)
 
     model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Requesting the "application/json" response mime type sometimes causes unwanted behavior and terminates the response early. Disabled for now while it's flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"
 
     # Get Response from Gemini
     return gemini_completion_with_backoff(
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index 9970aefd..edef014f 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -447,6 +447,11 @@ def clean_json(response: str):
     return response.strip().replace("\n", "").removeprefix("```json").removesuffix("```")
 
 
+def clean_code_python(code: str):
+    """Strip surrounding whitespace and a wrapping markdown ```python code fence, if present. Useful for non schema enforceable models"""
+    return code.strip().removeprefix("```python").removesuffix("```")
+
+
 def defilter_query(query: str):
     """Remove any query filters in query"""
     defiltered_query = query
diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py
index 9bdbfc13..d4ba9af1 100644
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -12,6 +12,7 @@ from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
     ChatEvent,
+    clean_code_python,
     clean_json,
     construct_chat_history,
 )
@@ -126,13 +127,18 @@ async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) ->
     Returns the result of the code execution as a dictionary.
     """
     headers = {"Content-Type": "application/json"}
-    data = {"code": code}
+    cleaned_code = clean_code_python(code)
+    data = {"code": cleaned_code}
 
     async with aiohttp.ClientSession() as session:
         async with session.post(sandbox_url, json=data, headers=headers) as response:
             if response.status == 200:
                 result: dict[str, Any] = await response.json()
-                result["code"] = code
+                result["code"] = cleaned_code
                 return result
             else:
-                return {"code": code, "success": False, "std_err": f"Failed to execute code with {response.status}"}
+                return {
+                    "code": cleaned_code,
+                    "success": False,
+                    "std_err": f"Failed to execute code with {response.status}",
+                }