diff --git a/src/khoj/processor/tools/run_code.py b/src/khoj/processor/tools/run_code.py
index 5366cd02..8f2f170a 100644
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@@ -1,4 +1,5 @@
import base64
+import copy
import datetime
import json
import logging
@@ -88,7 +89,8 @@ async def run_code(
with timer("Chat actor: Execute generated program", logger, log_level=logging.INFO):
result = await execute_sandboxed_python(generated_code.code, input_data, sandbox_url)
code = result.pop("code")
- logger.info(f"Executed Code:\n--@@--\n{code}\n--@@--Result:\n--@@--\n{result}\n--@@--")
+ cleaned_result = truncate_code_context({"cleaned": {"results": result}})["cleaned"]["results"]
+ logger.info(f"Executed Code\n----\n{code}\n----\nResult\n----\n{cleaned_result}\n----")
yield {query: {"code": code, "results": result}}
except Exception as e:
raise ValueError(f"Failed to run code for {query} with error: {e}")
@@ -163,7 +165,8 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
result: dict[str, Any] = await response.json()
result["code"] = cleaned_code
# Store decoded output files
- for output_file in result.get("output_files", []):
+ result["output_files"] = result.get("output_files", [])
+ for output_file in result["output_files"]:
# Decode text files as UTF-8
if mimetypes.guess_type(output_file["filename"])[0].startswith("text/") or Path(
output_file["filename"]
@@ -175,4 +178,28 @@ async def execute_sandboxed_python(code: str, input_data: list[dict], sandbox_ur
"code": cleaned_code,
"success": False,
"std_err": f"Failed to execute code with {response.status}",
+ "output_files": [],
}
+
+
+def truncate_code_context(original_code_results: dict[str, Any], max_chars=10000) -> dict[str, Any]:
+    """
+    Return a deep copy of the code results with image file data swapped for a placeholder and any
+    other output file data longer than max_chars cut to max_chars characters followed by "...".
+    """
+    # Work on a deep copy so the caller keeps the full, untruncated results
+    code_results = copy.deepcopy(original_code_results)
+    for code_result in code_results.values():
+        # Tolerate results that carry no output files instead of raising KeyError
+        output_files = code_result["results"].get("output_files") or []
+        for idx, output_file in enumerate(output_files):
+            filename, b64_data = output_file["filename"], output_file.get("b64_data") or ""
+            # Drop image data; compare the suffix case-insensitively so "plot.PNG" is caught too
+            if Path(filename).suffix.lower() in {".png", ".jpg", ".jpeg", ".webp"}:
+                b64_data = "[placeholder for generated image data for brevity]"
+            elif len(b64_data) > max_chars:
+                b64_data = b64_data[:max_chars] + "..."
+            else:
+                continue  # small non-image file, keep the entry untouched
+            output_files[idx] = {"filename": filename, "b64_data": b64_data}
+    return code_results
diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py
index abf8f96c..fdc10906 100644
--- a/src/khoj/routers/research.py
+++ b/src/khoj/routers/research.py
@@ -16,7 +16,7 @@ from khoj.processor.conversation.utils import (
construct_tool_chat_history,
)
from khoj.processor.tools.online_search import read_webpages, search_online
-from khoj.processor.tools.run_code import run_code
+from khoj.processor.tools.run_code import run_code, truncate_code_context
from khoj.routers.api import extract_references_and_questions
from khoj.routers.helpers import (
ChatEvent,
@@ -348,7 +348,7 @@ async def execute_information_collection(
if online_results:
results_data += f"\n\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
if code_results:
- results_data += f"\n\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
+ results_data += f"\n\n{yaml.dump(truncate_code_context(code_results), allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
if summarize_files:
results_data += f"\n\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n"
if this_iteration.warning: