Merge branch 'master' of github.com:khoj-ai/khoj into improve-debug-reasoning-and-other-misc-fixes

2024-11-27 17:35:07 +01:00 · 2024-11-01 14:51:26 -07:00 · 2024-11-01 14:51:26 -07:00 · 2b35790165
commit 2b35790165
parent b3dad1f393 22f3ed3f5d
8 changed files with 125 additions and 111 deletions
--- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py
+++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py
@ -189,7 +189,7 @@ def converse_anthropic(
    if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
-        context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n"
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
--- a/src/khoj/processor/conversation/google/gemini_chat.py
+++ b/src/khoj/processor/conversation/google/gemini_chat.py
@ -116,8 +116,10 @@ def gemini_send_message_to_model(
    messages, system_prompt = format_messages_for_gemini(messages)

    model_kwargs = {}
-    if response_type == "json_object":
-        model_kwargs["response_mime_type"] = "application/json"
+
+    # Setting the JSON response MIME type sometimes causes unwanted behavior and terminates the response early. Disabled for now while it is flaky.
+    # if response_type == "json_object":
+    #     model_kwargs["response_mime_type"] = "application/json"

    # Get Response from Gemini
    return gemini_completion_with_backoff(
@ -193,7 +195,7 @@ def converse_gemini(
    if ConversationCommand.Online in conversation_commands or ConversationCommand.Webpage in conversation_commands:
        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
-        context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n"
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@ -160,9 +160,7 @@ def converse_offline(
    # Initialize Variables
    assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
    offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
-    compiled_references = "\n\n".join({f"# File: {item['file']}\n## {item['compiled']}\n" for item in references})
    tracer["chat_model"] = model
-
    current_date = datetime.now()

    if agent and agent.personality:
@ -204,7 +202,7 @@ def converse_offline(

        context_message += f"{prompts.online_search_conversation_offline.format(online_results=yaml_dump(simplified_online_results))}\n\n"
    if ConversationCommand.Code in conversation_commands and not is_none_or_empty(code_results):
-        context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n"
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
--- a/src/khoj/processor/conversation/openai/gpt.py
+++ b/src/khoj/processor/conversation/openai/gpt.py
@ -191,7 +191,7 @@ def converse(
    if not is_none_or_empty(online_results):
        context_message += f"{prompts.online_search_conversation.format(online_results=yaml_dump(online_results))}\n\n"
    if not is_none_or_empty(code_results):
-        context_message += f"{prompts.code_executed_context.format(code_results=yaml_dump(code_results))}\n\n"
+        context_message += f"{prompts.code_executed_context.format(code_results=str(code_results))}\n\n"
    context_message = context_message.strip()

    # Setup Prompt with Primer or Conversation History
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@ -22,7 +22,7 @@ from langchain.schema import ChatMessage
 from llama_cpp.llama import Llama
 from transformers import AutoTokenizer

-from khoj.database.adapters import ConversationAdapters, ais_user_subscribed
+from khoj.database.adapters import ConversationAdapters
 from khoj.database.models import ChatModelOptions, ClientApplication, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.offline.utils import download_model, infer_max_tokens
@ -457,6 +457,11 @@ def clean_json(response: str):
    return response.strip().replace("\n", "").removeprefix("```json").removesuffix("```")


+def clean_code_python(code: str):
+    """Strip surrounding whitespace and a wrapping ```python markdown code fence, if present. Useful for models that cannot enforce an output schema."""
+    return code.strip().removeprefix("```python").removesuffix("```")
+
+
 def defilter_query(query: str):
    """Remove any query filters in query"""
    defiltered_query = query
--- a/src/khoj/processor/tools/run_code.py
+++ b/src/khoj/processor/tools/run_code.py
@ -12,6 +12,7 @@ from khoj.database.models import Agent, KhojUser
 from khoj.processor.conversation import prompts
 from khoj.processor.conversation.utils import (
    ChatEvent,
+    clean_code_python,
    clean_json,
    construct_chat_history,
 )
@ -126,13 +127,18 @@ async def execute_sandboxed_python(code: str, sandbox_url: str = SANDBOX_URL) ->
    Returns the result of the code execution as a dictionary.
    """
    headers = {"Content-Type": "application/json"}
-    data = {"code": code}
+    cleaned_code = clean_code_python(code)
+    data = {"code": cleaned_code}

    async with aiohttp.ClientSession() as session:
        async with session.post(sandbox_url, json=data, headers=headers) as response:
            if response.status == 200:
                result: dict[str, Any] = await response.json()
-                result["code"] = code
+                result["code"] = cleaned_code
                return result
            else:
-                return {"code": code, "success": False, "std_err": f"Failed to execute code with {response.status}"}
+                return {
+                    "code": cleaned_code,
+                    "success": False,
+                    "std_err": f"Failed to execute code with {response.status}",
+                }
--- a/src/khoj/routers/api_chat.py
+++ b/src/khoj/routers/api_chat.py
@ -710,7 +710,6 @@ async def chat(
        meta_log = conversation.conversation_log
        is_automated_task = conversation_commands == [ConversationCommand.AutomatedTask]

-        pending_research = True
        researched_results = ""
        online_results: Dict = dict()
        code_results: Dict = dict()
@ -730,6 +729,16 @@ async def chat(
                tracer=tracer,
            )

+            # If we're doing research, we don't want to do anything else
+            if ConversationCommand.Research in conversation_commands:
+                conversation_commands = [ConversationCommand.Research]
+
+            conversation_commands_str = ", ".join([cmd.value for cmd in conversation_commands])
+            async for result in send_event(
+                ChatEvent.STATUS, f"**Chose Data Sources to Search:** {conversation_commands_str}"
+            ):
+                yield result
+
            mode = await aget_relevant_output_modes(
                q, meta_log, is_automated_task, user, uploaded_images, agent, tracer=tracer
            )
@ -759,7 +768,6 @@ async def chat(
            ):
                if isinstance(research_result, InformationCollectionIteration):
                    if research_result.summarizedResult:
-                        pending_research = False
                        if research_result.onlineContext:
                            online_results.update(research_result.onlineContext)
                        if research_result.codeContext:
@ -773,10 +781,11 @@ async def chat(
                    yield research_result

            # researched_results = await extract_relevant_info(q, researched_results, agent)
-
            logger.info(f"Researched Results: {researched_results}")

-            pending_research = False
+        for cmd in conversation_commands:
+            await conversation_command_rate_limiter.update_and_check_if_valid(request, cmd)
+            q = q.replace(f"/{cmd.value}", "").strip()

        used_slash_summarize = conversation_commands == [ConversationCommand.Summarize]
        file_filters = conversation.file_filters if conversation else []
@ -788,11 +797,9 @@ async def chat(
            and not used_slash_summarize
            # but we can't actually summarize
            and len(file_filters) != 1
-            # not pending research
-            and not pending_research
        ):
            conversation_commands.remove(ConversationCommand.Summarize)
-        elif ConversationCommand.Summarize in conversation_commands and pending_research:
+        elif ConversationCommand.Summarize in conversation_commands:
            response_log = ""
            agent_has_entries = await EntryAdapters.aagent_has_entries(agent)
            if len(file_filters) == 0 and not agent_has_entries:
@ -886,7 +893,7 @@ async def chat(

        # Gather Context
        ## Extract Document References
-        if pending_research:
+        if not ConversationCommand.Research in conversation_commands:
            try:
                async for result in extract_references_and_questions(
                    request,
@ -933,7 +940,6 @@ async def chat(
        if ConversationCommand.Notes in conversation_commands and is_none_or_empty(compiled_references):
            conversation_commands.remove(ConversationCommand.Notes)

-        if pending_research:
        ## Gather Online References
        if ConversationCommand.Online in conversation_commands:
            try:
@ -960,7 +966,6 @@ async def chat(
                ):
                    yield result

-        if pending_research:
        ## Gather Webpage References
        if ConversationCommand.Webpage in conversation_commands:
            try:
@ -999,9 +1004,8 @@ async def chat(
                ):
                    yield result

-        if pending_research:
        ## Gather Code Results
-            if ConversationCommand.Code in conversation_commands and pending_research:
+        if ConversationCommand.Code in conversation_commands:
            try:
                context = f"# Iteration 1:\n#---\nNotes:\n{compiled_references}\n\nOnline Results:{online_results}"
                async for result in run_code(
--- a/src/khoj/utils/helpers.py
+++ b/src/khoj/utils/helpers.py
@ -364,7 +364,6 @@ tool_descriptions_for_llm = {
    ConversationCommand.Webpage: "To use if the user has directly provided the webpage urls or you are certain of the webpage urls to read.",
    ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.",
    ConversationCommand.Summarize: "To retrieve an answer that depends on the entire document or a large text.",
-    ConversationCommand.Research: "To use when you need to do DEEP research on a topic. This will take longer than usual, but give a more detailed, comprehensive answer.",
 }

 function_calling_description_for_llm = {