diff --git a/src/khoj/processor/conversation/anthropic/anthropic_chat.py b/src/khoj/processor/conversation/anthropic/anthropic_chat.py index feb587b2..1ca2095d 100644 --- a/src/khoj/processor/conversation/anthropic/anthropic_chat.py +++ b/src/khoj/processor/conversation/anthropic/anthropic_chat.py @@ -14,9 +14,9 @@ from khoj.processor.conversation.anthropic.utils import ( format_messages_for_anthropic, ) from khoj.processor.conversation.utils import ( + clean_json, construct_structured_message, generate_chatml_messages_with_context, - remove_json_codeblock, ) from khoj.utils.helpers import ConversationCommand, is_none_or_empty from khoj.utils.rawconfig import LocationData @@ -98,7 +98,7 @@ def extract_questions_anthropic( # Extract, Clean Message from Claude's Response try: - response = remove_json_codeblock(response) + response = clean_json(response) response = json.loads(response) response = [q.strip() for q in response["queries"] if q.strip()] if not isinstance(response, list) or not response: diff --git a/src/khoj/processor/conversation/google/gemini_chat.py b/src/khoj/processor/conversation/google/gemini_chat.py index e9538c0c..7a3ffe4d 100644 --- a/src/khoj/processor/conversation/google/gemini_chat.py +++ b/src/khoj/processor/conversation/google/gemini_chat.py @@ -14,9 +14,9 @@ from khoj.processor.conversation.google.utils import ( gemini_completion_with_backoff, ) from khoj.processor.conversation.utils import ( + clean_json, construct_structured_message, generate_chatml_messages_with_context, - remove_json_codeblock, ) from khoj.utils.helpers import ConversationCommand, is_none_or_empty from khoj.utils.rawconfig import LocationData @@ -93,7 +93,7 @@ def extract_questions_gemini( # Extract, Clean Message from Gemini's Response try: - response = remove_json_codeblock(response) + response = clean_json(response) response = json.loads(response) response = [q.strip() for q in response["queries"] if q.strip()] if not isinstance(response, list) or not response: diff 
def clean_json(response: str) -> str:
    """Strip markdown code-fence and newline formatting from a model's JSON reply.

    Useful for non schema enforceable models, which often wrap their JSON
    answer in a markdown codeblock instead of returning it bare.

    The cleanup steps, in order:
      1. Trim leading and trailing whitespace.
      2. Remove every newline character. This includes raw newlines that sit
         inside string values, not only the ones between JSON tokens.
      3. Drop a leading code fence: "```json" (matched case-insensitively)
         or a bare "```".
      4. Drop a trailing "```" fence.

    Args:
        response: Raw text returned by the chat model.

    Returns:
        The text with the formatting above removed. It is not parsed or
        validated here; callers pass the result to `json.loads`.
    """
    json_fence = "```json"
    cleaned = response.strip().replace("\n", "")

    # Models do not reliably tag the fence with a lowercase "json" language
    # hint. Accept any casing of the hint as well as an untagged fence, so the
    # opening backticks never leak through to the JSON parser.
    if cleaned[: len(json_fence)].lower() == json_fence:
        cleaned = cleaned[len(json_fence) :]
    else:
        cleaned = cleaned.removeprefix("```")

    return cleaned.removesuffix("```")
) try: - response = response.strip() - response = remove_json_codeblock(response) + response = clean_json(response) response = json.loads(response) if is_none_or_empty(response): @@ -475,8 +473,7 @@ async def infer_webpage_urls( # Validate that the response is a non-empty, JSON-serializable list of URLs try: - response = response.strip() - response = remove_json_codeblock(response) + response = clean_json(response) urls = json.loads(response) valid_unique_urls = {str(url).strip() for url in urls["links"] if is_valid_url(url)} if is_none_or_empty(valid_unique_urls): @@ -527,8 +524,7 @@ async def generate_online_subqueries( # Validate that the response is a non-empty, JSON-serializable list try: - response = response.strip() - response = remove_json_codeblock(response) + response = clean_json(response) response = json.loads(response) response = [q.strip() for q in response["queries"] if q.strip()] if not isinstance(response, list) or not response or len(response) == 0: @@ -801,8 +797,7 @@ async def generate_excalidraw_diagram_from_description( raw_response = await send_message_to_model_wrapper( query=excalidraw_diagram_generation, user=user, tracer=tracer ) - raw_response = raw_response.strip() - raw_response = remove_json_codeblock(raw_response) + raw_response = clean_json(raw_response) response: Dict[str, str] = json.loads(raw_response) if not response or not isinstance(response, List) or not isinstance(response[0], Dict): # TODO Some additional validation here that it's a valid Excalidraw diagram diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index 83aecc7e..6001bdc5 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -11,9 +11,9 @@ from khoj.database.models import Agent, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.utils import ( InformationCollectionIteration, + clean_json, construct_iteration_history, construct_tool_chat_history, - remove_json_codeblock, ) from 
khoj.processor.tools.online_search import read_webpages, search_online from khoj.processor.tools.run_code import run_code @@ -99,8 +99,7 @@ async def apick_next_tool( ) try: - response = response.strip() - response = remove_json_codeblock(response) + response = clean_json(response) response = json.loads(response) selected_tool = response.get("tool", None) generated_query = response.get("query", None)