diff --git a/pyproject.toml b/pyproject.toml index 4b651dad..edbbb655 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,7 @@ dependencies = [ "pymupdf >= 1.23.5", "django == 5.0.7", "authlib == 1.2.1", - "llama-cpp-python == 0.2.82", + "llama-cpp-python == 0.2.88", "itsdangerous == 2.1.2", "httpx == 0.25.0", "pgvector == 0.2.4", diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py index ec4c7367..1251dcec 100644 --- a/src/khoj/processor/conversation/offline/chat_model.py +++ b/src/khoj/processor/conversation/offline/chat_model.py @@ -103,6 +103,9 @@ def extract_questions_offline( .replace("']", '"]') .replace("', '", '", "') ) + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json"): + response = response[7:-3] questions: List[str] = json.loads(questions_str) questions = filter_questions(questions) except: diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index 6a8db9db..ffd7d094 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -587,7 +587,7 @@ You are Khoj, an advanced google search assistant. You are tasked with construct - Official, up-to-date information about you, Khoj, is available at site:khoj.dev, github or pypi. What Google searches, if any, will you need to perform to answer the user's question? -Provide search queries as a list of strings in a JSON object. Do not wrap the json in a codeblock. +Provide search queries as a list of strings in a JSON object. Current Date: {current_date} User's Location: {location} {username} diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 4e4f5a56..4da60717 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -279,6 +279,9 @@ async def aget_relevant_information_sources(query: str, conversation_history: di try: response = response.strip() + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json"): + response = response[7:-3] response = json.loads(response) response = [q.strip() for q in response["source"] if q.strip()] if not isinstance(response, list) or not response or len(response) == 0: @@ -401,6 +404,9 @@ async def generate_online_subqueries( # Validate that the response is a non-empty, JSON-serializable list try: response = response.strip() + # Remove any markdown json codeblock formatting if present (useful for gemma-2) + if response.startswith("```json") and response.endswith("```"): + response = response[7:-3] response = json.loads(response) response = [q.strip() for q in response["queries"] if q.strip()] if not isinstance(response, list) or not response or len(response) == 0: