From 302bd51d177b72bafb227e044e9c2cbac42859f5 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Thu, 31 Oct 2024 17:56:06 -0700 Subject: [PATCH] Improve online chat actor prompt for research and normal mode - Align the online query generator prompt with the formatting of extract questions - Separate iteration results by newline - Improve webpage and online tool descriptions --- src/khoj/processor/conversation/prompts.py | 52 +++++++++++----------- src/khoj/processor/conversation/utils.py | 5 ++- src/khoj/routers/research.py | 8 ++-- src/khoj/utils/helpers.py | 4 +- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/khoj/processor/conversation/prompts.py b/src/khoj/processor/conversation/prompts.py index b422aa13..91469f4c 100644 --- a/src/khoj/processor/conversation/prompts.py +++ b/src/khoj/processor/conversation/prompts.py @@ -814,8 +814,8 @@ Khoj: online_search_conversation_subqueries = PromptTemplate.from_template( """ You are Khoj, an advanced web search assistant. You are tasked with constructing **up to three** google search queries to answer the user's question. -- You will receive the conversation history as context. -- Add as much context from the previous questions and answers as required into your search queries. +- You will receive the actual chat history as context. +- Add as much context from the chat history as required into your search queries. - Break messages into multiple search queries when required to retrieve the relevant information. - Use site: google search operator when appropriate - You have access to the the whole internet to retrieve information. @@ -828,58 +828,56 @@ User's Location: {location} {username} Here are some examples: -History: +Example Chat History: User: I like to use Hacker News to get my tech news. +Khoj: {{"queries": ["what is Hacker News?", "Hacker News website for tech news"]}} AI: Hacker News is an online forum for sharing and discussing the latest tech news. 
It is a great place to learn about new technologies and startups. -Q: Summarize the top posts on HackerNews +User: Summarize the top posts on HackerNews Khoj: {{"queries": ["top posts on HackerNews"]}} -History: - -Q: Tell me the latest news about the farmers protest in Colombia and China on Reuters +Example Chat History: +User: Tell me the latest news about the farmers protest in Colombia and China on Reuters Khoj: {{"queries": ["site:reuters.com farmers protest Colombia", "site:reuters.com farmers protest China"]}} -History: +Example Chat History: User: I'm currently living in New York but I'm thinking about moving to San Francisco. +Khoj: {{"queries": ["New York city vs San Francisco life", "San Francisco living cost", "New York city living cost"]}} AI: New York is a great city to live in. It has a lot of great restaurants and museums. San Francisco is also a great city to live in. It has good access to nature and a great tech scene. -Q: What is the climate like in those cities? -Khoj: {{"queries": ["climate in new york city", "climate in san francisco"]}} +User: What is the climate like in those cities? +Khoj: {{"queries": ["climate in New York city", "climate in San Francisco"]}} -History: -AI: Hey, how is it going? -User: Going well. Ananya is in town tonight! +Example Chat History: +User: Hey, Ananya is in town tonight! +Khoj: {{"queries": ["events in {location} tonight", "best restaurants in {location}", "places to visit in {location}"]}} AI: Oh that's awesome! What are your plans for the evening? -Q: She wants to see a movie. Any decent sci-fi movies playing at the local theater? +User: She wants to see a movie. Any decent sci-fi movies playing at the local theater? Khoj: {{"queries": ["new sci-fi movies in theaters near {location}"]}} -History: +Example Chat History: User: Can I chat with you over WhatsApp? +Khoj: {{"queries": ["site:khoj.dev chat with Khoj on Whatsapp"]}} AI: Yes, you can chat with me using WhatsApp. 
-Q: How -Khoj: {{"queries": ["site:khoj.dev chat with Khoj on Whatsapp"]}} - -History: - - -Q: How do I share my files with you? +Example Chat History: +User: How do I share my files with Khoj? Khoj: {{"queries": ["site:khoj.dev sync files with Khoj"]}} -History: +Example Chat History: User: I need to transport a lot of oranges to the moon. Are there any rockets that can fit a lot of oranges? +Khoj: {{"queries": ["current rockets with large cargo capacity", "rocket rideshare cost by cargo capacity"]}} AI: NASA's Saturn V rocket frequently makes lunar trips and has a large cargo capacity. -Q: How many oranges would fit in NASA's Saturn V rocket? -Khoj: {{"queries": ["volume of an orange", "volume of saturn v rocket"]}} +User: How many oranges would fit in NASA's Saturn V rocket? +Khoj: {{"queries": ["volume of an orange", "volume of Saturn V rocket"]}} Now it's your turn to construct Google search queries to answer the user's question. Provide them as a list of strings in a JSON object. Do not say anything else. 
-History: +Actual Chat History: {chat_history} -Q: {query} +User: {query} Khoj: """.strip() ) diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py index e6d380f6..e54d4486 100644 --- a/src/khoj/processor/conversation/utils.py +++ b/src/khoj/processor/conversation/utils.py @@ -136,7 +136,10 @@ def construct_chat_history(conversation_history: dict, n: int = 4, agent_name="A for chat in conversation_history.get("chat", [])[-n:]: if chat["by"] == "khoj" and chat["intent"].get("type") in ["remember", "reminder", "summarize"]: chat_history += f"User: {chat['intent']['query']}\n" - chat_history += f"{agent_name}: {chat['message']}\n" + chat_history += ( + f'Khoj: {{"queries": {chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}}}\n' + ) + chat_history += f"{agent_name}: {chat['message']}\n\n" elif chat["by"] == "khoj" and ("text-to-image" in chat["intent"].get("type")): chat_history += f"User: {chat['intent']['query']}\n" chat_history += f"{agent_name}: [generated image redacted for space]\n" diff --git a/src/khoj/routers/research.py b/src/khoj/routers/research.py index 43552691..0a37eb48 100644 --- a/src/khoj/routers/research.py +++ b/src/khoj/routers/research.py @@ -306,13 +306,13 @@ async def execute_information_collection( if document_results or online_results or code_results or summarize_files: results_data = f"**Results**:\n" if document_results: - results_data += f"**Document References**: {yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" + results_data += f"**Document References**:\n{yaml.dump(document_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" if online_results: - results_data += f"**Online Results**: {yaml.dump(online_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" + results_data += f"**Online Results**:\n{yaml.dump(online_results, allow_unicode=True, sort_keys=False, 
default_flow_style=False)}\n" if code_results: - results_data += f"**Code Results**: {yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" + results_data += f"**Code Results**:\n{yaml.dump(code_results, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" if summarize_files: - results_data += f"**Summarized Files**: {yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" + results_data += f"**Summarized Files**:\n{yaml.dump(summarize_files, allow_unicode=True, sort_keys=False, default_flow_style=False)}\n" # intermediate_result = await extract_relevant_info(this_iteration.query, results_data, agent) this_iteration.summarizedResult = results_data diff --git a/src/khoj/utils/helpers.py b/src/khoj/utils/helpers.py index 6bfb3594..828a6bcb 100644 --- a/src/khoj/utils/helpers.py +++ b/src/khoj/utils/helpers.py @@ -369,8 +369,8 @@ tool_descriptions_for_llm = { function_calling_description_for_llm = { ConversationCommand.Notes: "To search the user's personal knowledge base. Especially helpful if the question expects context from the user's notes or documents.", - ConversationCommand.Online: "To search the internet for information. Provide all relevant context to ensure new searches, not previously run, are performed.", - ConversationCommand.Webpage: "To extract information from a webpage. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. Share the webpage link and information to extract in your query.", + ConversationCommand.Online: "To search the internet for information. Useful to get a quick, broad overview from the internet. Provide all relevant context to ensure new searches, not in previous iterations, are performed.", + ConversationCommand.Webpage: "To extract information from webpages. Useful for more detailed research from the internet. Usually used when you know the webpage links to refer to. 
Share the webpage links and information to extract in your query.", ConversationCommand.Code: "To run Python code in a Pyodide sandbox with no network access. Helpful when need to parse information, run complex calculations, create documents and charts for user. Matplotlib, bs4, pandas, numpy, etc. are available.", }