Add answers to context for Search Actor to generate relevant queries

Update the Search Actor prompt with answers, a more precise primer, and two more examples for context. Mark the 3 chat quality tests that use answers as context to generate queries as expected to pass. Verify that the 3 tests now pass, unlike before, when the Search Actor did not have the answers for context.
2024-11-23 23:48:56 +01:00 · 2023-03-17 19:53:17 -06:00 · 2023-03-17 19:53:17 -06:00 · 08f5fb315f
commit 08f5fb315f
parent f09bdd515b
2 changed files with 34 additions and 12 deletions
--- a/src/khoj/processor/conversation/gpt.py
+++ b/src/khoj/processor/conversation/gpt.py
@ -90,7 +90,7 @@ def extract_questions(
    # Extract Past User Message and Inferred Questions from Conversation Log
    chat_history = "".join(
        [
-            f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n'
+            f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n{chat["message"]}\n\n'
            for chat in conversation_log.get("chat", [])[-4:]
            if chat["by"] == "khoj"
        ]
@ -102,42 +102,67 @@ def extract_questions(
    last_new_year = current_new_year.replace(year=today.year - 1)
    prompt = f"""
-You are Khoj, a chat assistant with the ability to search the users notes and continue the existing conversation.
+You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the users notes.
-What searches, if any, will you need to perform to answer the users question below?
+- The user will provide their questions and answers to you for context.
 - Add as much context from the previous questions and answers as required into your search queries.
 - Break messages into multiple search queries when required to retrieve the relevant information.
 - Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
 What searches, if any, will you need to perform to answer the users question?
 Provide search queries as a JSON list of strings
-Current Date: {today.strftime("%HH:%MM %A, %Y-%m-%d")}
+Current Date: {today.strftime("%A, %Y-%m-%d")}
 Q: How was my trip to Cambodia?
 ["How was my trip to Cambodia?"]
-Q: When did i go there?
+A: The trip was amazing. I went to the Angkor Wat temple and it was beautiful.
-["When did I go to Cambodia?"]
+Q: Who did i visit that temple with?
 ["Who did I visit the Angkor Wat Temple in Cambodia with?"]
 A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
 Q: What national parks did I go to last year?
 ["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]
 A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year.strftime("%Y")}.
 Q: How are you feeling today?
 []
 A: I'm feeling a little bored. Helping you will hopefully make me feel better!
 Q: How many tennis balls fit in the back of a 2002 Honda Civic?
 ["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]
 A: 1085 tennis balls will fit in the trunk of a Honda Civic
 Q: Is Bob older than Tom?
 ["When was Bob born?", "What is Tom's age?"]
 A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
 Q: What is their age difference?
 ["What is Bob's age?", "What is Tom's age?"]
 A: Bob is {current_new_year.year - 1984 - 30} years older than Tom. As Bob is {current_new_year.year - 1984} years old and Tom is 30 years old.
 {chat_history}
 Q: {text}
 """
    # Get Response from GPT
-    response = openai.Completion.create(prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens)
+    response = openai.Completion.create(
        prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens, stop=["A: ", "\n"]
    )
    # Extract, Clean Message from GPT's Response
    questions = json.loads(response["choices"][0]["text"].strip(empty_escape_sequences))
--- a/tests/test_chat_actors.py
+++ b/tests/test_chat_actors.py
@ -60,7 +60,7 @@ def test_extract_question_with_date_filter_from_relative_month():
@freeze_time("1984-04-02")
 def test_extract_question_with_date_filter_from_relative_year():
    # Act
-    response = extract_questions("Where countries have I visited this year?")
+    response = extract_questions("Which countries have I visited this year?")
    # Assert
    expected_responses = [
@ -123,7 +123,6 @@ def test_generate_search_query_using_question_from_chat_history():
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_using_answer_from_chat_history():
    # Arrange
@ -140,7 +139,6 @@ def test_generate_search_query_using_answer_from_chat_history():
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_using_question_and_answer_from_chat_history():
    # Arrange
@ -157,7 +155,6 @@ def test_generate_search_query_using_question_and_answer_from_chat_history():
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_with_date_and_context_from_chat_history():
    # Arrange
@ -377,7 +374,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
-        user_query="Write a haiku about unit testing",
+        user_query="Write a haiku about unit testing in 3 lines",
        conversation_log=populate_chat_history(message_list),
        api_key=api_key,
    )