Add answers to context for Search Actor to generate relevant queries

- Update the Search Actor prompt with answers, a more precise primer and two more examples for context.
- Mark the 3 chat quality tests that use answers as context to generate queries as expected to pass.
- Verify that the 3 tests pass now, unlike before, when the Search Actor did not have the answers for context.
2024-11-23 23:48:56 +01:00 · 2023-03-17 19:53:17 -06:00 · 2023-03-17 19:53:17 -06:00 · 08f5fb315f
commit 08f5fb315f
parent f09bdd515b
2 changed files with 34 additions and 12 deletions
--- a/src/khoj/processor/conversation/gpt.py
+++ b/src/khoj/processor/conversation/gpt.py
@ -90,7 +90,7 @@ def extract_questions(
    # Extract Past User Message and Inferred Questions from Conversation Log
    chat_history = "".join(
        [
-            f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n'
+            f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n{chat["message"]}\n\n'
            for chat in conversation_log.get("chat", [])[-4:]
            if chat["by"] == "khoj"
        ]
@ -102,42 +102,67 @@ def extract_questions(
    last_new_year = current_new_year.replace(year=today.year - 1)

    prompt = f"""
-You are Khoj, a chat assistant with the ability to search the users notes and continue the existing conversation.
-What searches, if any, will you need to perform to answer the users question below?
+You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the users notes.
+- The user will provide their questions and answers to you for context.
+- Add as much context from the previous questions and answers as required into your search queries.
+- Break messages into multiple search queries when required to retrieve the relevant information.
+- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
+
+What searches, if any, will you need to perform to answer the users question?
 Provide search queries as a JSON list of strings
-Current Date: {today.strftime("%HH:%MM %A, %Y-%m-%d")}
+Current Date: {today.strftime("%A, %Y-%m-%d")}

 Q: How was my trip to Cambodia?

 ["How was my trip to Cambodia?"]

-Q: When did i go there?
+A: The trip was amazing. I went to the Angkor Wat temple and it was beautiful.

-["When did I go to Cambodia?"]
+Q: Who did i visit that temple with?
+
+["Who did I visit the Angkor Wat Temple in Cambodia with?"]
+
+A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.

 Q: What national parks did I go to last year?

 ["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]

+A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year.strftime("%Y")}.
+
 Q: How are you feeling today?

 []

+A: I'm feeling a little bored. Helping you will hopefully make me feel better!
+
+Q: How many tennis balls fit in the back of a 2002 Honda Civic?
+
+["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]
+
+A: 1085 tennis balls will fit in the trunk of a Honda Civic
+
 Q: Is Bob older than Tom?

 ["When was Bob born?", "What is Tom's age?"]

+A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
+
 Q: What is their age difference?

 ["What is Bob's age?", "What is Tom's age?"]

+A: Bob is {current_new_year.year - 1984 - 30} years older than Tom. As Bob is {current_new_year.year - 1984} years old and Tom is 30 years old.
+
 {chat_history}
 Q: {text}

 """

    # Get Response from GPT
-    response = openai.Completion.create(prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens)
+    response = openai.Completion.create(
+        prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens, stop=["A: ", "\n"]
+    )

    # Extract, Clean Message from GPT's Response
    questions = json.loads(response["choices"][0]["text"].strip(empty_escape_sequences))
--- a/tests/test_chat_actors.py
+++ b/tests/test_chat_actors.py
@ -60,7 +60,7 @@ def test_extract_question_with_date_filter_from_relative_month():
@freeze_time("1984-04-02")
 def test_extract_question_with_date_filter_from_relative_year():
    # Act
-    response = extract_questions("Where countries have I visited this year?")
+    response = extract_questions("Which countries have I visited this year?")

    # Assert
    expected_responses = [
@ -123,7 +123,6 @@ def test_generate_search_query_using_question_from_chat_history():


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_using_answer_from_chat_history():
    # Arrange
@ -140,7 +139,6 @@ def test_generate_search_query_using_answer_from_chat_history():


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_using_question_and_answer_from_chat_history():
    # Arrange
@ -157,7 +155,6 @@ def test_generate_search_query_using_question_and_answer_from_chat_history():


 # ----------------------------------------------------------------------------------------------------
-@pytest.mark.xfail(reason="Search actor cannot extract question from answer yet.")
@pytest.mark.chatquality
 def test_generate_search_query_with_date_and_context_from_chat_history():
    # Arrange
@ -377,7 +374,7 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
-        user_query="Write a haiku about unit testing",
+        user_query="Write a haiku about unit testing in 3 lines",
        conversation_log=populate_chat_history(message_list),
        api_key=api_key,
    )