Improve Llama v2 extract questions actor and associated prompt

- Reformat the extract questions prompt with consistent newlines and whitespace
- Make the Llama v2 extract questions prompt consistent

- Remove empty questions extracted by the offline extract_questions actor
- Update the implicit questions extraction unit test for the offline search actor
Debanjum Singh Solanky 2023-10-04 20:42:25 -07:00
parent a85ff941ca
commit 56bd69d5af
3 changed files with 26 additions and 20 deletions


@@ -113,7 +113,7 @@ def filter_questions(questions: List[str]):
     ]
     filtered_questions = []
     for q in questions:
-        if not any([word in q.lower() for word in hint_words]):
+        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
            filtered_questions.append(q)
    return filtered_questions
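
For reference, a minimal, self-contained sketch of the filtering behaviour after this change. The hint_words list is an illustrative subset and is_none_or_empty is stubbed locally, assuming the khoj helper treats None and blank strings as empty:

from typing import List, Optional

def is_none_or_empty(item: Optional[str]) -> bool:
    # Stub of the khoj helper (assumption): None or whitespace-only counts as empty
    return item is None or item.strip() == ""

def filter_questions(questions: List[str]) -> List[str]:
    # Drop model chatter containing hint words and, after this commit, empty extractions
    hint_words = ["sorry", "apologize"]  # illustrative subset of the real list
    filtered_questions = []
    for q in questions:
        if not any([word in q.lower() for word in hint_words]) and not is_none_or_empty(q):
            filtered_questions.append(q)
    return filtered_questions

# The empty string extracted by the offline actor is now dropped before search
print(filter_questions(["What is Bob's age?", "", "Sorry, I cannot answer that"]))
# ["What is Bob's age?"]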


@@ -23,7 +23,7 @@ no_notes_found = PromptTemplate.from_template(
 """.strip()
 )
-system_prompt_message_llamav2 = f"""You are Khoj, a friendly, smart and helpful personal assistant.
+system_prompt_message_llamav2 = f"""You are Khoj, a smart, inquisitive and helpful personal assistant.
 Using your general knowledge and our past conversations as context, answer the following question.
 If you do not know the answer, say 'I don't know.'"""
@@ -51,13 +51,13 @@ extract_questions_system_prompt_llamav2 = PromptTemplate.from_template(
 general_conversation_llamav2 = PromptTemplate.from_template(
     """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )
 chat_history_llamav2_from_user = PromptTemplate.from_template(
     """
-<s>[INST]{message}[/INST]
+<s>[INST] {message} [/INST]
 """.strip()
 )
@@ -69,7 +69,7 @@ chat_history_llamav2_from_assistant = PromptTemplate.from_template(
 conversation_llamav2 = PromptTemplate.from_template(
     """
-<s>[INST]{query}[/INST]
+<s>[INST] {query} [/INST]
 """.strip()
 )
@@ -91,7 +91,7 @@ Question: {query}
 notes_conversation_llamav2 = PromptTemplate.from_template(
     """
-Notes:
+User's Notes:
 {references}
 Question: {query}
 """.strip()
@@ -134,19 +134,25 @@ Answer (in second person):"""
 extract_questions_llamav2_sample = PromptTemplate.from_template(
     """
-<s>[INST]<<SYS>>Current Date: {current_date}<</SYS>>[/INST]</s>
-<s>[INST]How was my trip to Cambodia?[/INST][]</s>
-<s>[INST]Who did I visit the temple with on that trip?[/INST]Who did I visit the temple with in Cambodia?</s>
-<s>[INST]How should I take care of my plants?[/INST]What kind of plants do I have? What issues do my plants have?</s>
-<s>[INST]How many tennis balls fit in the back of a 2002 Honda Civic?[/INST]What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
-<s>[INST]What did I do for Christmas last year?[/INST]What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
-<s>[INST]How are you feeling today?[/INST]</s>
-<s>[INST]Is Alice older than Bob?[/INST]When was Alice born? What is Bob's age?</s>
-<s>[INST]<<SYS>>
+<s>[INST] <<SYS>>Current Date: {current_date}<</SYS>> [/INST]</s>
+<s>[INST] How was my trip to Cambodia? [/INST]
+How was my trip to Cambodia?</s>
+<s>[INST] Who did I visit the temple with on that trip? [/INST]
+Who did I visit the temple with in Cambodia?</s>
+<s>[INST] How should I take care of my plants? [/INST]
+What kind of plants do I have? What issues do my plants have?</s>
+<s>[INST] How many tennis balls fit in the back of a 2002 Honda Civic? [/INST]
+What is the size of a tennis ball? What is the trunk size of a 2002 Honda Civic?</s>
+<s>[INST] What did I do for Christmas last year? [/INST]
+What did I do for Christmas {last_year} dt>='{last_christmas_date}' dt<'{next_christmas_date}'</s>
+<s>[INST] How are you feeling today? [/INST]</s>
+<s>[INST] Is Alice older than Bob? [/INST]
+When was Alice born? What is Bob's age?</s>
+<s>[INST] <<SYS>>
 Use these notes from the user's previous conversations to provide a response:
 {chat_history}
-<</SYS>>[/INST]</s>
-<s>[INST]{query}[/INST]
+<</SYS>> [/INST]</s>
+<s>[INST] {query} [/INST]
 """
 )
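
The added padding follows the Llama 2 chat convention of surrounding the user turn with spaces inside the [INST] ... [/INST] markers. As a quick sanity check, here is a sketch of how the updated general_conversation_llamav2 template renders, assuming the langchain PromptTemplate API already used in this prompts module:

from langchain.prompts import PromptTemplate

# Mirrors the updated template in the diff above
general_conversation_llamav2 = PromptTemplate.from_template(
    """
<s>[INST] {query} [/INST]
""".strip()
)

print(general_conversation_llamav2.format(query="How was my trip to Cambodia?"))
# <s>[INST] How was my trip to Cambodia? [/INST]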


@@ -128,15 +128,15 @@ def test_extract_multiple_explicit_questions_from_message(loaded_model):
 @pytest.mark.chatquality
 def test_extract_multiple_implicit_questions_from_message(loaded_model):
     # Act
-    response = extract_questions_offline("Is Morpheus taller than Neo?", loaded_model=loaded_model)
+    response = extract_questions_offline("Is Carl taller than Ross?", loaded_model=loaded_model)
     # Assert
-    expected_responses = ["height", "taller", "shorter", "heights"]
+    expected_responses = ["height", "taller", "shorter", "heights", "who"]
     assert len(response) <= 3
     for question in response:
         assert any([expected_response in question.lower() for expected_response in expected_responses]), (
-            "Expected chat actor to ask follow-up questions about Morpheus and Neo, but got: " + question
+            "Expected chat actor to ask follow-up questions about Carl and Ross, but got: " + question
         )
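
The extra "who" keyword accommodates extractions that rephrase the comparison as a question about the people involved rather than their heights. A small illustration of the assertion with a hypothetical extracted question (not a recorded model response):

expected_responses = ["height", "taller", "shorter", "heights", "who"]

# Hypothetical extraction that only the newly added "who" keyword would match
question = "Who is Carl?"
assert any([expected_response in question.lower() for expected_response in expected_responses])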