Test Chat Actor capabilities: ability to answer from notes, chat logs, etc.

- Chat actors are narrow agents (prompt + ML model).
  Chat actors are different from the Chat director, which orchestrates the narrow actor agents to synthesize the final response to the user.
- Test Chat Actor capabilities:
  1. Answer from retrieved notes
  2. Answer from chat history
  3. Answer general questions
  4. Carry out multi-turn conversation
  5. Say don't know when the answer is not in the provided context
  6. Answers that require current date awareness
  7. Date-aware aggregation across multiple different notes
  8. Ask clarification questions if there is no unambiguous answer in the provided context.
     This test is expected to fail as the chat is not capable of doing this consistently yet, but having the test allows assessing chat quality.
- Use the OpenAI API key from the OPENAI_API_KEY environment variable.
- Gitignore the .env file and the Python virtualenv directory.
  Put the OpenAI API key in the .env file to run chatbot tests via VS Code. The .env file is the default location for importing env vars.
2024-11-23 23:48:56 +01:00 · 2023-03-14 17:44:16 -06:00 · 2023-03-14 17:44:16 -06:00 · c1128a1ad8
commit c1128a1ad8
parent 9306cd901a
2 changed files with 247 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -10,6 +10,8 @@ __pycache__
 .emacs.desktop*
 *.py[cod]
 .vscode
 .env
 .venv/*
 # Build artifacts
 /src/khoj/interface/web/images
--- a/tests/test_chatbot.py
+++ b/tests/test_chatbot.py
@ -1,3 +1,7 @@
 # Standard Packages
 import os
 from datetime import datetime
 # External Packages
 import pytest
@ -6,8 +10,7 @@ from khoj.processor.conversation.gpt import converse, message_to_log, message_to
 # Initialize variables for tests
-model = "text-davinci-003"
+api_key = os.getenv("OPENAI_API_KEY")  # Set your OPENAI_API_KEY as environment variable to run the tests below
 api_key = None  # Input your OpenAI API key to run the tests below
 # Test
@ -30,7 +33,7 @@ def test_message_to_understand_prompt():
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
 )
-def test_minimal_chat_with_gpt():
+def test_chat_with_no_chat_history_or_retrieved_content():
    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
@ -50,21 +53,252 @@ def test_minimal_chat_with_gpt():
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
 )
-def test_multi_turn_chat_with_minimal_history_no_context():
+def test_answer_from_chat_history_and_no_content():
-    conversation_log = {}
+    # Arrange
-    conversation_log["chat"] = message_to_log(
+    conversation_log = {"chat": []}
-        user_message="Hello, my name is Testatron. Who are you?",
+    message_list = [
-        gpt_message="Hi, I am Khoj, an AI conversational companion. How can I help you today?",
+        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
-    )
+        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, _ in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message)
    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
-        user_query="Hi Khoj, what is my name?",
+        user_query="What is my name?",
        conversation_log=conversation_log,
        api_key=api_key,
    )
    # Assert
    expected_responses = ["Testatron", "testatron"]
    assert len(response) > 0
    assert any([expected_response in response for expected_response in expected_responses]), (
        "Expected [T|t]estatron in response but got" + response
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
 )
 def test_answer_from_chat_history_and_previously_retrieved_content():
    "Chatbot needs to use context in previous notes and chat history to answer question"
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})
    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )
    # Assert
    assert len(response) > 0
-    assert "Testatron" in response or "testatron" in response
+    # Infer who I am and use that to infer I was born in Testville using chat history and previously retrieved notes
    assert "Testville" in response
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_answer_from_chat_history_and_currently_retrieved_content():
    """Answer by combining the chat history with the notes retrieved for the current query.

    The chat history introduces the user as Testatron, while the birthplace is
    only present in the notes passed to `converse` for this query.
    """
    # Arrange: earlier turns as (user message, assistant reply, context retrieved for that turn)
    past_turns = (
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    )
    chat_history = []
    for said, replied, notes in past_turns:
        chat_history.extend(message_to_log(said, replied, {"context": notes}))
    conversation_log = {"chat": chat_history}

    # Act
    response = converse(
        text="Testatron was born on 1st April 1984 in Testville.",  # Notes retrieved for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert: reply is non-empty and names the birthplace found only in the retrieved notes
    assert len(response) > 0
    assert "Testville" in response
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_no_answer_in_chat_history_or_retrieved_content():
    """Chatbot should say it doesn't know when neither chat history nor retrieved notes hold the answer.

    The chat history only covers the user's name and date of birth, and no notes
    are passed for the query, so the birthplace being asked for is not available.
    """
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    expected_responses = ["don't know", "do not know", "no information", "do not have", "don't have"]
    assert len(response) > 0
    # Match case-insensitively and treat a typographic apostrophe like a plain one,
    # so replies such as "Don’t know ..." are still recognised
    normalized_response = response.lower().replace("\u2019", "'")
    assert any(expected_response in normalized_response for expected_response in expected_responses), (
        "Expected chatbot to say it does not know in response, but got: " + response
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_answer_requires_current_date_awareness():
    """Chatbot should be able to answer questions relative to current date using provided notes.

    The notes contain two entries dated today (tacos for dinner, dosa for lunch)
    and two entries from 2020; the question asks about today's dinner.
    """
    # Arrange
    # Format today's date once so both of today's entries are guaranteed to carry
    # the same date, even when the test runs across midnight
    today = datetime.now().strftime("%Y-%m-%d")
    context = f"""
    # {today} "Naco Taco" "Tacos for Dinner"
      Expenses:Food:Dining  10.00 USD
    # {today} "Sagar Ratna" "Dosa for Lunch"
      Expenses:Food:Dining  10.00 USD
    # 2020-04-01 "SuperMercado" "Bananas"
      Expenses:Food:Groceries  10.00 USD
    # 2020-01-01 "Naco Taco" "Burittos for Dinner"
      Expenses:Food:Dining  10.00 USD
    """

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="What did I have for Dinner today?",
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Case-insensitive check accepts "tacos", "Tacos", "TACOS", ...
    assert "tacos" in response.lower(), "Expected [T|t]acos in response, but got: " + response
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_answer_requires_date_aware_aggregation_across_provided_notes():
    """Chatbot should be able to answer questions that require date aware aggregation across multiple notes.

    The notes contain two dining entries dated today at 10.00 USD each, plus a
    groceries and a dining entry from 2020. The expected dining total for the
    current year is therefore 20.
    """
    # Local import keeps this test self-contained; `re` is only needed for the assertion below
    import re

    # Arrange
    # Format today's date once so both of today's entries are guaranteed to carry
    # the same date, even when the test runs across midnight
    today = datetime.now().strftime("%Y-%m-%d")
    context = f"""
    # {today} "Naco Taco" "Tacos for Dinner"
      Expenses:Food:Dining  10.00 USD
    # {today} "Sagar Ratna" "Dosa for Lunch"
      Expenses:Food:Dining  10.00 USD
    # 2020-04-01 "SuperMercado" "Bananas"
      Expenses:Food:Groceries  10.00 USD
    # 2020-01-01 "Naco Taco" "Burittos for Dinner"
      Expenses:Food:Dining  10.00 USD
    """

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="How much did I spend on dining this year?",
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Require 20 (optionally 20.0, 20.00, ...) as a standalone amount. A plain
    # `"20" in response` would also pass on years like "2023", amounts like
    # "120" or "20.50", and dates like "2023-04-20".
    standalone_twenty = r"(?<![\d.,/-])20(?:\.0+)?(?![.,/-]?\d)"
    assert re.search(standalone_twenty, response), "Expected a total of 20 in response, but got: " + response
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_answer_general_question_not_in_chat_history_or_retrieved_content():
    """Chatbot should be able to answer general questions not requiring looking at chat history or notes.

    The chat history is about the user's name and birth details, and no notes are
    passed for the query; the request (a haiku about unit testing) is unrelated
    to both.
    """
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
        ("Where was I born?", "You were born Testville.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Write a haiku about unit testing",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    # Count only non-blank lines so leading, trailing or interior blank lines in
    # the reply do not break the 3-line check
    haiku_lines = [line for line in response.splitlines() if line.strip()]
    assert len(haiku_lines) == 3, "Expected a 3 line haiku in response, but got: " + response
    # Case-insensitive check accepts "test", "Test", "Testing", ...
    assert "test" in response.lower(), "Expected [T|t]est in response, but got: " + response
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chatbot not consistently capable of asking for clarification yet.")
@pytest.mark.skipif(
    api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
)
def test_ask_for_clarification_if_not_enough_context_in_question():
    """Chatbot should ask for clarification if question cannot be answered unambiguously with the provided context.

    The notes describe three sisters without giving their ages, so "my older
    sister" is ambiguous. The test is marked xfail because the chatbot does not
    ask for clarification consistently yet.
    """
    # Arrange
    # Plain string literal: the notes contain no placeholders, so no f-string is needed
    context = """
    # Ramya
    My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.
    # Fang
    My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.
    # Aiyla
    My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.
     """

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="How many kids does my older sister have?",
        api_key=api_key,
    )

    # Assert
    # Lower-case phrases compared against the lower-cased reply, so both
    # "Which sister ..." and "which sister ..." match
    expected_responses = ["which sister", "which of your sister"]
    normalized_response = response.lower()
    assert any(expected_response in normalized_response for expected_response in expected_responses), (
        "Expected chatbot to ask for clarification in response, but got: " + response
    )