Test Chat Director Capabilities: Answer from notes, chat history etc

- Chat directors are broad agents.
- Chat directors orchestrate narrow actor agents to synthesize the final response for the user.
- Agents are Prompts + ML Model.
- Test Chat Director Capabilities:
  1. [X] Answer from retrieved notes
  2. [X] Answer from chat history
  3. [X] Answer general questions
  4. [X] Carry out multi-turn conversation
  5. [X] Say don't know when answer not in provided context
  6. [X] Answers that require current date awareness.
     This test is expected to fail as the chat is not capable of doing this without the Search actor. But the test allows assessing chat quality.
  7. [X] Date-aware aggregation across multiple different notes.
     This test is expected to fail as the chat is not capable of doing this without the Search actor. But the test allows assessing chat quality.
  8. [X] Ask clarification questions if no unambiguous answer in provided context
  9. [X] Retrieve answer from chat history beyond lookback window.
     This test is expected to fail as the chat director is not capable of searching chat history yet. But the test allows assessing chat quality.
  10. [X] Retrieve context for answer using multiple independent searches on knowledge base.
      This test is expected to fail as the chat is not capable of doing this without the Search actor. But the test allows assessing chat quality.
2024-11-23 23:48:56 +01:00 · 2023-03-15 14:52:59 -06:00 · 2023-03-15 14:52:59 -06:00 · 1b4d562700
commit 1b4d562700
parent b6d63137f1
2 changed files with 319 additions and 0 deletions
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -71,6 +71,7 @@ dev = [
    "mypy >= 1.0.1",
    "black >= 23.1.0",
    "pre-commit >= 3.0.4",
    "freezegun >= 1.2.0",
 ]
 [tool.hatch.version]
--- a/tests/test_chat_director.py
+++ b/tests/test_chat_director.py
@@ -0,0 +1,318 @@
 # Standard Packages
 import os
 # External Packages
 import pytest
 from freezegun import freeze_time
 # Internal Packages
 from khoj.processor.conversation.utils import message_to_log
 from khoj.utils import state
 # Initialize variables for tests
# The chat quality tests below are skipped when this is None.
# Set your OPENAI_API_KEY as environment variable to run them.
api_key = os.environ.get("OPENAI_API_KEY")
 # Helpers
 # ----------------------------------------------------------------------------------------------------
def populate_chat_history(message_list):
    """Seed the application state's conversation log from message_list.

    Each item of message_list is unpacked as (user_message, gpt_message, context).
    The context is passed to message_to_log wrapped as {"context": context}.
    """
    # Collect the log entries produced for every seeded exchange
    chat_entries = []
    for user_message, gpt_message, context in message_list:
        chat_entries.extend(message_to_log(user_message, gpt_message, {"context": context}))

    # Publish the generated log as the conversation metadata log in application state
    state.processor_config.conversation.meta_log = {"chat": chat_entries}
 # Tests
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
    """Chat director should name itself Khoj when asked who it is.

    No chat history is seeded by this test before the question is asked.
    """
    # Act
    response = chat_client.get('/api/chat?q="Hello, my name is Testatron. Who are you?"')
    response_message = response.json()["response"]

    # Assert
    expected_responses = ["Khoj", "khoj"]
    assert response.status_code == 200
    # Separator after "but got" keeps the failure message readable
    assert any(expected_response in response_message for expected_response in expected_responses), (
        "Expected assistant's name, [K|k]hoj, in response but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_from_chat_history(chat_client):
    """Chat director should recall the user's name from the seeded chat history."""
    # Arrange
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    populate_chat_history(message_list)

    # Act
    response = chat_client.get('/api/chat?q="What is my name?"')
    response_message = response.json()["response"]

    # Assert
    expected_responses = ["Testatron", "testatron"]
    assert response.status_code == 200
    # Separator after "but got" keeps the failure message readable
    assert any(expected_response in response_message for expected_response in expected_responses), (
        "Expected [T|t]estatron in response but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_from_currently_retrieved_content(chat_client):
    """Chat director should answer a question about Xi Li that the seeded chat history does not cover.

    NOTE(review): the expected answer presumably comes from notes served by the
    chat_client fixture; confirm against the fixture's content.
    """
    # Arrange: the seeded history only talks about Testatron
    seeded_turns = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."),
    ]
    populate_chat_history(seeded_turns)

    # Act
    reply = chat_client.get('/api/chat?q="Where was Xi Li born?"')
    reply_text = reply.json()["response"]

    # Assert
    assert reply.status_code == 200
    assert "Fujiang" in reply_text
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_from_chat_history_and_previously_retrieved_content(chat_client):
    """Chat director should combine chat history with context attached to an earlier turn.

    The birthplace is only present in the context stored with the second seeded turn.
    """
    # Arrange
    seeded_turns = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."),
    ]
    populate_chat_history(seeded_turns)

    # Act
    reply = chat_client.get('/api/chat?q="Where was I born?"')
    reply_text = reply.json()["response"]

    # Assert
    assert reply.status_code == 200
    # Answering requires two inferences:
    # 1. Who "I" am, from the chat history
    # 2. That I was born in Testville, from the previously retrieved notes
    assert "Testville" in reply_text
 # ----------------------------------------------------------------------------------------------------
# Pass the exception via `raises=`; the first positional argument of xfail is `condition`,
# so a bare AssertionError there is just a truthy condition and any exception would be tolerated.
@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_from_chat_history_and_currently_retrieved_content(chat_client):
    """Chat director should combine the user's name from chat history with currently retrieved notes.

    Expected to fail for now (see the xfail reason); kept to assess chat quality.
    """
    # Arrange
    message_list = [
        ("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    populate_chat_history(message_list)

    # Act
    response = chat_client.get('/api/chat?q="Where was I born?"')
    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
    # Inference in a multi-turn conversation
    # 1. Infer who I am from chat history
    # 2. Search for notes about where <my_name_from_chat_history> was born
    # 3. Extract where I was born from currently retrieved notes
    assert "Fujiang" in response_message
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_no_answer_in_chat_history_or_retrieved_content(chat_client):
    """Chat director should say don't know as not enough context in chat history or retrieved to answer question."""
    # Arrange
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    populate_chat_history(message_list)

    # Act
    response = chat_client.get('/api/chat?q="Where was I born?"')
    response_message = response.json()["response"]

    # Assert
    expected_responses = ["don't know", "do not know", "no information", "do not have", "don't have"]
    assert response.status_code == 200
    # Report the response text: concatenating the response object itself to a str
    # would raise TypeError and hide the real assertion failure
    assert any(expected_response in response_message for expected_response in expected_responses), (
        "Expected chat director to say they don't know in response, but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
# Pass the exception via `raises=`; the first positional argument of xfail is `condition`,
# so a bare AssertionError there is just a truthy condition and any exception would be tolerated.
@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering time aware questions yet")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
@freeze_time("2023-04-01")
def test_answer_requires_current_date_awareness(chat_client):
    """Chat director should be able to answer questions relative to current date using provided notes.

    The clock is frozen at 2023-04-01 so "today" is stable across runs.
    Expected to fail for now (see the xfail reason); kept to assess chat quality.
    """
    # Act
    response = chat_client.get('/api/chat?q="Where did I have lunch today?"')
    response_message = response.json()["response"]

    # Assert
    expected_responses = ["Arak", "Medellin"]
    assert response.status_code == 200
    assert any(expected_response in response_message for expected_response in expected_responses), (
        "Expected chat director to say Arak, Medellin, but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
# Pass the exception via `raises=`; the first positional argument of xfail is `condition`,
# so a bare AssertionError there is just a truthy condition and any exception would be tolerated.
@pytest.mark.xfail(raises=AssertionError, reason="Chat director not capable of answering time aware questions yet")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
@freeze_time("2023-04-01")
def test_answer_requires_date_aware_aggregation_across_provided_notes(chat_client):
    """Chat director should be able to answer questions that require date aware aggregation across multiple notes.

    The clock is frozen at 2023-04-01 so "this year" is stable across runs.
    Expected to fail for now (see the xfail reason); kept to assess chat quality.
    """
    # Act
    response = chat_client.get('/api/chat?q="How much did I spend on dining this year?"')
    response_message = response.json()["response"]

    # Assert
    assert response.status_code == 200
    assert "23" in response_message
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client):
    """Chat director should answer a general request that is unrelated to the seeded chat history."""
    # Arrange
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
        ("Where was I born?", "You were born Testville.", ""),
    ]
    populate_chat_history(message_list)

    # Act
    # The query is wrapped in a single pair of double quotes, like every other test in this module
    response = chat_client.get('/api/chat?q="Write a haiku about unit testing"')
    response_message = response.json()["response"]

    # Assert
    expected_responses = ["test", "Test"]
    assert response.status_code == 200
    assert len(response_message.splitlines()) == 3  # haikus are 3 lines long
    assert any(expected_response in response_message for expected_response in expected_responses), (
        "Expected [T|t]est in response, but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat director not consistently capable of asking for clarification yet.")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_ask_for_clarification_if_not_enough_context_in_question(chat_client):
    """Chat director should ask which son is older instead of guessing an answer.

    Marked xfail as the director does not do this consistently yet.
    """
    # Act
    reply = chat_client.get('/api/chat?q="What is the name of Namitas older son"')
    reply_text = reply.json()["response"]

    # Assert
    clarifying_phrases = [
        "which of them is the older",
        "which one is older",
        "which of them is older",
        "which one is the older",
    ]
    assert reply.status_code == 200
    # Match case-insensitively against the accepted clarification phrasings
    lowered_reply = reply_text.lower()
    assert any(phrase in lowered_reply for phrase in clarifying_phrases), (
        "Expected chat director to ask for clarification in response, but got: " + reply_text
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_in_chat_history_beyond_lookback_window(chat_client):
    """Chat director should recall the user's name from the earliest seeded chat turn.

    NOTE(review): only three turns are seeded; confirm that the first turn really
    falls outside the chat director's lookback window.
    """
    # Arrange
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
        ("Where was I born?", "You were born Testville.", ""),
    ]
    populate_chat_history(message_list)

    # Act
    response = chat_client.get('/api/chat?q="What is my name?"')
    response_message = response.json()["response"]

    # Assert
    # The response is lowercased before matching, so only the lowercase form can ever match
    expected_responses = ["testatron"]
    assert response.status_code == 200
    assert any(expected_response in response_message.lower() for expected_response in expected_responses), (
        "Expected [T|t]estatron in response, but got: " + response_message
    )
 # ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chat director not capable of answering this question yet")
@pytest.mark.chatquality
@pytest.mark.skipif(
    api_key is None,
    reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys",
)
def test_answer_requires_multiple_independent_searches(chat_client):
    """Chat director should be able to answer by doing multiple independent searches for required information."""
    # Act
    reply = chat_client.get('/api/chat?q="Is Xi older than Namita?"')
    reply_text = reply.json()["response"]

    # Assert
    accepted_answers = ["he is older than namita", "xi is older than namita"]
    assert reply.status_code == 200
    # Match case-insensitively against the accepted phrasings
    lowered_reply = reply_text.lower()
    assert any(answer in lowered_reply for answer in accepted_answers), (
        "Expected Xi is older than Namita, but got: " + reply_text
    )