diff --git a/pyproject.toml b/pyproject.toml index d8d985d5..8df96e4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,6 +71,7 @@ dev = [ "mypy >= 1.0.1", "black >= 23.1.0", "pre-commit >= 3.0.4", + "freezegun >= 1.2.0", ] [tool.hatch.version] diff --git a/tests/test_chat_director.py b/tests/test_chat_director.py new file mode 100644 index 00000000..ec56f8e4 --- /dev/null +++ b/tests/test_chat_director.py @@ -0,0 +1,318 @@ +# Standard Packages +import os + +# External Packages +import pytest +from freezegun import freeze_time + +# Internal Packages +from khoj.processor.conversation.utils import message_to_log +from khoj.utils import state + + +# Initialize variables for tests +api_key = os.getenv("OPENAI_API_KEY") # Set your OPENAI_API_KEY as environment variable to run the tests below + + +# Helpers +# ---------------------------------------------------------------------------------------------------- +def populate_chat_history(message_list): + # Generate conversation logs + conversation_log = {"chat": []} + for user_message, gpt_message, context in message_list: + conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context}) + + # Update Conversation Metadata Logs in Application State + state.processor_config.conversation.meta_log = conversation_log + + +# Tests +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_chat_with_no_chat_history_or_retrieved_content(chat_client): + # Act + response = chat_client.get(f'/api/chat?q="Hello, my name is Testatron. Who are you?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["Khoj", "khoj"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected assistants name, [K|k]hoj, in response but got" + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_from_chat_history(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", ""), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="What is my name?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["Testatron", "testatron"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected [T|t]estatron in response but got" + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_from_currently_retrieved_content(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="Where was Xi Li born?"') + response_message = response.json()["response"] + + # Assert + assert response.status_code == 200 + assert "Fujiang" in response_message + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_from_chat_history_and_previously_retrieved_content(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="Where was I born?"') + response_message = response.json()["response"] + + # Assert + assert response.status_code == 200 + # 1. Infer who I am from chat history + # 2. Infer I was born in Testville from previously retrieved notes + assert "Testville" in response_message + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering this question yet") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_from_chat_history_and_currently_retrieved_content(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", ""), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="Where was I born?"') + response_message = response.json()["response"] + + # Assert + assert response.status_code == 200 + # Inference in a multi-turn conversation + # 1. Infer who I am from chat history + # 2. Search for notes about when was born + # 3. Extract where I was born from currently retrieved notes + assert "Fujiang" in response_message + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_no_answer_in_chat_history_or_retrieved_content(chat_client): + "Chat director should say don't know as not enough contexts in chat history or retrieved to answer question" + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", ""), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="Where was I born?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["don't know", "do not know", "no information", "do not have", "don't have"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected chat director to say they don't know in response, but got: " + response + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering time aware questions yet") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +@freeze_time("2023-04-01") +def test_answer_requires_current_date_awareness(chat_client): + "Chat actor should be able to answer questions relative to current date using provided notes" + # Act + response = chat_client.get(f'/api/chat?q="Where did I have lunch today?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["Arak", "Medellin"] + assert response.status_code == 200 + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected chat director to say Arak, Medellin, but got: " + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(AssertionError, reason="Chat director not capable of answering time aware questions yet") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +@freeze_time("2023-04-01") +def test_answer_requires_date_aware_aggregation_across_provided_notes(chat_client): + "Chat director should be able to answer questions that require date aware aggregation across multiple notes" + # Act + response = chat_client.get(f'/api/chat?q="How much did I spend on dining this year?"') + response_message = response.json()["response"] + + # Assert + assert response.status_code == 200 + assert "23" in response_message + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", ""), + ("Where was I born?", "You were born Testville.", ""), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q=""Write a haiku about unit testing"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["test", "Test"] + assert response.status_code == 200 + assert len(response_message.splitlines()) == 3 # haikus are 3 lines long + assert any([expected_response in response_message for expected_response in expected_responses]), ( + "Expected [T|t]est in response, but got: " + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(reason="Chat director not consistently capable of asking for clarification yet.") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_ask_for_clarification_if_not_enough_context_in_question(chat_client): + # Act + response = chat_client.get(f'/api/chat?q="What is the name of Namitas older son"') + response_message = response.json()["response"] + + # Assert + expected_responses = [ + "which of them is the older", + "which one is older", + "which of them is older", + "which one is the older", + ] + assert response.status_code == 200 + assert any([expected_response in response_message.lower() for expected_response in expected_responses]), ( + "Expected chat director to ask for clarification in response, but got: " + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(reason="Chat director not capable of answering this question yet") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_in_chat_history_beyond_lookback_window(chat_client): + # Arrange + message_list = [ + ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), + ("When was I born?", "You were born on 1st April 1984.", ""), + ("Where was I born?", "You were born Testville.", ""), + ] + populate_chat_history(message_list) + + # Act + response = chat_client.get(f'/api/chat?q="What is my name?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["Testatron", "testatron"] + assert response.status_code == 200 + assert any([expected_response in response_message.lower() for expected_response in expected_responses]), ( + "Expected [T|t]estatron in response, but got: " + response_message + ) + + +# ---------------------------------------------------------------------------------------------------- +@pytest.mark.xfail(reason="Chat director not capable of answering this question yet") +@pytest.mark.chatquality +@pytest.mark.skipif( + api_key is None, + reason="Set api_key variable to your OpenAI API key from https://platform.openai.com/account/api-keys", +) +def test_answer_requires_multiple_independent_searches(chat_client): + "Chat director should be able to answer by doing multiple independent searches for required information" + # Act + response = chat_client.get(f'/api/chat?q="Is Xi older than Namita?"') + response_message = response.json()["response"] + + # Assert + expected_responses = ["he is older than namita", "xi is older than namita"] + assert response.status_code == 200 + assert any([expected_response in response_message.lower() for expected_response in expected_responses]), ( + "Expected Xi is older than Namita, but got: " + response_message + )