Test Chat Actor Capabilities; ability to answer from notes, chat logs etc

- Chat actors are narrow agents (prompt + ML model)
  Chat actors are different from the Chat director, who orchestrates
  the narrow actor agents to synthesize the final response to the user

- Test Chat Actor Capabilities
  1. Answer from retrieved notes
  2. Answer from chat history
  3. Answer general questions
  4. Carry out multi-turn conversation
  5. Say don't know when answer not in provided context
  6. Answers that require current date awareness
  7. Date-aware aggregation across multiple different notes
  8. Ask clarification questions if no unambiguous answer in provided context
     This test is expected to fail as the chat is not capable of doing
     this consistently yet. But having the test allows assessing chat quality

- Use Openai API Key from OPENAI_API_KEY environment variable
- Gitignore .env file, python virtualenv directory
  Put OpenAI API Key in .env file to run chatbot tests via vscode
  The .env file is the default location for importing env vars
This commit is contained in:
Debanjum Singh Solanky 2023-03-14 17:44:16 -06:00
parent 9306cd901a
commit c1128a1ad8
2 changed files with 247 additions and 11 deletions

2
.gitignore vendored
View file

@ -10,6 +10,8 @@ __pycache__
.emacs.desktop* .emacs.desktop*
*.py[cod] *.py[cod]
.vscode .vscode
.env
.venv/*
# Build artifacts # Build artifacts
/src/khoj/interface/web/images /src/khoj/interface/web/images

View file

@ -1,3 +1,7 @@
# Standard Packages
import os
from datetime import datetime
# External Packages # External Packages
import pytest import pytest
@ -6,8 +10,7 @@ from khoj.processor.conversation.gpt import converse, message_to_log, message_to
# Initialize variables for tests # Initialize variables for tests
model = "text-davinci-003" api_key = os.getenv("OPENAI_API_KEY") # Set your OPENAI_API_KEY as environment variable to run the tests below
api_key = None # Input your OpenAI API key to run the tests below
# Test # Test
@ -30,7 +33,7 @@ def test_message_to_understand_prompt():
@pytest.mark.skipif( @pytest.mark.skipif(
api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys" api_key is None, reason="Set api_key variable to your OpenAI API key from https://beta.openai.com/account/api-keys"
) )
def test_minimal_chat_with_gpt(): def test_chat_with_no_chat_history_or_retrieved_content():
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query text="", # Assume no context retrieved from notes for the user_query
@ -50,21 +53,252 @@ def test_minimal_chat_with_gpt():
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_from_chat_history_and_no_content():
    """Chatbot should answer from the chat history alone when no notes are retrieved."""
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, _ in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message)

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="What is my name?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Compare case-insensitively so any capitalization of the name is accepted
    assert "testatron" in response.lower(), "Expected [T|t]estatron in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_from_chat_history_and_previously_retrieved_content():
    """Chatbot needs to use context in previous notes and chat history to answer question."""
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."),
    ]
    # Generate conversation logs, attaching the notes retrieved for each earlier turn
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Infer who I am and use that to infer I was born in Testville using chat history and previously retrieved notes
    assert "Testville" in response, "Expected Testville in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_from_chat_history_and_currently_retrieved_content():
    """Chatbot needs to use context across currently retrieved notes and chat history to answer question."""
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="Testatron was born on 1st April 1984 in Testville.",  # Assume context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    assert "Testville" in response, "Expected Testville in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_no_answer_in_chat_history_or_retrieved_content():
    """Chatbot should say don't know as not enough contexts in chat history or retrieved to answer question."""
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Where was I born?",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    expected_responses = ["don't know", "do not know", "no information", "do not have", "don't have"]
    assert len(response) > 0
    # Compare case-insensitively so a phrase starting a sentence (e.g. "Do not have") still matches
    normalized_response = response.lower()
    assert any(expected_response in normalized_response for expected_response in expected_responses), (
        "Expected chatbot to say it does not know in response, but got: " + response
    )
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_requires_current_date_awareness():
    """Chatbot should be able to answer questions relative to current date using provided notes."""
    # Arrange
    # Compute today's date once so both of today's entries carry the same date even if the test runs across midnight
    today = datetime.now().strftime("%Y-%m-%d")
    context = f"""
# {today} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD
# {today} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD
# 2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="What did I have for Dinner today?",
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Compare case-insensitively so any capitalization of "tacos" is accepted
    assert "tacos" in response.lower(), "Expected [T|t]acos in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_requires_date_aware_aggregation_across_provided_notes():
    """Chatbot should be able to answer questions that require date aware aggregation across multiple notes."""
    import re  # Local import: only this test needs it

    # Arrange
    # Compute today's date once so both of today's entries carry the same date even if the test runs across midnight
    today = datetime.now().strftime("%Y-%m-%d")
    context = f"""
# {today} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD
# {today} "Sagar Ratna" "Dosa for Lunch"
Expenses:Food:Dining 10.00 USD
# 2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="How much did I spend on dining this year?",
        api_key=api_key,
    )

    # Assert
    assert len(response) > 0
    # Match 20 (optionally 20.0 / 20.00) as a standalone number. A plain `"20" in response`
    # would also pass on any year mentioned in the response, e.g. "2020" or "2023".
    standalone_twenty = r"(?<![\d.,])20(?:\.0+)?(?![.,]?\d)"
    assert re.search(standalone_twenty, response), "Expected a total of 20 in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_answer_general_question_not_in_chat_history_or_retrieved_content():
    """Chatbot should be able to answer general questions not requiring looking at chat history or notes."""
    # Arrange
    conversation_log = {"chat": []}
    message_list = [
        ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""),
        ("When was I born?", "You were born on 1st April 1984.", ""),
        ("Where was I born?", "You were born Testville.", ""),
    ]
    # Generate conversation logs
    for user_message, gpt_message, context in message_list:
        conversation_log["chat"] += message_to_log(user_message, gpt_message, {"context": context})

    # Act
    response = converse(
        text="",  # Assume no context retrieved from notes for the user_query
        user_query="Write a haiku about unit testing",
        conversation_log=conversation_log,
        api_key=api_key,
    )

    # Assert
    # Count only non-blank lines so leading, trailing or separating blank lines do not fail the check
    haiku_lines = [line for line in response.splitlines() if line.strip()]
    assert len(haiku_lines) == 3, "Expected a 3 line haiku in response, but got: " + response  # haikus are 3 lines long
    assert "test" in response.lower(), "Expected [T|t]est in response, but got: " + response
# ----------------------------------------------------------------------------------------------------
@pytest.mark.xfail(reason="Chatbot not consistently capable of asking for clarification yet.")
@pytest.mark.skipif(
    not api_key,  # Also skip when OPENAI_API_KEY is set but empty, not only when it is unset
    reason="Set the OPENAI_API_KEY environment variable to your OpenAI API key from https://beta.openai.com/account/api-keys",
)
def test_ask_for_clarification_if_not_enough_context_in_question():
    """Chatbot should ask for clarification if question cannot be answered unambiguously with the provided context."""
    # Arrange
    # Plain string: there are no placeholders to interpolate, so no f-prefix is needed
    context = """
# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.
# Fang
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.
# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.
"""

    # Act
    response = converse(
        text=context,  # Assume context retrieved from notes for the user_query
        user_query="How many kids does my older sister have?",
        api_key=api_key,
    )

    # Assert
    # The notes do not say which sister is older, so the chatbot is expected to ask which one is meant
    expected_responses = ["which sister", "which of your sister"]
    normalized_response = response.lower()
    assert any(expected_response in normalized_response for expected_response in expected_responses), (
        "Expected chatbot to ask for clarification in response, but got: " + response
    )