Update Chat API, Logs, Interfaces to store and use references as a list

- Remove the need to split by magic string in emacs and chat interfaces
- Move compiling the references into a string (used as context for GPT) into the GPT layer
- Update setup in tests to use new style of setting references
- Rename the first argument of converse to the more appropriate "references"
This commit is contained in:
Debanjum Singh Solanky 2023-03-24 21:55:22 +07:00
parent b08745b541
commit 508b2176b7
6 changed files with 91 additions and 86 deletions

View file

@ -443,9 +443,8 @@ RECEIVE-DATE is the message receive date."
(let* ((message (cdr (or (assoc 'response json-response) (assoc 'message json-response)))) (let* ((message (cdr (or (assoc 'response json-response) (assoc 'message json-response))))
(sender (cdr (assoc 'by json-response))) (sender (cdr (assoc 'by json-response)))
(receive-date (cdr (assoc 'created json-response))) (receive-date (cdr (assoc 'created json-response)))
(context (or (cdr (assoc 'context json-response)) "")) (references (or (cdr (assoc 'context json-response)) '()))
(reference-source-texts (split-string context "\n\n# " t)) (footnotes (mapcar #'khoj--generate-reference references))
(footnotes (mapcar #'khoj--generate-reference reference-source-texts))
(footnote-links (mapcar #'car footnotes)) (footnote-links (mapcar #'car footnotes))
(footnote-defs (mapcar #'cdr footnotes))) (footnote-defs (mapcar #'cdr footnotes)))
(thread-first (thread-first

View file

@ -39,7 +39,6 @@
let references = ''; let references = '';
if (context) { if (context) {
references = context references = context
.split("\n\n# ")
.map((reference, index) => generateReference(reference, index)) .map((reference, index) => generateReference(reference, index))
.join("<sup>,</sup>"); .join("<sup>,</sup>");
} }

View file

@ -223,13 +223,14 @@ A:{ "search-type": "notes" }"""
return json.loads(story.strip(empty_escape_sequences)) return json.loads(story.strip(empty_escape_sequences))
def converse(text, user_query, conversation_log={}, api_key=None, temperature=0.2): def converse(references, user_query, conversation_log={}, api_key=None, temperature=0.2):
""" """
Converse with user using OpenAI's ChatGPT Converse with user using OpenAI's ChatGPT
""" """
# Initialize Variables # Initialize Variables
model = "gpt-3.5-turbo" model = "gpt-3.5-turbo"
openai.api_key = api_key or os.getenv("OPENAI_API_KEY") openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
compiled_references = "\n\n".join({f"# {item}" for item in references})
personality_primer = "You are Khoj, a friendly, smart and helpful personal assistant." personality_primer = "You are Khoj, a friendly, smart and helpful personal assistant."
conversation_primer = f""" conversation_primer = f"""
@ -237,7 +238,7 @@ Using the notes and our past conversations as context, answer the following ques
Current Date: {datetime.now().strftime("%Y-%m-%d")} Current Date: {datetime.now().strftime("%Y-%m-%d")}
Notes: Notes:
{text} {compiled_references}
Question: {user_query}""" Question: {user_query}"""

View file

@ -215,11 +215,11 @@ def chat(q: Optional[str] = None):
result_list = [] result_list = []
for query in inferred_queries: for query in inferred_queries:
result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False)) result_list.extend(search(query, n=5, r=True, score_threshold=-5.0, dedupe=False))
collated_result = "\n\n".join({f"# {item.additional['compiled']}" for item in result_list}) compiled_references = [item.additional["compiled"] for item in result_list]
try: try:
with timer("Generating chat response took", logger): with timer("Generating chat response took", logger):
gpt_response = converse(collated_result, q, meta_log, api_key=api_key) gpt_response = converse(compiled_references, q, meta_log, api_key=api_key)
status = "ok" status = "ok"
except Exception as e: except Exception as e:
gpt_response = str(e) gpt_response = str(e)
@ -231,8 +231,8 @@ def chat(q: Optional[str] = None):
q, q,
gpt_response, gpt_response,
user_message_metadata={"created": user_message_time}, user_message_metadata={"created": user_message_time},
khoj_message_metadata={"context": collated_result, "intent": {"inferred-queries": inferred_queries}}, khoj_message_metadata={"context": compiled_references, "intent": {"inferred-queries": inferred_queries}},
conversation_log=meta_log.get("chat", []), conversation_log=meta_log.get("chat", []),
) )
return {"status": status, "response": gpt_response, "context": collated_result} return {"status": status, "response": gpt_response, "context": compiled_references}

View file

@ -111,7 +111,7 @@ def test_extract_multiple_implicit_questions_from_message():
def test_generate_search_query_using_question_from_chat_history(): def test_generate_search_query_using_question_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("What is the name of Mr. Vaders daughter?", "Princess Leia", ""), ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
] ]
# Act # Act
@ -127,7 +127,7 @@ def test_generate_search_query_using_question_from_chat_history():
def test_generate_search_query_using_answer_from_chat_history(): def test_generate_search_query_using_answer_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("What is the name of Mr. Vaders daughter?", "Princess Leia", ""), ("What is the name of Mr. Vader's daughter?", "Princess Leia", []),
] ]
# Act # Act
@ -143,7 +143,7 @@ def test_generate_search_query_using_answer_from_chat_history():
def test_generate_search_query_using_question_and_answer_from_chat_history(): def test_generate_search_query_using_question_and_answer_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("Does Luke Skywalker have any Siblings?", "Yes, Princess Leia", ""), ("Does Luke Skywalker have any Siblings?", "Yes, Princess Leia", []),
] ]
# Act # Act
@ -159,7 +159,7 @@ def test_generate_search_query_using_question_and_answer_from_chat_history():
def test_generate_search_query_with_date_and_context_from_chat_history(): def test_generate_search_query_with_date_and_context_from_chat_history():
# Arrange # Arrange
message_list = [ message_list = [
("When did I visit Masai Mara?", "You visited Masai Mara in April 2000", ""), ("When did I visit Masai Mara?", "You visited Masai Mara in April 2000", []),
] ]
# Act # Act
@ -184,7 +184,7 @@ def test_generate_search_query_with_date_and_context_from_chat_history():
def test_chat_with_no_chat_history_or_retrieved_content(): def test_chat_with_no_chat_history_or_retrieved_content():
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Hello, my name is Testatron. Who are you?", user_query="Hello, my name is Testatron. Who are you?",
api_key=api_key, api_key=api_key,
) )
@ -202,13 +202,13 @@ def test_chat_with_no_chat_history_or_retrieved_content():
def test_answer_from_chat_history_and_no_content(): def test_answer_from_chat_history_and_no_content():
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="What is my name?", user_query="What is my name?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@ -228,13 +228,17 @@ def test_answer_from_chat_history_and_previously_retrieved_content():
"Chat actor needs to use context in previous notes and chat history to answer question" "Chat actor needs to use context in previous notes and chat history to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@ -252,13 +256,15 @@ def test_answer_from_chat_history_and_currently_retrieved_content():
"Chat actor needs to use context across currently retrieved notes and chat history to answer question" "Chat actor needs to use context across currently retrieved notes and chat history to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="Testatron was born on 1st April 1984 in Testville.", # Assume context retrieved from notes for the user_query references=[
"Testatron was born on 1st April 1984 in Testville."
], # Assume context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@ -275,13 +281,13 @@ def test_no_answer_in_chat_history_or_retrieved_content():
"Chat actor should say don't know as not enough contexts in chat history or retrieved to answer question" "Chat actor should say don't know as not enough contexts in chat history or retrieved to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Where was I born?", user_query="Where was I born?",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@ -300,23 +306,20 @@ def test_no_answer_in_chat_history_or_retrieved_content():
def test_answer_requires_current_date_awareness(): def test_answer_requires_current_date_awareness():
"Chat actor should be able to answer questions relative to current date using provided notes" "Chat actor should be able to answer questions relative to current date using provided notes"
# Arrange # Arrange
context = f""" context = [
# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner" f"""{datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
# {datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch" Expenses:Food:Dining 10.00 USD""",
Expenses:Food:Dining 10.00 USD f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
# 2020-04-01 "SuperMercado" "Bananas" f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Groceries 10.00 USD Expenses:Food:Dining 10.00 USD""",
]
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="What did I have for Dinner today?", user_query="What did I have for Dinner today?",
api_key=api_key, api_key=api_key,
) )
@ -334,23 +337,20 @@ def test_answer_requires_current_date_awareness():
def test_answer_requires_date_aware_aggregation_across_provided_notes(): def test_answer_requires_date_aware_aggregation_across_provided_notes():
"Chat actor should be able to answer questions that require date aware aggregation across multiple notes" "Chat actor should be able to answer questions that require date aware aggregation across multiple notes"
# Arrange # Arrange
context = f""" context = [
# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner" f"""# {datetime.now().strftime("%Y-%m-%d")} "Naco Taco" "Tacos for Dinner"
Expenses:Food:Dining 10.00 USD Expenses:Food:Dining 10.00 USD""",
f"""{datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch"
# {datetime.now().strftime("%Y-%m-%d")} "Sagar Ratna" "Dosa for Lunch" Expenses:Food:Dining 10.00 USD""",
Expenses:Food:Dining 10.00 USD f"""2020-04-01 "SuperMercado" "Bananas"
Expenses:Food:Groceries 10.00 USD""",
# 2020-04-01 "SuperMercado" "Bananas" f"""2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Groceries 10.00 USD Expenses:Food:Dining 10.00 USD""",
]
# 2020-01-01 "Naco Taco" "Burittos for Dinner"
Expenses:Food:Dining 10.00 USD
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="How much did I spend on dining this year?", user_query="How much did I spend on dining this year?",
api_key=api_key, api_key=api_key,
) )
@ -366,14 +366,14 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
"Chat actor should be able to answer general questions not requiring looking at chat history or notes" "Chat actor should be able to answer general questions not requiring looking at chat history or notes"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
# Act # Act
response = converse( response = converse(
text="", # Assume no context retrieved from notes for the user_query references=[], # Assume no context retrieved from notes for the user_query
user_query="Write a haiku about unit testing in 3 lines", user_query="Write a haiku about unit testing in 3 lines",
conversation_log=populate_chat_history(message_list), conversation_log=populate_chat_history(message_list),
api_key=api_key, api_key=api_key,
@ -393,20 +393,18 @@ def test_answer_general_question_not_in_chat_history_or_retrieved_content():
def test_ask_for_clarification_if_not_enough_context_in_question(): def test_ask_for_clarification_if_not_enough_context_in_question():
"Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context" "Chat actor should ask for clarification if question cannot be answered unambiguously with the provided context"
# Arrange # Arrange
context = f""" context = [
# Ramya f"""# Ramya
My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani. My sister, Ramya, is married to Kali Devi. They have 2 kids, Ravi and Rani.""",
f"""# Fang
# Fang My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li.""",
My sister, Fang Liu is married to Xi Li. They have 1 kid, Xiao Li. f"""# Aiyla
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.""",
# Aiyla ]
My sister, Aiyla is married to Tolga. They have 3 kids, Yildiz, Ali and Ahmet.
"""
# Act # Act
response = converse( response = converse(
text=context, # Assume context retrieved from notes for the user_query references=context, # Assume context retrieved from notes for the user_query
user_query="How many kids does my older sister have?", user_query="How many kids does my older sister have?",
api_key=api_key, api_key=api_key,
) )

View file

@ -56,8 +56,8 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
def test_answer_from_chat_history(chat_client): def test_answer_from_chat_history(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -78,8 +78,12 @@ def test_answer_from_chat_history(chat_client):
def test_answer_from_currently_retrieved_content(chat_client): def test_answer_from_currently_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -97,8 +101,12 @@ def test_answer_from_currently_retrieved_content(chat_client):
def test_answer_from_chat_history_and_previously_retrieved_content(chat_client): def test_answer_from_chat_history_and_previously_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", "Testatron was born on 1st April 1984 in Testville."), (
"When was I born?",
"You were born on 1st April 1984.",
["Testatron was born on 1st April 1984 in Testville."],
),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -119,8 +127,8 @@ def test_answer_from_chat_history_and_previously_retrieved_content(chat_client):
def test_answer_from_chat_history_and_currently_retrieved_content(chat_client): def test_answer_from_chat_history_and_currently_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Xi Li. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -143,8 +151,8 @@ def test_no_answer_in_chat_history_or_retrieved_content(chat_client):
"Chat director should say don't know as not enough contexts in chat history or retrieved to answer question" "Chat director should say don't know as not enough contexts in chat history or retrieved to answer question"
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -197,9 +205,9 @@ def test_answer_requires_date_aware_aggregation_across_provided_notes(chat_clien
def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client): def test_answer_general_question_not_in_chat_history_or_retrieved_content(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)
@ -243,9 +251,9 @@ def test_ask_for_clarification_if_not_enough_context_in_question(chat_client):
def test_answer_in_chat_history_beyond_lookback_window(chat_client): def test_answer_in_chat_history_beyond_lookback_window(chat_client):
# Arrange # Arrange
message_list = [ message_list = [
("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", ""), ("Hello, my name is Testatron. Who are you?", "Hi, I am Khoj, a personal assistant. How can I help?", []),
("When was I born?", "You were born on 1st April 1984.", ""), ("When was I born?", "You were born on 1st April 1984.", []),
("Where was I born?", "You were born Testville.", ""), ("Where was I born?", "You were born Testville.", []),
] ]
populate_chat_history(message_list) populate_chat_history(message_list)