Use past user messages and inferred questions as context to extract questions

- Keep inferred questions in logs
- Improve prompt to GPT to try use past questions as context
- Pass past user message and inferred questions as context to help GPT
  extract complete questions
- This should improve search results quality

- Example Expected Inferred Questions from User Message using History:
  1. "What is the name of Arun's daughter?"
    => "What is the name of Arun's daughter?"
  2. "Where does she study?"
    => "Where does Arun's daughter study?" OR
    => "Where does Arun's daughter, Reena, study?"
This commit is contained in:
Debanjum Singh Solanky 2023-03-16 15:14:47 -06:00
parent 1a5d1130f4
commit d871e04a81
2 changed files with 37 additions and 12 deletions

View file

@ -78,40 +78,63 @@ Summarize the notes in second person perspective:"""
return str(story).replace("\n\n", "")
def extract_questions(text, model="text-davinci-003", api_key=None, temperature=0, max_tokens=100):
def extract_questions(
text, model="text-davinci-003", conversation_log={}, api_key=None, temperature=0, max_tokens=100
):
"""
Infer search queries to retrieve relevant notes to answer user query
"""
# Initialize Variables
openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = "".join(
[
f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj"
]
)
# Get dates relative to today for prompt creation
today = datetime.today()
current_new_year = today.replace(month=1, day=1)
last_new_year = current_new_year.replace(year=today.year - 1)
prompt = f"""
You are Khoj, a chat assistant with the ability to search the users notes
What searches, if any, will you need to perform to answer the users question below? Provide search queries as a JSON list of strings
You are Khoj, a chat assistant with the ability to search the users notes and continue the existing conversation.
What searches, if any, will you need to perform to answer the users question below?
Provide search queries as a JSON list of strings
Current Date: {today.strftime("%HH:%MM %A, %Y-%m-%d")}
Q: How was my trip to Cambodia?
["My Cambodia trip experience"]
["How was my trip to Cambodia?"]
Q: How are you feeling?
Q: When did i go there?
[]
["When did I go to Cambodia?"]
Q: What national parks did I go to last year?
["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]
Q: How are you feeling today?
[]
Q: Is Bob older than Tom?
["When was Bob born?", "What is Tom's age?"]
Q: {text}"""
Q: What is their age difference?
["What is Bob's age?", "What is Tom's age?"]
{chat_history}
Q: {text}
"""
# Get Response from GPT
response = openai.Completion.create(prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens)

View file

@ -204,15 +204,14 @@ def chat(q: Optional[str] = None):
else:
return {"status": "ok", "response": []}
# Extract search queries from user message
queries = extract_questions(q, model=model, api_key=api_key)
# Infer search queries from user message
inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)
# Collate search results as context for GPT
result_list = []
for query in queries:
for query in inferred_queries:
result_list.extend(search(query, n=2, r=True, score_threshold=0, dedupe=False))
collated_result = "\n\n".join({f"# {item.additional['compiled']}" for item in result_list})
logger.debug(f"Reference Context:\n{collated_result}")
try:
gpt_response = converse(collated_result, q, meta_log, api_key=api_key)
@ -224,7 +223,10 @@ def chat(q: Optional[str] = None):
# Update Conversation History
state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response)
state.processor_config.conversation.meta_log["chat"] = message_to_log(
q, gpt_response, khoj_message_metadata={"context": collated_result}, conversation_log=meta_log.get("chat", [])
q,
gpt_response,
khoj_message_metadata={"context": collated_result, "intent": {"inferred-queries": inferred_queries}},
conversation_log=meta_log.get("chat", []),
)
return {"status": status, "response": gpt_response, "context": collated_result}