Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-23 23:48:56 +01:00
Use past user messages and inferred questions as context to extract questions

- Keep inferred questions in logs
- Improve prompt to GPT to try to use past questions as context
- Pass past user messages and inferred questions as context to help GPT extract complete questions
- This should improve search result quality

Example of expected inferred questions from a user message, using history:
1. "What is the name of Arun's daughter?" => "What is the name of Arun's daughter"
2. "Where does she study?" => "Where does Arun's daughter study?" OR "Where does Arun's daughter, Reena, study?"
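For illustration, a call to the updated function might look like the sketch below. The conversation_log shape is taken from the diff that follows; the names and values are made up, and the expected output reflects the commit's stated intent rather than a guaranteed completion.

queries = extract_questions(
    "Where does she study?",
    conversation_log={
        "chat": [
            {
                "by": "khoj",
                "intent": {
                    "query": "What is the name of Arun's daughter?",
                    "inferred-queries": ["What is the name of Arun's daughter?"],
                },
            }
        ]
    },
)
# Hoped-for result: ["Where does Arun's daughter study?"]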
This commit is contained in:
parent 1a5d1130f4
commit d871e04a81

2 changed files with 37 additions and 12 deletions
@@ -78,40 +78,63 @@ Summarize the notes in second person perspective:"""
     return str(story).replace("\n\n", "")


-def extract_questions(text, model="text-davinci-003", api_key=None, temperature=0, max_tokens=100):
+def extract_questions(
+    text, model="text-davinci-003", conversation_log={}, api_key=None, temperature=0, max_tokens=100
+):
     """
     Infer search queries to retrieve relevant notes to answer user query
     """
     # Initialize Variables
     openai.api_key = api_key or os.getenv("OPENAI_API_KEY")

+    # Extract Past User Message and Inferred Questions from Conversation Log
+    chat_history = "".join(
+        [
+            f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n'
+            for chat in conversation_log.get("chat", [])[-4:]
+            if chat["by"] == "khoj"
+        ]
+    )
+
     # Get dates relative to today for prompt creation
     today = datetime.today()
     current_new_year = today.replace(month=1, day=1)
     last_new_year = current_new_year.replace(year=today.year - 1)

     prompt = f"""
-You are Khoj, a chat assistant with the ability to search the users notes
-What searches, if any, will you need to perform to answer the users question below? Provide search queries as a JSON list of strings
+You are Khoj, a chat assistant with the ability to search the users notes and continue the existing conversation.
+What searches, if any, will you need to perform to answer the users question below?
+Provide search queries as a JSON list of strings
 Current Date: {today.strftime("%HH:%MM %A, %Y-%m-%d")}

 Q: How was my trip to Cambodia?

-["My Cambodia trip experience"]
+["How was my trip to Cambodia?"]

-Q: How are you feeling?
+Q: When did i go there?

-[]
+["When did I go to Cambodia?"]

 Q: What national parks did I go to last year?

 ["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]

+Q: How are you feeling today?
+
+[]
+
 Q: Is Bob older than Tom?

 ["When was Bob born?", "What is Tom's age?"]

-Q: {text}"""
+Q: What is their age difference?
+
+["What is Bob's age?", "What is Tom's age?"]
+
+{chat_history}
+Q: {text}
+
+"""

     # Get Response from GPT
     response = openai.Completion.create(prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens)
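Two details of the new extract_questions are worth noting. First, the f-string interpolates the inferred-queries list directly, so the history section of the prompt is rendered with Python's list repr, matching the few-shot examples above. A small illustration (the sample entry is made up):

entry = {
    "by": "khoj",
    "intent": {
        "query": "Where does she study?",
        "inferred-queries": ["Where does Arun's daughter study?"],
    },
}
# Renders the same Q/answer layout the prompt's examples use
print(f'Q: {entry["intent"]["query"]}\n\n{entry["intent"].get("inferred-queries") or [entry["intent"]["query"]]}\n\n')
# Q: Where does she study?
#
# ["Where does Arun's daughter study?"]

Second, the hunk ends before the completion is decoded. Presumably the response text is parsed as a JSON list of strings, roughly along these lines (the parsing code is outside this diff, so treat this as a sketch, not the actual implementation):

import json

# The v0 openai.Completion response exposes the text at choices[0].text;
# the prompt's examples are valid JSON lists, so json.loads should apply.
questions = json.loads(response["choices"][0]["text"].strip())
# e.g. ["When did I go to Cambodia?"]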
@@ -204,15 +204,14 @@ def chat(q: Optional[str] = None):
     else:
         return {"status": "ok", "response": []}

-    # Extract search queries from user message
-    queries = extract_questions(q, model=model, api_key=api_key)
+    # Infer search queries from user message
+    inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)

     # Collate search results as context for GPT
     result_list = []
-    for query in queries:
+    for query in inferred_queries:
         result_list.extend(search(query, n=2, r=True, score_threshold=0, dedupe=False))
     collated_result = "\n\n".join({f"# {item.additional['compiled']}" for item in result_list})
-    logger.debug(f"Reference Context:\n{collated_result}")

     try:
         gpt_response = converse(collated_result, q, meta_log, api_key=api_key)
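A side note on the unchanged collation line: the set comprehension deduplicates identical compiled results across the inferred queries, at the cost of deterministic ordering. A toy illustration (stand-in strings, not real search results):

compiled = ["Note A", "Note A", "Note B"]  # e.g. two inferred queries both matched Note A
collated_result = "\n\n".join({f"# {note}" for note in compiled})
# "# Note A" appears once; iteration order of the set is arbitrary.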
@@ -224,7 +223,10 @@ def chat(q: Optional[str] = None):
     # Update Conversation History
     state.processor_config.conversation.chat_session = message_to_prompt(q, chat_session, gpt_message=gpt_response)
     state.processor_config.conversation.meta_log["chat"] = message_to_log(
-        q, gpt_response, khoj_message_metadata={"context": collated_result}, conversation_log=meta_log.get("chat", [])
+        q,
+        gpt_response,
+        khoj_message_metadata={"context": collated_result, "intent": {"inferred-queries": inferred_queries}},
+        conversation_log=meta_log.get("chat", []),
     )

     return {"status": status, "response": gpt_response, "context": collated_result}
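With the metadata change above, a khoj-authored entry in meta_log should come out roughly like this (the shape is inferred from what extract_questions reads; message_to_log's internals are not shown in this diff, so treat the exact layout as an assumption):

logged_entry = {
    "by": "khoj",
    "intent": {
        "query": "Where does she study?",
        "inferred-queries": ["Where does Arun's daughter study?"],
    },
    "context": "# ...collated search results...",
}

extract_questions then folds the last few such entries into its prompt as chat history, which is how a follow-up like "Where does she study?" gets completed into a standalone search query.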