Revert to using GPT to extract search queries from user's message

- Reasons:
  - GPT can extract date-aware search queries with date filters
    better than ChatGPT given the same prompt.
  - Need quality more than cost savings for now.
  - Need to figure out ways to improve the prompt for ChatGPT before using it
This commit is contained in:
Debanjum Singh Solanky 2023-03-18 04:06:14 -06:00
parent e28526bbc9
commit 601ff2541b
2 changed files with 58 additions and 73 deletions

View file

@ -9,11 +9,7 @@ import openai
# Internal Packages
from khoj.utils.constants import empty_escape_sequences
from khoj.processor.conversation.utils import (
message_to_prompt,
message_to_chatml,
generate_chatml_messages_with_context,
)
from khoj.processor.conversation.utils import message_to_prompt, generate_chatml_messages_with_context
logger = logging.getLogger(__name__)
@ -82,104 +78,92 @@ Summarize the notes in second person perspective:"""
return str(story).replace("\n\n", "")
def extract_questions(message, model="gpt-3.5-turbo", conversation_log={}, api_key=None, temperature=0):
def extract_questions(text, model="text-davinci-003", conversation_log={}, api_key=None, temperature=0, max_tokens=100):
"""
Infer search queries to retrieve relevant notes to respond to user's message
Infer search queries to retrieve relevant notes to answer user query
"""
# Initialize Variables
openai.api_key = api_key or os.getenv("OPENAI_API_KEY")
# Extract Past User Message and Inferred Questions from Conversation Log
chat_history = "".join(
[
f'Q: {chat["intent"]["query"]}\n\n{chat["intent"].get("inferred-queries") or list([chat["intent"]["query"]])}\n\n{chat["message"]}\n\n'
for chat in conversation_log.get("chat", [])[-4:]
if chat["by"] == "khoj"
]
)
# Get dates relative to today for prompt creation
today = datetime.today()
current_new_year = today.replace(month=1, day=1)
last_new_year = current_new_year.replace(year=today.year - 1)
personality_primer = f"""
prompt = f"""
You are Khoj, an extremely smart and helpful search assistant with the ability to retrieve information from the users notes.
The user will provide their questions and answers to you for context.
You can:
- The user will provide their questions and answers to you for context.
- Add as much context from the previous questions and answers as required into your search queries.
- Break messages into multiple search queries when required to retrieve the relevant information.
- Add date filters to your search queries from questions and answers when required to retrieve the relevant information.
What searches, if any, will you need to perform to answer the users question?
Provide search queries as a JSON list of strings
Current Date: {today.strftime("%H:%M %A %Y-%m-%d")}"""
Current Date: {today.strftime("%A, %Y-%m-%d")}
# Extract Past User Message and Inferred Questions from Conversation Log
# fmt: off
messages = [
# Add system message to prime GPT for the task
message_to_chatml(personality_primer, role="system"),
Q: How was my trip to Cambodia?
# Add example user message, inferred question and answer triplets to explain the task
message_to_chatml("Q: How was my trip to Cambodia?", role="user"),
message_to_chatml('["How was my trip to Cambodia?"]', role="assistant"),
message_to_chatml("A: The trip was amazing. I went to the Angkor Wat temple in August 2017 and it was beautiful.", role="user"),
["How was my trip to Cambodia?"]
message_to_chatml("Q: Who did I visit that temple with?", role="user"),
message_to_chatml('["Who did I visit the Angkor Wat Temple in Cambodia with? dt>=\\"2017-08-01\\" dt<\\"2017-09-01\\""]', role="assistant"),
message_to_chatml("A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.", role="user"),
A: The trip was amazing. I went to the Angkor Wat temple and it was beautiful.
message_to_chatml("Q: What national parks did I go to last year?", role="user"),
message_to_chatml(
f'["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]',
role="assistant",
),
message_to_chatml(
f'A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year.strftime("%Y")}.',
role="user",
),
Q: Who did i visit that temple with?
message_to_chatml("Q: How are you feeling?", role="user"),
message_to_chatml("[]", role="assistant"),
message_to_chatml("A: I'm feeling a little bored. Helping you will hopefully make me feel better!", role="user"),
["Who did I visit the Angkor Wat Temple in Cambodia with?"]
message_to_chatml("Q: How many tennis balls fit in the back of a 2002 Honda Civic?", role="user"),
message_to_chatml('["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]', role="assistant"),
message_to_chatml("A: 1085 tennis balls will fit in the trunk of a Honda Civic", role="user"),
A: You visited the Angkor Wat Temple in Cambodia with Pablo, Namita and Xi.
message_to_chatml("Q: Is Bob older than Tom?", role="user"),
message_to_chatml('["When was Bob born?", "What is Tom\'s age?"]', role="assistant"),
message_to_chatml("A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.", role="user"),
Q: What national parks did I go to last year?
message_to_chatml("Q: What is their age difference?", role="user"),
message_to_chatml('["What is Bob\'s age?", "What is Tom\'s age?"]', role="assistant"),
message_to_chatml(
f"A: Bob is {current_new_year.year - 1984 - 30} years older than Tom. As Bob is {current_new_year.year - 1984} years old and Tom is 30 years old.",
role="user",
),
]
# fmt: on
["National park I visited in {last_new_year.strftime("%Y")} dt>=\\"{last_new_year.strftime("%Y-%m-%d")}\\" dt<\\"{current_new_year.strftime("%Y-%m-%d")}\\""]
# Add last few user messages, inferred queries and answer triplets from actual conversation for context
for chat in conversation_log.get("chat", [])[-4:]:
if chat["by"] == "khoj":
queries = (
chat["intent"]["inferred-queries"]
if chat["intent"].get("inferred-queries", "[]") != "[]"
else [chat["intent"]["query"]]
)
messages.extend(
[
message_to_chatml(f'Q: {chat["intent"]["query"]}', role="user"),
message_to_chatml(f"{queries}", role="assistant"),
message_to_chatml(f'{chat["message"]}', role="user"),
]
)
A: You visited the Grand Canyon and Yellowstone National Park in {last_new_year.strftime("%Y")}.
# Finally add current user message for which to infer search queries to ChatML message list
messages.append(message_to_chatml(f"Q: {message}", role="user"))
Q: How are you feeling today?
[]
A: I'm feeling a little bored. Helping you will hopefully make me feel better!
Q: How many tennis balls fit in the back of a 2002 Honda Civic?
["What is the size of a tennis ball?", "What is the trunk size of a 2002 Honda Civic?"]
A: 1085 tennis balls will fit in the trunk of a Honda Civic
Q: Is Bob older than Tom?
["When was Bob born?", "What is Tom's age?"]
A: Yes, Bob is older than Tom. As Bob was born on 1984-01-01 and Tom is 30 years old.
Q: What is their age difference?
["What is Bob's age?", "What is Tom's age?"]
A: Bob is {current_new_year.year - 1984 - 30} years older than Tom. As Bob is {current_new_year.year - 1984} years old and Tom is 30 years old.
{chat_history}
Q: {text}
"""
# Get Response from GPT
response = openai.ChatCompletion.create(
messages=messages,
model=model,
temperature=temperature,
response = openai.Completion.create(
prompt=prompt, model=model, temperature=temperature, max_tokens=max_tokens, stop=["A: ", "\n"]
)
# Extract, Clean Message from GPT's Response
response_text = response["choices"][0]["message"]["content"]
response_text = response["choices"][0]["text"]
try:
questions = json.loads(
# Clean response to increase likelihood of valid JSON. E.g replace ' with " to enclose strings
@ -189,7 +173,7 @@ Current Date: {today.strftime("%H:%M %A %Y-%m-%d")}"""
.replace("', '", '", "')
)
except json.decoder.JSONDecodeError:
logger.warn(f"GPT returned invalid JSON. Set question to empty list.\n{response_text}")
logger.warn(f"GPT returned invalid JSON. Falling back to using user message as search query.\n{response_text}")
questions = [text]
logger.debug(f"Extracted Questions by GPT: {questions}")
return questions

View file

@ -191,6 +191,7 @@ def update(t: Optional[SearchType] = None, force: Optional[bool] = False):
def chat(q: Optional[str] = None):
# Initialize Variables
api_key = state.processor_config.conversation.openai_api_key
model = state.processor_config.conversation.model
# Load Conversation History
chat_session = state.processor_config.conversation.chat_session
@ -204,7 +205,7 @@ def chat(q: Optional[str] = None):
return {"status": "ok", "response": []}
# Infer search queries from user message
inferred_queries = extract_questions(q, api_key=api_key, conversation_log=meta_log)
inferred_queries = extract_questions(q, model=model, api_key=api_key, conversation_log=meta_log)
# Collate search results as context for GPT
result_list = []