mirror of https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Truncate message logs to below max supported prompt size by model
- Use tiktoken to count tokens for chat models
- Make the number of conversation turns added to the prompt configurable via a method argument to generate_chatml_messages_with_context
This commit is contained in:
parent 4725416fbd
commit 7e36f421f9

4 changed files with 30 additions and 6 deletions
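
Before the file-by-file diffs, the gist of the change: measure the assembled prompt in model tokens rather than characters, and drop the oldest conversation history until the prompt fits the model's context window. A minimal sketch of the counting step, using tiktoken directly (the messages list is illustrative; the model limits mirror the max_prompt_size map introduced below):

# Minimal sketch of the token counting this commit adopts (illustrative
# messages; the real list is built from Khoj's conversation log).
import tiktoken

# Per-model context limits, mirroring the max_prompt_size map in the diff
max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}

messages = [
    {"role": "system", "content": "You are Khoj, a personal assistant."},
    {"role": "user", "content": "Summarize my notes on prompt truncation."},
]

# Count tokens across every value of every message dict, as the new code does
encoder = tiktoken.encoding_for_model("gpt-3.5-turbo")
tokens = sum(len(encoder.encode(value)) for message in messages for value in message.values())
print(f"{tokens} of {max_prompt_size['gpt-3.5-turbo']} tokens used")
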
@@ -41,6 +41,7 @@ dependencies = [
     "fastapi == 0.77.1",
     "jinja2 == 3.1.2",
     "openai >= 0.27.0",
+    "tiktoken >= 0.3.0",
     "pillow == 9.3.0",
     "pydantic == 1.9.1",
     "pyqt6 == 6.3.1",

@@ -247,6 +247,7 @@ Question: {user_query}"""
         conversation_primer,
         personality_primer,
         conversation_log,
+        model,
     )
 
     # Get Response from GPT
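
The hunk above, read with the commit message, threads the active model name into the chatml message builder. A plausible reconstruction of the call site after this change (an assumption; the enclosing function and variables come from the hunk's context lines, not shown in full here):

# Reconstructed call site (assumed). Passing `model` lets the builder pick
# the matching tokenizer and context window when truncating history.
messages = generate_chatml_messages_with_context(
    conversation_primer,
    personality_primer,
    conversation_log,
    model,
)
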
@@ -1,22 +1,44 @@
 # Standard Packages
 from datetime import datetime
 
+# External Packages
+import tiktoken
+
 # Internal Packages
 from khoj.utils.helpers import merge_dicts
 
 
-def generate_chatml_messages_with_context(user_message, system_message, conversation_log={}):
+max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}
+
+
+def generate_chatml_messages_with_context(
+    user_message, system_message, conversation_log={}, model_name="gpt-3.5-turbo", lookback_turns=2
+):
     """Generate messages for ChatGPT with context from previous conversation"""
     # Extract Chat History for Context
     chat_logs = [f'{chat["message"]}\n\nNotes:\n{chat.get("context","")}' for chat in conversation_log.get("chat", [])]
-    last_backnforth = reciprocal_conversation_to_chatml(chat_logs[-2:])
-    rest_backnforth = reciprocal_conversation_to_chatml(chat_logs[-4:-2])
+    rest_backnforths = []
+    # Extract in reverse chronological order
+    for user_msg, assistant_msg in zip(chat_logs[-2::-2], chat_logs[::-2]):
+        if len(rest_backnforths) >= 2 * lookback_turns:
+            break
+        rest_backnforths += reciprocal_conversation_to_chatml([user_msg, assistant_msg])[::-1]
 
     # Format user and system messages to chatml format
     system_chatml_message = [message_to_chatml(system_message, "system")]
     user_chatml_message = [message_to_chatml(user_message, "user")]
 
-    return rest_backnforth + system_chatml_message + last_backnforth + user_chatml_message
+    messages = user_chatml_message + rest_backnforths[:2] + system_chatml_message + rest_backnforths[2:]
+
+    # Truncate oldest messages from conversation history until under max supported prompt size by model
+    encoder = tiktoken.encoding_for_model(model_name)
+    tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
+    while tokens > max_prompt_size[model_name]:
+        messages.pop()
+        tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
+
+    # Return message in chronological order
+    return messages[::-1]
 
 
 def reciprocal_conversation_to_chatml(message_pair):
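
A sketch of how the updated helper might be exercised end to end. The module path and the shape of conversation_log are assumptions inferred from this diff; only chat["message"] and chat.get("context") are read from each log entry:

# Hypothetical usage (module path assumed from this repository's layout)
from khoj.processor.conversation.utils import generate_chatml_messages_with_context

conversation_log = {
    "chat": [
        {"message": "When does my passport expire?", "context": "Passport expires 2029-01-01"},
        {"message": "Your passport expires on 1 January 2029.", "context": ""},
    ]
}

messages = generate_chatml_messages_with_context(
    user_message="And where did I renew it?",
    system_message="You are Khoj, a personal assistant.",
    conversation_log=conversation_log,
    model_name="gpt-3.5-turbo",
    lookback_turns=2,
)

# Messages come back oldest-first with the current user message last; the
# system message slots in just before the most recent back-and-forth.
for message in messages:
    print(message["role"], ":", message["content"][:60])
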
@@ -47,7 +47,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
     expected_responses = ["Khoj", "khoj"]
     assert response.status_code == 200
     assert any([expected_response in response_message for expected_response in expected_responses]), (
-        "Expected assistants name, [K|k]hoj, in response but got" + response_message
+        "Expected assistants name, [K|k]hoj, in response but got: " + response_message
     )
 
 
@@ -69,7 +69,7 @@ def test_answer_from_chat_history(chat_client):
     expected_responses = ["Testatron", "testatron"]
     assert response.status_code == 200
     assert any([expected_response in response_message for expected_response in expected_responses]), (
-        "Expected [T|t]estatron in response but got" + response_message
+        "Expected [T|t]estatron in response but got: " + response_message
     )
 
 