diff --git a/pyproject.toml b/pyproject.toml
index 8df96e4f..06338201 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,6 +41,7 @@ dependencies = [
     "fastapi == 0.77.1",
     "jinja2 == 3.1.2",
     "openai >= 0.27.0",
+    "tiktoken >= 0.3.0",
     "pillow == 9.3.0",
     "pydantic == 1.9.1",
     "pyqt6 == 6.3.1",
diff --git a/src/khoj/processor/conversation/gpt.py b/src/khoj/processor/conversation/gpt.py
index 14f02946..cfb236e5 100644
--- a/src/khoj/processor/conversation/gpt.py
+++ b/src/khoj/processor/conversation/gpt.py
@@ -247,6 +247,7 @@ Question: {user_query}"""
         conversation_primer,
         personality_primer,
         conversation_log,
+        model,
     )

     # Get Response from GPT
diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index 69f47aff..b8728b6a 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -1,22 +1,44 @@
 # Standard Packages
 from datetime import datetime

+# External Packages
+import tiktoken
+
 # Internal Packages
 from khoj.utils.helpers import merge_dicts


-def generate_chatml_messages_with_context(user_message, system_message, conversation_log={}):
+max_prompt_size = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}
+
+
+def generate_chatml_messages_with_context(
+    user_message, system_message, conversation_log={}, model_name="gpt-3.5-turbo", lookback_turns=2
+):
     """Generate messages for ChatGPT with context from previous conversation"""
     # Extract Chat History for Context
     chat_logs = [f'{chat["message"]}\n\nNotes:\n{chat.get("context","")}' for chat in conversation_log.get("chat", [])]
-    last_backnforth = reciprocal_conversation_to_chatml(chat_logs[-2:])
-    rest_backnforth = reciprocal_conversation_to_chatml(chat_logs[-4:-2])
+    rest_backnforths = []
+    # Extract in reverse chronological order
+    for user_msg, assistant_msg in zip(chat_logs[-2::-2], chat_logs[::-2]):
+        if len(rest_backnforths) >= 2 * lookback_turns:
+            break
+        rest_backnforths += reciprocal_conversation_to_chatml([user_msg, assistant_msg])[::-1]

     # Format user and system messages to chatml format
     system_chatml_message = [message_to_chatml(system_message, "system")]
     user_chatml_message = [message_to_chatml(user_message, "user")]

-    return rest_backnforth + system_chatml_message + last_backnforth + user_chatml_message
+    messages = user_chatml_message + rest_backnforths[:2] + system_chatml_message + rest_backnforths[2:]
+
+    # Truncate oldest messages from conversation history until under max supported prompt size by model
+    encoder = tiktoken.encoding_for_model(model_name)
+    tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
+    while tokens > max_prompt_size[model_name]:
+        messages.pop()
+        tokens = sum([len(encoder.encode(value)) for message in messages for value in message.values()])
+
+    # Return message in chronological order
+    return messages[::-1]


 def reciprocal_conversation_to_chatml(message_pair):
diff --git a/tests/test_chat_director.py b/tests/test_chat_director.py
index 44fd29be..5feaa3f3 100644
--- a/tests/test_chat_director.py
+++ b/tests/test_chat_director.py
@@ -47,7 +47,7 @@ def test_chat_with_no_chat_history_or_retrieved_content(chat_client):
     expected_responses = ["Khoj", "khoj"]
     assert response.status_code == 200
     assert any([expected_response in response_message for expected_response in expected_responses]), (
-        "Expected assistants name, [K|k]hoj, in response but got" + response_message
+        "Expected assistants name, [K|k]hoj, in response but got: " + response_message
     )

@@ -69,7 +69,7 @@ def test_answer_from_chat_history(chat_client):
     expected_responses = ["Testatron", "testatron"]
     assert response.status_code == 200
     assert any([expected_response in response_message for expected_response in expected_responses]), (
-        "Expected [T|t]estatron in response but got" + response_message
+        "Expected [T|t]estatron in response but got: " + response_message
     )