mirror of https://github.com/khoj-ai/khoj.git
Scale no. of chat history messages to use as context with max_prompt_size
Previously, lookback_turns was set to a static 2. But now that we support more chat models, their prompt sizes vary considerably. Make lookback_turns proportional to max_prompt_size; truncate_messages can still remove messages later if they exceed max_prompt_size. This lets Khoj pass more of the chat history as context to models with larger context windows.
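As a rough illustration of the new behavior (a minimal sketch: the 750-tokens-per-turn divisor is taken from the change below, but the per-model prompt sizes are assumed example values, not Khoj's actual model_to_prompt_size table):

# Illustrative sketch only; prompt sizes below are assumptions for demonstration.
example_prompt_sizes = {"gpt-3.5-turbo": 4096, "gpt-4": 8192}

for model_name, max_prompt_size in example_prompt_sizes.items():
    lookback_turns = max_prompt_size // 750  # same formula as in the diff
    print(f"{model_name}: {lookback_turns} lookback turns (previously a static 2)")

With these assumed sizes, gpt-3.5-turbo would get 5 lookback turns and gpt-4 would get 10, instead of a fixed 2 for both.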
parent 90e1d9e3d6
commit 644c3b787f

1 changed file with 13 additions and 11 deletions
@@ -3,6 +3,7 @@ import logging
 from time import perf_counter
 import json
 from datetime import datetime
+import queue
 import tiktoken
 
 # External packages
@@ -10,7 +11,6 @@ from langchain.schema import ChatMessage
 from transformers import AutoTokenizer
 
 # Internal Packages
-import queue
 from khoj.utils.helpers import merge_dicts
 
 
@@ -89,11 +89,22 @@ def generate_chatml_messages_with_context(
     system_message,
     conversation_log={},
     model_name="gpt-3.5-turbo",
-    lookback_turns=2,
     max_prompt_size=None,
     tokenizer_name=None,
 ):
     """Generate messages for ChatGPT with context from previous conversation"""
+    # Set max prompt size from user config, pre-configured for model or to default prompt size
+    try:
+        max_prompt_size = max_prompt_size or model_to_prompt_size[model_name]
+    except:
+        max_prompt_size = 2000
+        logger.warning(
+            f"Fallback to default prompt size: {max_prompt_size}.\nConfigure max_prompt_size for unsupported model: {model_name} in Khoj settings to longer context window."
+        )
+
+    # Scale lookback turns proportional to max prompt size supported by model
+    lookback_turns = max_prompt_size // 750
+
     # Extract Chat History for Context
     chat_logs = []
     for chat in conversation_log.get("chat", []):
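Taken in isolation, the added fallback-and-scaling logic behaves as sketched below (a self-contained approximation: resolve_prompt_budget is a hypothetical helper name, model_to_prompt_size is stubbed with an assumed value, and the diff's bare except is narrowed to KeyError for clarity):

model_to_prompt_size = {"gpt-3.5-turbo": 4096}  # stub; the real table lives elsewhere in Khoj

def resolve_prompt_budget(model_name, max_prompt_size=None):
    # Mirrors the diff's fallback: user config, else the model's known size, else 2000
    try:
        max_prompt_size = max_prompt_size or model_to_prompt_size[model_name]
    except KeyError:  # the diff uses a bare `except:` here
        max_prompt_size = 2000
    # Scale lookback turns proportional to the prompt budget
    return max_prompt_size, max_prompt_size // 750

assert resolve_prompt_budget("gpt-3.5-turbo") == (4096, 5)
assert resolve_prompt_budget("unknown-model") == (2000, 2)
assert resolve_prompt_budget("unknown-model", max_prompt_size=8192) == (8192, 10)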
@@ -113,15 +124,6 @@ def generate_chatml_messages_with_context(
 
     messages = user_chatml_message + rest_backnforths + system_chatml_message
 
-    # Set max prompt size from user config, pre-configured for model or to default prompt size
-    try:
-        max_prompt_size = max_prompt_size or model_to_prompt_size[model_name]
-    except:
-        max_prompt_size = 2000
-        logger.warning(
-            f"Fallback to default prompt size: {max_prompt_size}.\nConfigure max_prompt_size for unsupported model: {model_name} in Khoj settings to longer context window."
-        )
-
     # Truncate oldest messages from conversation history until under max supported prompt size by model
     messages = truncate_messages(messages, max_prompt_size, model_name, tokenizer_name)
 
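For callers, the net effect is that lookback_turns no longer needs to be passed in. A hedged sketch of a call site under assumptions (the hunk only shows the signature from system_message onward, so the leading user-message argument is an assumption, as is the shape of conversation_log beyond its "chat" key):

# Hypothetical call site; the first positional argument is assumed,
# since the hunk does not show the parameters before system_message.
messages = generate_chatml_messages_with_context(
    "What did we discuss yesterday?",
    system_message="You are Khoj, a personal assistant.",
    conversation_log={"chat": []},  # prior turns, per conversation_log.get("chat", [])
    model_name="gpt-3.5-turbo",
    max_prompt_size=None,  # resolved from model_to_prompt_size, else defaults to 2000
    tokenizer_name=None,
)

Per the diff's final comment, truncate_messages then trims the oldest messages until the assembled prompt fits within max_prompt_size, so the larger lookback stays safe near the budget.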
|