From b0ee78586cc9c972734953ac89e81d423c228fcb Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Thu, 18 Jul 2024 02:39:56 +0530
Subject: [PATCH] Improve offline chat truncation to consider message
 separator tokens

---
 src/khoj/processor/conversation/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/khoj/processor/conversation/utils.py b/src/khoj/processor/conversation/utils.py
index c005dde7..797066d7 100644
--- a/src/khoj/processor/conversation/utils.py
+++ b/src/khoj/processor/conversation/utils.py
@@ -186,7 +186,7 @@ def generate_chatml_messages_with_context(
 
 def truncate_messages(
     messages: list[ChatMessage],
-    max_prompt_size,
+    max_prompt_size: int,
     model_name: str,
     loaded_model: Optional[Llama] = None,
     tokenizer_name=None,
@@ -232,7 +232,8 @@ def truncate_messages(
     tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
 
     # Drop older messages until under max supported prompt size by model
-    while (tokens + system_message_tokens) > max_prompt_size and len(messages) > 1:
+    # Reserves 4 tokens to demarcate each message (e.g. <|im_start|>user, <|im_end|>, <|endoftext|>)
+    while (tokens + system_message_tokens + 4 * len(messages)) > max_prompt_size and len(messages) > 1:
         messages.pop()
         tokens = sum([len(encoder.encode(message.content)) for message in messages if type(message.content) == str])
 
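
Why the added "+ 4 * len(messages)" term: chat templates like ChatML wrap every
message in separator tokens (<|im_start|>role, <|im_end|>, <|endoftext|>), so a
budget that counts only message content under-estimates the rendered prompt by
roughly four tokens per message, and the old loop could still overflow
max_prompt_size. Below is a minimal standalone sketch of this budgeting logic
for illustration; count_tokens() and the (role, content) tuples are hypothetical
stand-ins for the module's real encoder and ChatMessage objects, not Khoj's
actual API.

    PER_MESSAGE_OVERHEAD = 4  # approx. tokens for <|im_start|>role ... <|im_end|> framing


    def count_tokens(text: str) -> int:
        # Crude whitespace stand-in; the real code uses a proper encoder (e.g. tiktoken).
        return len(text.split())


    def truncate(messages: list[tuple[str, str]], system_message: str, max_prompt_size: int) -> list[tuple[str, str]]:
        """Drop oldest messages until content plus separator tokens fit the budget."""
        system_tokens = count_tokens(system_message)
        tokens = sum(count_tokens(content) for _, content in messages)
        # Reserve PER_MESSAGE_OVERHEAD tokens per surviving message, mirroring the
        # patch's `4 * len(messages)` term, so the rendered prompt cannot overshoot
        # max_prompt_size by the cost of the chat-template separators.
        while tokens + system_tokens + PER_MESSAGE_OVERHEAD * len(messages) > max_prompt_size and len(messages) > 1:
            messages.pop()  # list is ordered newest-first, so pop() drops the oldest message
            tokens = sum(count_tokens(content) for _, content in messages)
        return messages

For example, with max_prompt_size=4096 and 20 messages in history, the
reservation sets aside 80 tokens for separators that the content-only count
would otherwise have handed to message text.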