From 3675938df6cc67ff1e738d73b3f48576a581546c Mon Sep 17 00:00:00 2001
From: Debanjum Singh Solanky
Date: Tue, 13 Aug 2024 12:51:25 +0530
Subject: [PATCH] Support passing temperature to offline chat model chat actors

- Use temperature of 0 by default for extract questions offline chat actor
- Use temperature of 0.2 for send_message_to_model_offline (this is the default temperature set by llama.cpp)
---
 .../processor/conversation/offline/chat_model.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/src/khoj/processor/conversation/offline/chat_model.py b/src/khoj/processor/conversation/offline/chat_model.py
index 2fabddc7..0244480f 100644
--- a/src/khoj/processor/conversation/offline/chat_model.py
+++ b/src/khoj/processor/conversation/offline/chat_model.py
@@ -32,6 +32,7 @@ def extract_questions_offline(
     location_data: LocationData = None,
     user: KhojUser = None,
     max_prompt_size: int = None,
+    temperature: float = 0,
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -80,7 +81,11 @@ def extract_questions_offline(
     state.chat_lock.acquire()
     try:
         response = send_message_to_model_offline(
-            messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
+            messages,
+            loaded_model=offline_chat_model,
+            model=model,
+            max_prompt_size=max_prompt_size,
+            temperature=temperature,
         )
     finally:
         state.chat_lock.release()
@@ -232,6 +237,7 @@ def send_message_to_model_offline(
     messages: List[ChatMessage],
     loaded_model=None,
     model="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    temperature: float = 0.2,
     streaming=False,
     stop=[],
     max_prompt_size: int = None,
@@ -239,7 +245,9 @@ def send_message_to_model_offline(
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
-    response = offline_chat_model.create_chat_completion(messages_dict, stop=stop, stream=streaming)
+    response = offline_chat_model.create_chat_completion(
+        messages_dict, stop=stop, stream=streaming, temperature=temperature
+    )
     if streaming:
         return response
     else:
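
For reviewers, a minimal usage sketch of the new parameter (not part of the patch). It assumes ChatMessage comes from langchain.schema as in the existing call sites, that download_model() can fetch the GGUF model on first use, and that the non-streaming return shape is whatever the truncated `else:` branch produces; treat it as illustrative, not as the project's documented API.

# Illustrative sketch: call send_message_to_model_offline with an explicit temperature.
# Assumptions (not shown in this patch): langchain's ChatMessage class and the
# khoj module path below; running this downloads the GGUF model via download_model().
from langchain.schema import ChatMessage

from khoj.processor.conversation.offline.chat_model import send_message_to_model_offline

messages = [
    ChatMessage(role="system", content="You are a concise assistant."),
    ChatMessage(role="user", content="List three uses of a paperclip."),
]

# temperature=0 makes decoding deterministic, mirroring the new default for the
# extract questions chat actor; omitting it falls back to 0.2, llama.cpp's default.
response = send_message_to_model_offline(
    messages,
    model="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
    temperature=0,
)

# The exact non-streaming return shape is defined in the truncated `else:` branch
# above, so just print whatever comes back.
print(response)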