Support passing temperature to offline chat model chat actors

- Use temperature of 0 by default for the extract questions offline chat actor
- Use temperature of 0.2 for send_message_to_model_offline (this is the default temperature set by llama.cpp)
This commit is contained in:
parent 878cc023a0
commit 3675938df6
1 changed file with 10 additions and 2 deletions
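For illustration, a minimal sketch of calling the updated send_message_to_model_offline with the new parameter. The import paths and message text below are assumptions drawn from the surrounding khoj codebase, not part of this commit:

# Illustrative sketch only: the module path and the ChatMessage import are
# assumed from khoj's code layout at the time; adjust to your checkout.
from langchain.schema import ChatMessage

from khoj.processor.conversation.offline.chat_model import send_message_to_model_offline

messages = [ChatMessage(role="user", content="List three note-taking tips.")]

# temperature now defaults to 0.2 (llama.cpp's default); pass 0 for
# near-deterministic output, as the question extraction actor does.
response = send_message_to_model_offline(messages, temperature=0)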
@@ -32,6 +32,7 @@ def extract_questions_offline(
     location_data: LocationData = None,
     user: KhojUser = None,
     max_prompt_size: int = None,
+    temperature: float = 0,
 ) -> List[str]:
     """
     Infer search queries to retrieve relevant notes to answer user query
@@ -80,7 +81,11 @@ def extract_questions_offline(
     state.chat_lock.acquire()
     try:
         response = send_message_to_model_offline(
-            messages, loaded_model=offline_chat_model, model=model, max_prompt_size=max_prompt_size
+            messages,
+            loaded_model=offline_chat_model,
+            model=model,
+            max_prompt_size=max_prompt_size,
+            temperature=temperature,
         )
     finally:
         state.chat_lock.release()
@@ -232,6 +237,7 @@ def send_message_to_model_offline(
     messages: List[ChatMessage],
     loaded_model=None,
     model="NousResearch/Hermes-2-Pro-Mistral-7B-GGUF",
+    temperature: float = 0.2,
     streaming=False,
     stop=[],
     max_prompt_size: int = None,
@@ -239,7 +245,9 @@ def send_message_to_model_offline(
     assert loaded_model is None or isinstance(loaded_model, Llama), "loaded_model must be of type Llama, if configured"
     offline_chat_model = loaded_model or download_model(model, max_tokens=max_prompt_size)
     messages_dict = [{"role": message.role, "content": message.content} for message in messages]
-    response = offline_chat_model.create_chat_completion(messages_dict, stop=stop, stream=streaming)
+    response = offline_chat_model.create_chat_completion(
+        messages_dict, stop=stop, stream=streaming, temperature=temperature
+    )
     if streaming:
         return response
     else:
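The last hunk forwards the temperature to llama-cpp-python's create_chat_completion. A standalone sketch of that underlying call, with a local GGUF model path assumed purely for illustration:

# Standalone sketch of the llama-cpp-python call the diff wires up.
# The model path is a placeholder; any local GGUF chat model works.
from llama_cpp import Llama

llm = Llama(model_path="hermes-2-pro-mistral-7b.Q4_K_M.gguf", n_ctx=4096)

messages_dict = [{"role": "user", "content": "Write one sentence about autumn."}]

# temperature=0 gives near-deterministic output (used for question extraction);
# temperature=0.2 is llama.cpp's default and allows slightly varied phrasing.
response = llm.create_chat_completion(messages_dict, temperature=0.2)
print(response["choices"][0]["message"]["content"])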