Mirror of https://github.com/khoj-ai/khoj.git, synced 2025-02-17 08:04:21 +00:00
add a lock around chat operations to prevent the offline model from being bombarded by concurrent requests and hogging compute resources

- This also solves #367
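As context for the diff below, here is a minimal self-contained sketch of the serialization pattern this commit applies. `chat_lock` matches the name introduced in the diff; `generate_with_lock` and the bare `model.generate(prompt)` signature are illustrative stand-ins, not code from the repository:

```python
import threading

chat_lock = threading.Lock()  # one process-wide lock shared by every chat entry point

def generate_with_lock(model, prompt: str) -> str:
    # Only one thread can hold the lock, so concurrent chat requests queue up
    # instead of all running CPU-heavy inference on the offline model at once.
    chat_lock.acquire()
    try:
        return model.generate(prompt)
    finally:
        # Release even if generate() raises, so a failed request
        # cannot deadlock every subsequent chat request.
        chat_lock.release()
```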
parent 6c3074061b
commit 1c52a6993f

2 changed files with 13 additions and 3 deletions
```diff
--- a/src/khoj/processor/conversation/gpt4all/chat_model.py
+++ b/src/khoj/processor/conversation/gpt4all/chat_model.py
@@ -10,6 +10,7 @@ from gpt4all import GPT4All
 from khoj.processor.conversation.utils import ThreadedGenerator, generate_chatml_messages_with_context
 from khoj.processor.conversation import prompts
 from khoj.utils.constants import empty_escape_sequences
+from khoj.utils import state
 
 logger = logging.getLogger(__name__)
 
```
```diff
@@ -58,7 +59,11 @@ def extract_questions_offline(
         next_christmas_date=next_christmas_date,
     )
     message = system_prompt + example_questions
-    response = gpt4all_model.generate(message, max_tokens=200, top_k=2, temp=0, n_batch=128)
+    state.chat_lock.acquire()
+    try:
+        response = gpt4all_model.generate(message, max_tokens=200, top_k=2, temp=0, n_batch=128)
+    finally:
+        state.chat_lock.release()
 
     # Extract, Clean Message from GPT's Response
     try:
```
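As an aside, the acquire/try/finally sequence above is behaviorally equivalent to Python's context-manager form. A sketch of the same critical section written with a `with` block, which acquires on entry and releases on exit even when an exception is raised (`gpt4all_model` and `message` refer to the names in the hunk above; this is not what the commit uses):

```python
with state.chat_lock:
    response = gpt4all_model.generate(message, max_tokens=200, top_k=2, temp=0, n_batch=128)
```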
```diff
@@ -162,6 +167,10 @@ def llm_thread(g, messages: List[ChatMessage], model: GPT4All):
     templated_user_message = prompts.general_conversation_llamav2.format(query=user_message.content)
     prompted_message = templated_system_message + chat_history + templated_user_message
     response_iterator = model.generate(prompted_message, streaming=True, max_tokens=1000, n_batch=256)
-    for response in response_iterator:
-        g.send(response)
+    state.chat_lock.acquire()
+    try:
+        for response in response_iterator:
+            g.send(response)
+    finally:
+        state.chat_lock.release()
     g.close()
```
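Note that in `llm_thread` the lock wraps the `for` loop rather than the `model.generate(...)` call itself. With `streaming=True` the response comes back as an iterator, and, assuming the usual lazy-generator behavior, the expensive token generation only runs as the loop consumes it, so the lock must be held for the whole iteration. A toy, self-contained illustration of that assumption (`generate` here is a stand-in, not GPT4All's API):

```python
import time

def generate(prompt: str, streaming: bool = False):
    # Toy stand-in for a streaming model call: the generator is returned
    # immediately, but the expensive work runs only as it is consumed.
    def tokens():
        for word in prompt.split():
            time.sleep(0.01)  # stand-in for per-token inference cost
            yield word
    return tokens()

it = generate("hello offline model", streaming=True)  # returns instantly, no work done yet
first = next(it)  # the first real "inference" happens here, inside the consumer
```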
```diff
--- a/src/khoj/utils/state.py
+++ b/src/khoj/utils/state.py
@@ -25,6 +25,7 @@ port: int = None
 cli_args: List[str] = None
 query_cache = LRU()
 config_lock = threading.Lock()
+chat_lock = threading.Lock()
 SearchType = utils_config.SearchType
 telemetry: List[Dict[str, str]] = []
 previous_query: str = None
```
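Because `chat_lock` is a module-level `threading.Lock` in `state.py`, it is shared by every request-handler thread in the process, just like the existing `config_lock`. A small self-contained demo (hypothetical names, not from the codebase) of three threads contending for such a lock the way concurrent chat requests contend for `state.chat_lock`:

```python
import threading
import time

chat_lock = threading.Lock()

def fake_chat_request(name: str) -> None:
    with chat_lock:
        # Stand-in for an expensive model.generate() call; only one
        # thread executes this block at a time.
        print(f"{name} acquired the lock")
        time.sleep(0.1)

threads = [threading.Thread(target=fake_chat_request, args=(f"request-{i}",)) for i in range(3)]
for t in threads:
    t.start()
for t in threads:
    t.join()
```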