Update chat config schema. Make max_prompt, chat tokenizer configurable
This provides flexibility to use chat models beyond the first-party supported ones.

- Create migration script to update the khoj.yml config
- Put `enable_offline_chat` under a new `offline-chat` section; referring code is updated to accommodate this change
- Move `offline_chat_model` to `chat-model` under the new `offline-chat` section
- Put the chat `tokenizer` under the `conversation` section
- Put `max_prompt` under the existing `conversation` section, as the `max_prompt` size affects both openai and offline chat models
parent 247e75595c
commit feb4f17e3d
9 changed files with 119 additions and 24 deletions
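
For orientation before the per-file hunks: a minimal sketch of the reshaped conversation config, using the schema classes this commit introduces (field names are taken from the diff below; the logfile path is the stock default):

    from khoj.utils.rawconfig import ConversationProcessorConfig, OfflineChatProcessorConfig

    conversation = ConversationProcessorConfig(
        conversation_logfile="~/.khoj/processor/conversation/conversation_logs.json",
        offline_chat=OfflineChatProcessorConfig(enable_offline_chat=True),
        max_prompt_size=None,  # applies to both openai and offline chat models
        tokenizer=None,
    )
    # The old top-level flag now lives under the offline-chat section:
    assert conversation.offline_chat.enable_offline_chat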
@@ -19,7 +19,7 @@ from khoj.utils.config import (
 )
 from khoj.utils.helpers import resolve_absolute_path, merge_dicts
 from khoj.utils.fs_syncer import collect_files
-from khoj.utils.rawconfig import FullConfig, ProcessorConfig, ConversationProcessorConfig
+from khoj.utils.rawconfig import FullConfig, OfflineChatProcessorConfig, ProcessorConfig, ConversationProcessorConfig
 from khoj.routers.indexer import configure_content, load_content, configure_search
 
 
@@ -168,9 +168,7 @@ def configure_conversation_processor(
             conversation_config=ConversationProcessorConfig(
                 conversation_logfile=conversation_logfile,
                 openai=(conversation_config.openai if (conversation_config is not None) else None),
-                enable_offline_chat=(
-                    conversation_config.enable_offline_chat if (conversation_config is not None) else False
-                ),
+                offline_chat=conversation_config.offline_chat if conversation_config else OfflineChatProcessorConfig(),
             )
         )
     else:
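
When no conversation config exists yet, the rewritten line above falls back to a default-constructed OfflineChatProcessorConfig(). A small sketch of what that default carries, based on the field defaults declared later in this diff:

    # Offline chat stays disabled by default; the chat model default comes
    # from the OfflineChatProcessorConfig schema added in rawconfig.py below.
    section = OfflineChatProcessorConfig()
    assert section.enable_offline_chat is False
    assert section.chat_model == "llama-2-7b-chat.ggmlv3.q4_0.bin"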
@@ -236,7 +236,7 @@
                     </h3>
                 </div>
                 <div class="card-description-row">
-                    <p class="card-description">Setup chat using OpenAI</p>
+                    <p class="card-description">Setup online chat using OpenAI</p>
                 </div>
                 <div class="card-action-row">
                     <a class="card-button" href="/config/processor/conversation/openai">
@@ -261,21 +261,21 @@
                 <img class="card-icon" src="/static/assets/icons/chat.svg" alt="Chat">
                 <h3 class="card-title">
                     Offline Chat
-                    <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
-                    {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and not current_model_state.conversation_gpt4all %}
+                    <img id="configured-icon-conversation-enable-offline-chat" class="configured-icon {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and current_model_state.conversation_gpt4all %}enabled{% else %}disabled{% endif %}" src="/static/assets/icons/confirm-icon.svg" alt="Configured">
+                    {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and not current_model_state.conversation_gpt4all %}
                     <img id="misconfigured-icon-conversation-enable-offline-chat" class="configured-icon" src="/static/assets/icons/question-mark-icon.svg" alt="Not Configured" title="The model was not downloaded as expected.">
                     {% endif %}
                 </h3>
             </div>
             <div class="card-description-row">
-                <p class="card-description">Setup offline chat (Llama V2)</p>
+                <p class="card-description">Setup offline chat</p>
             </div>
-            <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
+            <div id="clear-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}enabled{% else %}disabled{% endif %}">
                 <button class="card-button" onclick="toggleEnableLocalLLLM(false)">
                     Disable
                 </button>
             </div>
-            <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
+            <div id="set-enable-offline-chat" class="card-action-row {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat %}disabled{% else %}enabled{% endif %}">
                 <button class="card-button happy" onclick="toggleEnableLocalLLLM(true)">
                     Enable
                 </button>
@@ -346,7 +346,7 @@
                 featuresHintText.classList.add("show");
             }
 
-            fetch('/api/config/data/processor/conversation/enable_offline_chat' + '?enable_offline_chat=' + enable, {
+            fetch('/api/config/data/processor/conversation/offline_chat' + '?enable_offline_chat=' + enable, {
                 method: 'POST',
                 headers: {
                     'Content-Type': 'application/json',
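
Toggling the same setting outside the web client amounts to a POST against the renamed route. A hedged sketch using Python's requests library (the host and port are assumptions for a default local Khoj server, not part of this diff):

    import requests

    # Assumed default local server address; adjust for your deployment.
    response = requests.post(
        "http://localhost:42110/api/config/data/processor/conversation/offline_chat",
        params={"enable_offline_chat": True},
    )
    response.raise_for_status()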
src/khoj/migrations/migrate_offline_chat_schema.py (new file, 83 lines)

@@ -0,0 +1,83 @@
+"""
+Current format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    enable-offline-chat: false
+    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
+    openai:
+        ...
+search-type:
+    ...
+
+New format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    offline-chat:
+        enable-offline-chat: false
+        chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+    tokenizer: null
+    max_prompt_size: null
+    conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
+    openai:
+        ...
+search-type:
+    ...
+"""
+import logging
+
+from packaging import version
+
+from khoj.utils.yaml import load_config_from_file, save_config_to_file
+
+logger = logging.getLogger(__name__)
+
+
+def migrate_offline_chat_schema(args):
+    schema_version = "0.12.3"
+    raw_config = load_config_from_file(args.config_file)
+    previous_version = raw_config.get("version")
+
+    if "processor" not in raw_config:
+        return args
+    if raw_config["processor"] is None:
+        return args
+    if "conversation" not in raw_config["processor"]:
+        return args
+
+    if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"):
+        logger.info(
+            f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable"
+        )
+        raw_config["version"] = schema_version
+
+        # Create max-prompt-size and tokenizer fields in conversation processor schema
+        raw_config["processor"]["conversation"]["max-prompt-size"] = None
+        raw_config["processor"]["conversation"]["tokenizer"] = None
+
+        # Create offline chat schema based on existing enable_offline_chat field in khoj config schema
+        offline_chat_model = (
+            raw_config["processor"]["conversation"]
+            .get("offline-chat", {})
+            .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin")
+        )
+        raw_config["processor"]["conversation"]["offline-chat"] = {
+            "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False),
+            "chat-model": offline_chat_model,
+        }
+
+        # Delete old enable-offline-chat field from conversation processor schema
+        if "enable-offline-chat" in raw_config["processor"]["conversation"]:
+            del raw_config["processor"]["conversation"]["enable-offline-chat"]
+
+        save_config_to_file(raw_config, args.config_file)
+    return args
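
Concretely, the migration rewrites the conversation section of the config dict roughly as follows (an illustrative before/after with unrelated keys elided):

    # Illustrative only: conversation-logfile, openai, etc. are left untouched.
    before = {"enable-offline-chat": True}
    after = {
        "max-prompt-size": None,
        "tokenizer": None,
        "offline-chat": {
            "enable-offline-chat": True,  # carried over from the old flag
            "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
        },
    }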
@@ -284,7 +284,7 @@ if not state.demo:
         except Exception as e:
             return {"status": "error", "message": str(e)}
 
-    @api.post("/config/data/processor/conversation/enable_offline_chat", status_code=200)
+    @api.post("/config/data/processor/conversation/offline_chat", status_code=200)
     async def set_processor_enable_offline_chat_config_data(
         request: Request,
         enable_offline_chat: bool,
@@ -301,7 +301,7 @@ if not state.demo:
             state.config.processor = ProcessorConfig(conversation=ConversationProcessorConfig(conversation_logfile=conversation_logfile))  # type: ignore
 
         assert state.config.processor.conversation is not None
-        state.config.processor.conversation.enable_offline_chat = enable_offline_chat
+        state.config.processor.conversation.offline_chat.enable_offline_chat = enable_offline_chat
         state.processor_config = configure_processor(state.config.processor, state.processor_config)
 
         update_telemetry_state(
@@ -707,7 +707,7 @@ async def chat(
     )
     conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references))
     if conversation_command == ConversationCommand.Help:
-        model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai"
+        model_type = "offline" if state.processor_config.conversation.offline_chat.enable_offline_chat else "openai"
         formatted_help = help_message.format(model=model_type, version=state.khoj_version)
         return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200)
 
@@ -784,7 +784,7 @@ async def extract_references_and_questions(
     # Infer search queries from user message
     with timer("Extracting search queries took", logger):
         # If we've reached here, either the user has enabled offline chat or the openai model is enabled.
-        if state.processor_config.conversation.enable_offline_chat:
+        if state.processor_config.conversation.offline_chat.enable_offline_chat:
             loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
             inferred_queries = extract_questions_offline(
                 defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False
@@ -800,7 +800,7 @@ async def extract_references_and_questions(
     with timer("Searching knowledge base took", logger):
         result_list = []
         for query in inferred_queries:
-            n_items = min(n, 3) if state.processor_config.conversation.enable_offline_chat else n
+            n_items = min(n, 3) if state.processor_config.conversation.offline_chat.enable_offline_chat else n
             result_list.extend(
                 await search(
                     f"{query} {filters_in_query}",
@@ -113,7 +113,7 @@ def generate_chat_response(
             meta_log=meta_log,
         )
 
-    if state.processor_config.conversation.enable_offline_chat:
+    if state.processor_config.conversation.offline_chat.enable_offline_chat:
         loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model
         chat_response = converse_offline(
             references=compiled_references,
@@ -9,6 +9,7 @@ from khoj.utils.yaml import parse_config_from_file
 from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
+from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
 
 
 def cli(args=None):
@@ -55,7 +56,12 @@ def cli(args=None):
 
 
 def run_migrations(args):
-    migrations = [migrate_config_to_version, migrate_processor_conversation_schema, migrate_offline_model]
+    migrations = [
+        migrate_config_to_version,
+        migrate_processor_conversation_schema,
+        migrate_offline_model,
+        migrate_offline_chat_schema,
+    ]
     for migration in migrations:
         args = migration(args)
     return args
@@ -96,18 +96,18 @@ class ConversationProcessorConfigModel:
         self.openai_model = conversation_config.openai
         self.gpt4all_model = GPT4AllProcessorConfig()
         self.gpt4all_model.chat_model = conversation_config.offline_chat_model
-        self.enable_offline_chat = conversation_config.enable_offline_chat
+        self.offline_chat = conversation_config.offline_chat
         self.conversation_logfile = Path(conversation_config.conversation_logfile)
         self.chat_session: List[str] = []
         self.meta_log: dict = {}
 
-        if self.enable_offline_chat:
+        if self.offline_chat.enable_offline_chat:
             try:
                 self.gpt4all_model.loaded_model = download_model(self.gpt4all_model.chat_model)
             except ValueError as e:
+                self.offline_chat.enable_offline_chat = False
                 self.gpt4all_model.loaded_model = None
                 logger.error(f"Error while loading offline chat model: {e}", exc_info=True)
-                self.enable_offline_chat = False
         else:
             self.gpt4all_model.loaded_model = None
 
@@ -91,11 +91,17 @@ class OpenAIProcessorConfig(ConfigBase):
     chat_model: Optional[str] = "gpt-3.5-turbo"
 
 
+class OfflineChatProcessorConfig(ConfigBase):
+    enable_offline_chat: Optional[bool] = False
+    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+
+
 class ConversationProcessorConfig(ConfigBase):
     conversation_logfile: Path
     openai: Optional[OpenAIProcessorConfig]
-    enable_offline_chat: Optional[bool] = False
-    offline_chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    offline_chat: Optional[OfflineChatProcessorConfig]
+    max_prompt_size: Optional[int]
+    tokenizer: Optional[str]
 
 
 class ProcessorConfig(ConfigBase):
@@ -16,6 +16,7 @@ from khoj.utils.helpers import resolve_absolute_path
 from khoj.utils.rawconfig import (
     ContentConfig,
     ConversationProcessorConfig,
+    OfflineChatProcessorConfig,
     OpenAIProcessorConfig,
     ProcessorConfig,
     TextContentConfig,
@@ -205,8 +206,9 @@ def processor_config_offline_chat(tmp_path_factory):
 
     # Setup conversation processor
     processor_config = ProcessorConfig()
+    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
     processor_config.conversation = ConversationProcessorConfig(
-        enable_offline_chat=True,
+        offline_chat=offline_chat,
         conversation_logfile=processor_dir.joinpath("conversation_logs.json"),
     )
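
A hedged sketch of how a test might consume the updated fixture; the test body is illustrative and not part of this commit:

    # Hypothetical test; only the fixture name comes from this diff.
    def test_offline_chat_is_enabled(processor_config_offline_chat):
        conversation = processor_config_offline_chat.conversation
        assert conversation.offline_chat.enable_offline_chat is True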