From feb4f17e3d3e8aaabcf5a41c3be4f9d1914ec5b8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 15 Oct 2023 14:19:29 -0700 Subject: [PATCH] Update chat config schema. Make max_prompt, chat tokenizer configurable This provides flexibility to use non 1st party supported chat models - Create migration script to update khoj.yml config - Put `enable_offline_chat' under new `offline-chat' section Referring code needs to be updated to accommodate this change - Move `offline_chat_model' to `chat-model' under new `offline-chat' section - Put chat `tokenizer` under new `offline-chat' section - Put `max_prompt' under existing `conversation' section As `max_prompt' size affects both openai and offline chat models --- src/khoj/configure.py | 6 +- src/khoj/interface/web/config.html | 14 ++-- .../migrations/migrate_offline_chat_schema.py | 83 +++++++++++++++++++ src/khoj/routers/api.py | 10 +-- src/khoj/routers/helpers.py | 2 +- src/khoj/utils/cli.py | 8 +- src/khoj/utils/config.py | 6 +- src/khoj/utils/rawconfig.py | 10 ++- tests/conftest.py | 4 +- 9 files changed, 119 insertions(+), 24 deletions(-) create mode 100644 src/khoj/migrations/migrate_offline_chat_schema.py diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 7e6cc409..769f015c 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -19,7 +19,7 @@ from khoj.utils.config import ( ) from khoj.utils.helpers import resolve_absolute_path, merge_dicts from khoj.utils.fs_syncer import collect_files -from khoj.utils.rawconfig import FullConfig, ProcessorConfig, ConversationProcessorConfig +from khoj.utils.rawconfig import FullConfig, OfflineChatProcessorConfig, ProcessorConfig, ConversationProcessorConfig from khoj.routers.indexer import configure_content, load_content, configure_search @@ -168,9 +168,7 @@ def configure_conversation_processor( conversation_config=ConversationProcessorConfig( conversation_logfile=conversation_logfile, openai=(conversation_config.openai if 
(conversation_config is not None) else None), - enable_offline_chat=( - conversation_config.enable_offline_chat if (conversation_config is not None) else False - ), + offline_chat=conversation_config.offline_chat if conversation_config else OfflineChatProcessorConfig(), ) ) else: diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 3b295a88..d41ca26b 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -236,7 +236,7 @@
-

Setup chat using OpenAI

+

Setup online chat using OpenAI

@@ -261,21 +261,21 @@ Chat

Offline Chat - Configured - {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and not current_model_state.conversation_gpt4all %} + Configured + {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and not current_model_state.conversation_gpt4all %} Not Configured {% endif %}

-

Setup offline chat (Llama V2)

+

Setup offline chat

-
+
-
+
@@ -346,7 +346,7 @@ featuresHintText.classList.add("show"); } - fetch('/api/config/data/processor/conversation/enable_offline_chat' + '?enable_offline_chat=' + enable, { + fetch('/api/config/data/processor/conversation/offline_chat' + '?enable_offline_chat=' + enable, { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/khoj/migrations/migrate_offline_chat_schema.py b/src/khoj/migrations/migrate_offline_chat_schema.py new file mode 100644 index 00000000..873783a3 --- /dev/null +++ b/src/khoj/migrations/migrate_offline_chat_schema.py @@ -0,0 +1,83 @@ +""" +Current format of khoj.yml +--- +app: + ... +content-type: + ... +processor: + conversation: + enable-offline-chat: false + conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json + openai: + ... +search-type: + ... + +New format of khoj.yml +--- +app: + ... +content-type: + ... +processor: + conversation: + offline-chat: + enable-offline-chat: false + chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin + tokenizer: null + max_prompt_size: null + conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json + openai: + ... +search-type: + ... 
+""" +import logging +from packaging import version + +from khoj.utils.yaml import load_config_from_file, save_config_to_file + + +logger = logging.getLogger(__name__) + + +def migrate_offline_chat_schema(args): + schema_version = "0.12.3" + raw_config = load_config_from_file(args.config_file) + previous_version = raw_config.get("version") + + if "processor" not in raw_config: + return args + if raw_config["processor"] is None: + return args + if "conversation" not in raw_config["processor"]: + return args + + if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"): + logger.info( + f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable" + ) + raw_config["version"] = schema_version + + # Create max-prompt-size field in conversation processor schema + raw_config["processor"]["conversation"]["max-prompt-size"] = None + raw_config["processor"]["conversation"]["tokenizer"] = None + + # Create offline chat schema based on existing enable_offline_chat field in khoj config schema + offline_chat_model = ( + raw_config["processor"]["conversation"] + .get("offline-chat", {}) + .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin") + ) + raw_config["processor"]["conversation"]["offline-chat"] = { + "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False), + "chat-model": offline_chat_model, + } + + # Delete old enable-offline-chat field from conversation processor schema + if "enable-offline-chat" in raw_config["processor"]["conversation"]: + del raw_config["processor"]["conversation"]["enable-offline-chat"] + + save_config_to_file(raw_config, args.config_file) + return args diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 2ff6bab0..91db7c58 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -284,7 +284,7 @@ if not state.demo: except Exception as e: return {"status": "error", "message": str(e)} - 
@api.post("/config/data/processor/conversation/enable_offline_chat", status_code=200) + @api.post("/config/data/processor/conversation/offline_chat", status_code=200) async def set_processor_enable_offline_chat_config_data( request: Request, enable_offline_chat: bool, @@ -301,7 +301,7 @@ if not state.demo: state.config.processor = ProcessorConfig(conversation=ConversationProcessorConfig(conversation_logfile=conversation_logfile)) # type: ignore assert state.config.processor.conversation is not None - state.config.processor.conversation.enable_offline_chat = enable_offline_chat + state.config.processor.conversation.offline_chat.enable_offline_chat = enable_offline_chat state.processor_config = configure_processor(state.config.processor, state.processor_config) update_telemetry_state( @@ -707,7 +707,7 @@ async def chat( ) conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references)) if conversation_command == ConversationCommand.Help: - model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai" + model_type = "offline" if state.processor_config.conversation.offline_chat.enable_offline_chat else "openai" formatted_help = help_message.format(model=model_type, version=state.khoj_version) return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200) @@ -784,7 +784,7 @@ async def extract_references_and_questions( # Infer search queries from user message with timer("Extracting search queries took", logger): # If we've reached here, either the user has enabled offline chat or the openai model is enabled. 
- if state.processor_config.conversation.enable_offline_chat: + if state.processor_config.conversation.offline_chat.enable_offline_chat: loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model inferred_queries = extract_questions_offline( defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False @@ -800,7 +800,7 @@ async def extract_references_and_questions( with timer("Searching knowledge base took", logger): result_list = [] for query in inferred_queries: - n_items = min(n, 3) if state.processor_config.conversation.enable_offline_chat else n + n_items = min(n, 3) if state.processor_config.conversation.offline_chat.enable_offline_chat else n result_list.extend( await search( f"{query} {filters_in_query}", diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 3898d1b8..0bc66991 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -113,7 +113,7 @@ def generate_chat_response( meta_log=meta_log, ) - if state.processor_config.conversation.enable_offline_chat: + if state.processor_config.conversation.offline_chat.enable_offline_chat: loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model chat_response = converse_offline( references=compiled_references, diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py index 78a9ccf9..1d6106cb 100644 --- a/src/khoj/utils/cli.py +++ b/src/khoj/utils/cli.py @@ -9,6 +9,7 @@ from khoj.utils.yaml import parse_config_from_file from khoj.migrations.migrate_version import migrate_config_to_version from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema from khoj.migrations.migrate_offline_model import migrate_offline_model +from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema def cli(args=None): @@ -55,7 +56,12 @@ def cli(args=None): def run_migrations(args): - migrations = [migrate_config_to_version, 
migrate_processor_conversation_schema, migrate_offline_model] + migrations = [ + migrate_config_to_version, + migrate_processor_conversation_schema, + migrate_offline_model, + migrate_offline_chat_schema, + ] for migration in migrations: args = migration(args) return args diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py index 5accd2ad..90e8862a 100644 --- a/src/khoj/utils/config.py +++ b/src/khoj/utils/config.py @@ -96,18 +96,18 @@ class ConversationProcessorConfigModel: self.openai_model = conversation_config.openai self.gpt4all_model = GPT4AllProcessorConfig() self.gpt4all_model.chat_model = conversation_config.offline_chat_model - self.enable_offline_chat = conversation_config.enable_offline_chat + self.offline_chat = conversation_config.offline_chat self.conversation_logfile = Path(conversation_config.conversation_logfile) self.chat_session: List[str] = [] self.meta_log: dict = {} - if self.enable_offline_chat: + if self.offline_chat.enable_offline_chat: try: self.gpt4all_model.loaded_model = download_model(self.gpt4all_model.chat_model) except ValueError as e: + self.offline_chat.enable_offline_chat = False self.gpt4all_model.loaded_model = None logger.error(f"Error while loading offline chat model: {e}", exc_info=True) - self.enable_offline_chat = False else: self.gpt4all_model.loaded_model = None diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index 30a98354..f7c42266 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -91,11 +91,17 @@ class OpenAIProcessorConfig(ConfigBase): chat_model: Optional[str] = "gpt-3.5-turbo" +class OfflineChatProcessorConfig(ConfigBase): + enable_offline_chat: Optional[bool] = False + chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin" + + class ConversationProcessorConfig(ConfigBase): conversation_logfile: Path openai: Optional[OpenAIProcessorConfig] - enable_offline_chat: Optional[bool] = False - offline_chat_model: Optional[str] = 
"llama-2-7b-chat.ggmlv3.q4_0.bin" + offline_chat: Optional[OfflineChatProcessorConfig] + max_prompt_size: Optional[int] + tokenizer: Optional[str] class ProcessorConfig(ConfigBase): diff --git a/tests/conftest.py b/tests/conftest.py index d851341d..f75dfceb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ from khoj.utils.helpers import resolve_absolute_path from khoj.utils.rawconfig import ( ContentConfig, ConversationProcessorConfig, + OfflineChatProcessorConfig, OpenAIProcessorConfig, ProcessorConfig, TextContentConfig, @@ -205,8 +206,9 @@ def processor_config_offline_chat(tmp_path_factory): # Setup conversation processor processor_config = ProcessorConfig() + offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True) processor_config.conversation = ConversationProcessorConfig( - enable_offline_chat=True, + offline_chat=offline_chat, conversation_logfile=processor_dir.joinpath("conversation_logs.json"), )