From feb4f17e3d3e8aaabcf5a41c3be4f9d1914ec5b8 Mon Sep 17 00:00:00 2001 From: Debanjum Singh Solanky Date: Sun, 15 Oct 2023 14:19:29 -0700 Subject: [PATCH] Update chat config schema. Make max_prompt, chat tokenizer configurable This provides flexibility to use non 1st party supported chat models - Create migration script to update khoj.yml config - Put `enable_offline_chat' under new `offline-chat' section Referring code needs to be updated to accommodate this change - Move `offline_chat_model' to `chat-model' under new `offline-chat' section - Put chat `tokenizer` under new `offline-chat' section - Put `max_prompt' under existing `conversation' section As `max_prompt' size affects both openai and offline chat models --- src/khoj/configure.py | 6 +- src/khoj/interface/web/config.html | 14 ++-- .../migrations/migrate_offline_chat_schema.py | 83 +++++++++++++++++++ src/khoj/routers/api.py | 10 +-- src/khoj/routers/helpers.py | 2 +- src/khoj/utils/cli.py | 8 +- src/khoj/utils/config.py | 6 +- src/khoj/utils/rawconfig.py | 10 ++- tests/conftest.py | 4 +- 9 files changed, 119 insertions(+), 24 deletions(-) create mode 100644 src/khoj/migrations/migrate_offline_chat_schema.py diff --git a/src/khoj/configure.py b/src/khoj/configure.py index 7e6cc409..769f015c 100644 --- a/src/khoj/configure.py +++ b/src/khoj/configure.py @@ -19,7 +19,7 @@ from khoj.utils.config import ( ) from khoj.utils.helpers import resolve_absolute_path, merge_dicts from khoj.utils.fs_syncer import collect_files -from khoj.utils.rawconfig import FullConfig, ProcessorConfig, ConversationProcessorConfig +from khoj.utils.rawconfig import FullConfig, OfflineChatProcessorConfig, ProcessorConfig, ConversationProcessorConfig from khoj.routers.indexer import configure_content, load_content, configure_search @@ -168,9 +168,7 @@ def configure_conversation_processor( conversation_config=ConversationProcessorConfig( conversation_logfile=conversation_logfile, openai=(conversation_config.openai if 
(conversation_config is not None) else None), - enable_offline_chat=( - conversation_config.enable_offline_chat if (conversation_config is not None) else False - ), + offline_chat=conversation_config.offline_chat if conversation_config else OfflineChatProcessorConfig(), ) ) else: diff --git a/src/khoj/interface/web/config.html b/src/khoj/interface/web/config.html index 3b295a88..d41ca26b 100644 --- a/src/khoj/interface/web/config.html +++ b/src/khoj/interface/web/config.html @@ -236,7 +236,7 @@
-

Setup chat using OpenAI

+

Setup online chat using OpenAI

@@ -261,21 +261,21 @@ Chat

Offline Chat - Configured - {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.enable_offline_chat and not current_model_state.conversation_gpt4all %} + Configured + {% if current_config.processor and current_config.processor.conversation and current_config.processor.conversation.offline_chat.enable_offline_chat and not current_model_state.conversation_gpt4all %} Not Configured {% endif %}

-

Setup offline chat (Llama V2)

+

Setup offline chat

-
+
-
+
@@ -346,7 +346,7 @@ featuresHintText.classList.add("show"); } - fetch('/api/config/data/processor/conversation/enable_offline_chat' + '?enable_offline_chat=' + enable, { + fetch('/api/config/data/processor/conversation/offline_chat' + '?enable_offline_chat=' + enable, { method: 'POST', headers: { 'Content-Type': 'application/json', diff --git a/src/khoj/migrations/migrate_offline_chat_schema.py b/src/khoj/migrations/migrate_offline_chat_schema.py new file mode 100644 index 00000000..873783a3 --- /dev/null +++ b/src/khoj/migrations/migrate_offline_chat_schema.py @@ -0,0 +1,83 @@ +""" +Current format of khoj.yml +--- +app: + ... +content-type: + ... +processor: + conversation: + enable-offline-chat: false + conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json + openai: + ... +search-type: + ... + +New format of khoj.yml +--- +app: + ... +content-type: + ... +processor: + conversation: + offline-chat: + enable-offline-chat: false + chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin + tokenizer: null + max_prompt_size: null + conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json + openai: + ... +search-type: + ... 
+""" +import logging +from packaging import version + +from khoj.utils.yaml import load_config_from_file, save_config_to_file + + +logger = logging.getLogger(__name__) + + +def migrate_offline_chat_schema(args): + schema_version = "0.12.3" + raw_config = load_config_from_file(args.config_file) + previous_version = raw_config.get("version") + + if "processor" not in raw_config: + return args + if raw_config["processor"] is None: + return args + if "conversation" not in raw_config["processor"]: + return args + + if previous_version is None or version.parse(previous_version) < version.parse("0.12.3"): + logger.info( + f"Upgrading config schema to {schema_version} from {previous_version} to make (offline) chat more configurable" + ) + raw_config["version"] = schema_version + + # Create max-prompt-size field in conversation processor schema + raw_config["processor"]["conversation"]["max-prompt-size"] = None + raw_config["processor"]["conversation"]["tokenizer"] = None + + # Create offline chat schema based on existing enable_offline_chat field in khoj config schema + offline_chat_model = ( + raw_config["processor"]["conversation"] + .get("offline-chat", {}) + .get("chat-model", "llama-2-7b-chat.ggmlv3.q4_0.bin") + ) + raw_config["processor"]["conversation"]["offline-chat"] = { + "enable-offline-chat": raw_config["processor"]["conversation"].get("enable-offline-chat", False), + "chat-model": offline_chat_model, + } + + # Delete old enable-offline-chat field from conversation processor schema + if "enable-offline-chat" in raw_config["processor"]["conversation"]: + del raw_config["processor"]["conversation"]["enable-offline-chat"] + + save_config_to_file(raw_config, args.config_file) + return args diff --git a/src/khoj/routers/api.py b/src/khoj/routers/api.py index 2ff6bab0..91db7c58 100644 --- a/src/khoj/routers/api.py +++ b/src/khoj/routers/api.py @@ -284,7 +284,7 @@ if not state.demo: except Exception as e: return {"status": "error", "message": str(e)} - 
@api.post("/config/data/processor/conversation/enable_offline_chat", status_code=200) + @api.post("/config/data/processor/conversation/offline_chat", status_code=200) async def set_processor_enable_offline_chat_config_data( request: Request, enable_offline_chat: bool, @@ -301,7 +301,7 @@ if not state.demo: state.config.processor = ProcessorConfig(conversation=ConversationProcessorConfig(conversation_logfile=conversation_logfile)) # type: ignore assert state.config.processor.conversation is not None - state.config.processor.conversation.enable_offline_chat = enable_offline_chat + state.config.processor.conversation.offline_chat.enable_offline_chat = enable_offline_chat state.processor_config = configure_processor(state.config.processor, state.processor_config) update_telemetry_state( @@ -707,7 +707,7 @@ async def chat( ) conversation_command = get_conversation_command(query=q, any_references=not is_none_or_empty(compiled_references)) if conversation_command == ConversationCommand.Help: - model_type = "offline" if state.processor_config.conversation.enable_offline_chat else "openai" + model_type = "offline" if state.processor_config.conversation.offline_chat.enable_offline_chat else "openai" formatted_help = help_message.format(model=model_type, version=state.khoj_version) return StreamingResponse(iter([formatted_help]), media_type="text/event-stream", status_code=200) @@ -784,7 +784,7 @@ async def extract_references_and_questions( # Infer search queries from user message with timer("Extracting search queries took", logger): # If we've reached here, either the user has enabled offline chat or the openai model is enabled. 
- if state.processor_config.conversation.enable_offline_chat: + if state.processor_config.conversation.offline_chat.enable_offline_chat: loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model inferred_queries = extract_questions_offline( defiltered_query, loaded_model=loaded_model, conversation_log=meta_log, should_extract_questions=False @@ -800,7 +800,7 @@ async def extract_references_and_questions( with timer("Searching knowledge base took", logger): result_list = [] for query in inferred_queries: - n_items = min(n, 3) if state.processor_config.conversation.enable_offline_chat else n + n_items = min(n, 3) if state.processor_config.conversation.offline_chat.enable_offline_chat else n result_list.extend( await search( f"{query} {filters_in_query}", diff --git a/src/khoj/routers/helpers.py b/src/khoj/routers/helpers.py index 3898d1b8..0bc66991 100644 --- a/src/khoj/routers/helpers.py +++ b/src/khoj/routers/helpers.py @@ -113,7 +113,7 @@ def generate_chat_response( meta_log=meta_log, ) - if state.processor_config.conversation.enable_offline_chat: + if state.processor_config.conversation.offline_chat.enable_offline_chat: loaded_model = state.processor_config.conversation.gpt4all_model.loaded_model chat_response = converse_offline( references=compiled_references, diff --git a/src/khoj/utils/cli.py b/src/khoj/utils/cli.py index 78a9ccf9..1d6106cb 100644 --- a/src/khoj/utils/cli.py +++ b/src/khoj/utils/cli.py @@ -9,6 +9,7 @@ from khoj.utils.yaml import parse_config_from_file from khoj.migrations.migrate_version import migrate_config_to_version from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema from khoj.migrations.migrate_offline_model import migrate_offline_model +from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema def cli(args=None): @@ -55,7 +56,12 @@ def cli(args=None): def run_migrations(args): - migrations = [migrate_config_to_version, 
migrate_processor_conversation_schema, migrate_offline_model] + migrations = [ + migrate_config_to_version, + migrate_processor_conversation_schema, + migrate_offline_model, + migrate_offline_chat_schema, + ] for migration in migrations: args = migration(args) return args diff --git a/src/khoj/utils/config.py b/src/khoj/utils/config.py index 5accd2ad..90e8862a 100644 --- a/src/khoj/utils/config.py +++ b/src/khoj/utils/config.py @@ -96,18 +96,18 @@ class ConversationProcessorConfigModel: self.openai_model = conversation_config.openai self.gpt4all_model = GPT4AllProcessorConfig() self.gpt4all_model.chat_model = conversation_config.offline_chat_model - self.enable_offline_chat = conversation_config.enable_offline_chat + self.offline_chat = conversation_config.offline_chat self.conversation_logfile = Path(conversation_config.conversation_logfile) self.chat_session: List[str] = [] self.meta_log: dict = {} - if self.enable_offline_chat: + if self.offline_chat.enable_offline_chat: try: self.gpt4all_model.loaded_model = download_model(self.gpt4all_model.chat_model) except ValueError as e: + self.offline_chat.enable_offline_chat = False self.gpt4all_model.loaded_model = None logger.error(f"Error while loading offline chat model: {e}", exc_info=True) - self.enable_offline_chat = False else: self.gpt4all_model.loaded_model = None diff --git a/src/khoj/utils/rawconfig.py b/src/khoj/utils/rawconfig.py index 30a98354..f7c42266 100644 --- a/src/khoj/utils/rawconfig.py +++ b/src/khoj/utils/rawconfig.py @@ -91,11 +91,17 @@ class OpenAIProcessorConfig(ConfigBase): chat_model: Optional[str] = "gpt-3.5-turbo" +class OfflineChatProcessorConfig(ConfigBase): + enable_offline_chat: Optional[bool] = False + chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin" + + class ConversationProcessorConfig(ConfigBase): conversation_logfile: Path openai: Optional[OpenAIProcessorConfig] - enable_offline_chat: Optional[bool] = False - offline_chat_model: Optional[str] = 
"llama-2-7b-chat.ggmlv3.q4_0.bin" + offline_chat: Optional[OfflineChatProcessorConfig] + max_prompt_size: Optional[int] + tokenizer: Optional[str] class ProcessorConfig(ConfigBase): diff --git a/tests/conftest.py b/tests/conftest.py index d851341d..f75dfceb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -16,6 +16,7 @@ from khoj.utils.helpers import resolve_absolute_path from khoj.utils.rawconfig import ( ContentConfig, ConversationProcessorConfig, + OfflineChatProcessorConfig, OpenAIProcessorConfig, ProcessorConfig, TextContentConfig, @@ -205,8 +206,9 @@ def processor_config_offline_chat(tmp_path_factory): # Setup conversation processor processor_config = ProcessorConfig() + offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True) processor_config.conversation = ConversationProcessorConfig( - enable_offline_chat=True, + offline_chat=offline_chat, conversation_logfile=processor_dir.joinpath("conversation_logs.json"), )