Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-23 15:38:55 +01:00
Intelligently initialize a decent default set of chat model options
Given the LLM landscape is rapidly changing, providing a good default set of options should help reduce decision fatigue when getting started.

Improve initialization flow during first run:
- Set Google and Anthropic chat models too. Previously only Offline and OpenAI chat models could be set during init.
- Add multiple chat models for each LLM provider. Interactively set a comma-separated list of models for each provider.
- Auto-add default chat models for each provider in non-interactive mode if the {OPENAI,GEMINI,ANTHROPIC}_API_KEY env var is set.
- Do not ask for max_tokens or tokenizer for offline models during initialization. Use better defaults inferred in code instead.
- Explicitly set the default chat model to use. If unset, it implicitly defaults to the first chat model. Make it explicit to reduce this confusion.

Resolves #882
This commit is contained in: parent f177723711 · commit 91c76d4152
5 changed files with 185 additions and 73 deletions
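In non-interactive mode, the commit enables a provider exactly when its API key environment variable is set. A minimal sketch of that decision (helper and constant names are illustrative, not from the commit; the real logic lives inline in the initialization diff below):

```python
import os

# Illustrative: in --non-interactive mode a provider's default chat models
# are added iff its API key env var is set.
PROVIDER_ENV_KEYS = {
    "OpenAI": "OPENAI_API_KEY",
    "Gemini": "GEMINI_API_KEY",
    "Anthropic": "ANTHROPIC_API_KEY",
}

def providers_to_enable() -> list[str]:
    """Return the providers whose API key env var is set."""
    return [name for name, env_key in PROVIDER_ENV_KEYS.items() if os.getenv(env_key)]

# e.g. ["OpenAI"] when only OPENAI_API_KEY is exported
print(providers_to_enable())
```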
@@ -44,10 +44,19 @@ services:
       - KHOJ_DEBUG=False
       - KHOJ_ADMIN_EMAIL=username@example.com
       - KHOJ_ADMIN_PASSWORD=password
-      # Uncomment the following lines to make your instance publicly accessible.
-      # Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode.
+      # Uncomment lines below to use chat models by each provider.
+      # Ensure you set your provider specific API keys.
+      # ---
+      # - OPENAI_API_KEY=your_openai_api_key
+      # - GEMINI_API_KEY=your_gemini_api_key
+      # - ANTHROPIC_API_KEY=your_anthropic_api_key
+      # Uncomment the necessary lines below to make your instance publicly accessible.
+      # Replace the KHOJ_DOMAIN with either your domain or IP address (no http/https prefix).
+      # Proceed with caution, especially if you are using anonymous mode.
+      # ---
       # - KHOJ_NO_HTTPS=True
       # - KHOJ_DOMAIN=192.168.0.104
       # - KHOJ_DOMAIN=khoj.example.com
     command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode --non-interactive
@@ -18,13 +18,20 @@ from khoj.utils.helpers import is_none_or_empty, merge_dicts

 logger = logging.getLogger(__name__)
 model_to_prompt_size = {
+    # OpenAI Models
     "gpt-3.5-turbo": 12000,
     "gpt-3.5-turbo-0125": 12000,
     "gpt-4-0125-preview": 20000,
     "gpt-4-turbo-preview": 20000,
     "gpt-4o": 20000,
     "gpt-4o-mini": 20000,
     "o1-preview": 20000,
     "o1-mini": 20000,
+    # Google Models
+    "gemini-1.5-flash": 20000,
+    "gemini-1.5-pro": 20000,
+    # Anthropic Models
+    "claude-3-5-sonnet-20240620": 20000,
+    "claude-3-opus-20240229": 20000,
+    # Offline Models
     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
     "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,

@@ -163,7 +170,7 @@ def generate_chatml_messages_with_context(
     if loaded_model:
         max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
     else:
-        max_prompt_size = model_to_prompt_size.get(model_name, 2000)
+        max_prompt_size = model_to_prompt_size.get(model_name, 10000)

     # Scale lookback turns proportional to max prompt size supported by model
     lookback_turns = max_prompt_size // 750
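Two behaviors in this file are worth noting: offline models clamp the prompt budget to the loaded model's context window via infer_max_tokens (its body is not shown in this hunk), and unknown API models now fall back to a 10000-token budget, which in turn scales the conversation lookback. A hedged sketch, assuming infer_max_tokens is a simple clamp:

```python
import math

# Assumption: infer_max_tokens clamps the configured prompt size to the
# model's context window. Its real body is not visible in this diff.
def infer_max_tokens(model_context_window: int, configured_max_prompt_size: float) -> int:
    return int(min(model_context_window, configured_max_prompt_size))

# From the diff: unknown models fall back to a 10000-token prompt budget,
# and chat history lookback scales with that budget.
max_prompt_size = infer_max_tokens(8192, math.inf)  # -> 8192 for an 8k-context model
lookback_turns = max_prompt_size // 750             # -> 10 conversation turns
```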
@@ -8,8 +8,15 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
-default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
-default_online_chat_model = "gpt-4o-mini"
+default_offline_chat_models = [
+    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    "bartowski/gemma-2-9b-it-GGUF",
+    "bartowski/gemma-2-2b-it-GGUF",
+    "bartowski/Phi-3.5-mini-instruct-GGUF",
+]
+default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
+default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
+default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]

 empty_config = {
     "search-type": {
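Order matters in these lists: the first entry can stand in for the old single-model constants, which is how the test fixture in the last hunk below picks its model. A tiny illustration:

```python
default_offline_chat_models = [
    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    "bartowski/gemma-2-9b-it-GGUF",
    "bartowski/gemma-2-2b-it-GGUF",
    "bartowski/Phi-3.5-mini-instruct-GGUF",
]

# The first entry replaces the removed default_offline_chat_model constant.
assert default_offline_chat_models[0] == "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
```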
@@ -6,11 +6,17 @@ from khoj.database.models import (
     ChatModelOptions,
     KhojUser,
     OpenAIProcessorConversationConfig,
+    ServerChatSettings,
     SpeechToTextModelOptions,
     TextToImageModelConfig,
 )
 from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
-from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
+from khoj.utils.constants import (
+    default_anthropic_chat_models,
+    default_gemini_chat_models,
+    default_offline_chat_models,
+    default_openai_chat_models,
+)

 logger = logging.getLogger(__name__)
@@ -32,78 +38,44 @@ def initialization(interactive: bool = True):

     def _create_chat_configuration():
         logger.info(
             "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
         )

-        try:
-            use_offline_model = "y" if not interactive else input("Use offline chat model? (y/n): ")
-            if use_offline_model == "y":
-                logger.info("🗣️ Setting up offline chat model")
-
-                if interactive:
-                    offline_chat_model = input(
-                        f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): "
-                    )
-                else:
-                    offline_chat_model = ""
-                if offline_chat_model == "":
-                    ChatModelOptions.objects.create(
-                        chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
-                    )
-                else:
-                    default_max_tokens = model_to_prompt_size.get(offline_chat_model, 4000)
-                    max_tokens = input(
-                        f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
-                    )
-                    max_tokens = max_tokens or default_max_tokens
-
-                    default_tokenizer = model_to_tokenizer.get(
-                        offline_chat_model, "hf-internal-testing/llama-tokenizer"
-                    )
-                    tokenizer = input(
-                        f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
-                    )
-                    tokenizer = tokenizer or default_tokenizer
-
-                    ChatModelOptions.objects.create(
-                        chat_model=offline_chat_model,
-                        model_type=ChatModelOptions.ModelType.OFFLINE,
-                        max_prompt_size=max_tokens,
-                        tokenizer=tokenizer,
-                    )
-        except ModuleNotFoundError as e:
-            logger.warning("Offline models are not supported on this device.")
-
         # Set up OpenAI's online models
         default_openai_api_key = os.getenv("OPENAI_API_KEY")
         default_use_openai_model = {True: "y", False: "n"}[default_openai_api_key != None]
-        use_openai_model = default_use_openai_model if not interactive else input("Use OpenAI models? (y/n): ")
-        if use_openai_model == "y":
-            logger.info("🗣️ Setting up your OpenAI configuration")
+        use_model_provider = default_use_openai_model if not interactive else input("Add OpenAI models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your OpenAI configuration")
             if interactive:
-                api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
+                user_api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_openai_api_key
             else:
                 api_key = default_openai_api_key
-            OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="OpenAI")

             if interactive:
-                openai_chat_model = input(
-                    f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
+                chat_model_names = input(
+                    f"Enter the OpenAI chat models you want to use (default: {','.join(default_openai_chat_models)}): "
                 )
-                openai_chat_model = openai_chat_model or default_online_chat_model
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_openai_chat_models
+                chat_models = [model.strip() for model in chat_models]
             else:
-                openai_chat_model = default_online_chat_model
-            default_max_tokens = model_to_prompt_size.get(openai_chat_model, 10000)
-            if interactive:
-                max_tokens = input(
-                    f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
-                )
-                max_tokens = max_tokens or default_max_tokens
-            else:
-                max_tokens = default_max_tokens
-            ChatModelOptions.objects.create(
-                chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
-            )
+                chat_models = default_openai_chat_models
+
+            # Add OpenAI chat models
+            for chat_model in chat_models:
+                vision_enabled = chat_model in ["gpt-4o-mini", "gpt-4o"]
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.OPENAI,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=vision_enabled,
+                )

             # Add OpenAI speech to text model
             default_speech2text_model = "whisper-1"
             if interactive:
                 openai_speech2text_model = input(
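Each provider block repeats the same prompt-and-parse step for comma-separated model names. Factored into a standalone function for clarity (parse_model_names is a hypothetical helper, not part of the commit):

```python
def parse_model_names(user_input: str, defaults: list[str]) -> list[str]:
    """Hypothetical helper mirroring the diff's parsing: fall back to the
    provider defaults on empty input, else split on commas and strip spaces."""
    if user_input == "":
        return defaults
    return [model.strip() for model in user_input.split(",")]

assert parse_model_names("", ["gpt-4o-mini", "gpt-4o"]) == ["gpt-4o-mini", "gpt-4o"]
assert parse_model_names("gpt-4o, o1-mini", []) == ["gpt-4o", "o1-mini"]
```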
@@ -116,6 +88,7 @@ def initialization(interactive: bool = True):
                 model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
             )

+            # Add OpenAI text to image model
             default_text_to_image_model = "dall-e-3"
             if interactive:
                 openai_text_to_image_model = input(
@@ -128,9 +101,124 @@ def initialization(interactive: bool = True):
                 model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
             )

-        if use_offline_model == "y" or use_openai_model == "y":
-            logger.info("🗣️ Chat model configuration complete")
+        # Set up Google's Gemini online chat models
+        default_gemini_api_key = os.getenv("GEMINI_API_KEY")
+        default_use_gemini_model = {True: "y", False: "n"}[default_gemini_api_key != None]
+        use_model_provider = default_use_gemini_model if not interactive else input("Add Google's chat models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your Google Gemini configuration")
+            if interactive:
+                user_api_key = input(f"Enter your Gemini API key (default: {default_gemini_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_gemini_api_key
+            else:
+                api_key = default_gemini_api_key
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Gemini")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the Gemini chat models you want to use (default: {','.join(default_gemini_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_gemini_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_gemini_chat_models
+
+            # Add Gemini chat models
+            for chat_model in chat_models:
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                vision_enabled = False
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.GOOGLE,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=False,
+                )
+
+        # Set up Anthropic's online chat models
+        default_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+        default_use_anthropic_model = {True: "y", False: "n"}[default_anthropic_api_key != None]
+        use_model_provider = (
+            default_use_anthropic_model if not interactive else input("Add Anthropic's chat models? (y/n): ")
+        )
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your Anthropic configuration")
+            if interactive:
+                user_api_key = input(f"Enter your Anthropic API key (default: {default_anthropic_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_anthropic_api_key
+            else:
+                api_key = default_anthropic_api_key
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Anthropic")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the Anthropic chat models you want to use (default: {','.join(default_anthropic_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_anthropic_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_anthropic_chat_models
+
+            # Add Anthropic chat models
+            for chat_model in chat_models:
+                vision_enabled = False
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.ANTHROPIC,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=False,
+                )
+
+        # Set up offline chat models
+        use_model_provider = "y" if not interactive else input("Add Offline chat models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up Offline chat models")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the offline chat models you want to use. See HuggingFace for available GGUF models (default: {','.join(default_offline_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_offline_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_offline_chat_models
+
+            # Add chat models
+            for chat_model in chat_models:
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                default_tokenizer = model_to_tokenizer.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.OFFLINE,
+                    max_prompt_size=default_max_tokens,
+                    tokenizer=default_tokenizer,
+                )
+
+        chat_models_configured = ChatModelOptions.objects.count()
+
+        # Explicitly set default chat model
+        if chat_models_configured > 0:
+            default_chat_model_name = ChatModelOptions.objects.first().chat_model
+            # If there are multiple chat models, ask the user to choose the default chat model
+            if chat_models_configured > 1 and interactive:
+                user_chat_model_name = input(
+                    f"Enter the default chat model to use (default: {default_chat_model_name}): "
+                )
+            else:
+                user_chat_model_name = None
+
+            # If the user's choice is valid, set it as the default chat model
+            if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
+                default_chat_model_name = user_chat_model_name
+
+            # Create a server chat settings object with the default chat model
+            default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
+            ServerChatSettings.objects.create(chat_default=default_chat_model)
+            logger.info("🗣️ Chat model configuration complete")

         # Set up offline speech to text model
         use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
         if use_offline_speech2text_model == "y":
             logger.info("🗣️ Setting up offline speech to text model")
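Stripped of the Django ORM, the default-model selection above follows a simple rule: start from the first configured model, let an interactive user propose an override, and accept the override only if it names a configured model. A self-contained sketch over plain lists instead of ChatModelOptions querysets:

```python
from typing import Optional

def choose_default_chat_model(configured: list[str], user_choice: Optional[str]) -> Optional[str]:
    """Sketch of the default-selection rule from the diff, using plain lists."""
    if not configured:
        return None
    default = configured[0]  # implicit default: first configured model
    if user_choice and user_choice in configured:
        default = user_choice  # valid user override wins
    return default

assert choose_default_chat_model(["gpt-4o-mini", "gpt-4o"], "gpt-4o") == "gpt-4o"
assert choose_default_chat_model(["gpt-4o-mini", "gpt-4o"], "typo") == "gpt-4o-mini"
```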
@@ -163,7 +251,8 @@ def initialization(interactive: bool = True):
         try:
             _create_chat_configuration()
             break
-        # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
+        # Some environments don't support interactive input. We catch the exception and return if that's the case.
+        # The admin can still configure their settings from the admin page.
         except EOFError:
             return
         except Exception as e:
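The surrounding retry loop, only partially visible in this hunk, keeps re-prompting on failure but bails out cleanly where stdin is unavailable. Roughly (the body of the final except is cut off in the diff; the sketch assumes it logs and retries):

```python
from typing import Callable

def run_initialization(create_configuration: Callable[[], None]) -> None:
    """Hedged sketch of the retry pattern around _create_chat_configuration."""
    while True:
        try:
            create_configuration()
            break
        except EOFError:
            # No interactive stdin (e.g. some containers); settings can
            # still be configured later from the admin page.
            return
        except Exception as e:
            # Assumed behavior: report the error and prompt again.
            print(f"Chat model configuration failed, retrying: {e}")
```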
@@ -19,12 +19,12 @@ from khoj.processor.conversation.offline.chat_model import (
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import message_to_log
 from khoj.routers.helpers import aget_relevant_output_modes
-from khoj.utils.constants import default_offline_chat_model
+from khoj.utils.constants import default_offline_chat_models


 @pytest.fixture(scope="session")
 def loaded_model():
-    return download_model(default_offline_chat_model, max_tokens=5000)
+    return download_model(default_offline_chat_models[0], max_tokens=5000)


 freezegun.configure(extend_ignore_list=["transformers"])