Intelligently initialize a decent default set of chat model options

Given the LLM landscape is rapidly changing, providing a good default
set of options should help reduce decision fatigue when getting started.

Improve initialization flow during first run
- Set Google and Anthropic chat models too
  Previously only offline and OpenAI chat models could be set during init

- Add multiple chat models for each LLM provider
  Interactively set a comma-separated list of models for each provider

- Auto-add default chat models for each provider in non-interactive
  mode if the {OPENAI,GEMINI,ANTHROPIC}_API_KEY env var is set
  (see the sketch after this list)

- Do not ask for max_tokens or tokenizer for offline models during
  initialization. Use better defaults inferred in code instead

- Explicitly set the default chat model to use
  If unset, it implicitly defaults to the first chat model.
  Make it explicit to reduce this confusion
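
A minimal sketch of the non-interactive flow, for illustration only. The
provider table mirrors the new defaults in khoj.utils.constants; the actual
logic lives in the initialization diff below:

import os

# In non-interactive mode, a provider is auto-configured only if its API key env var is set
providers = {
    "OpenAI": ("OPENAI_API_KEY", ["gpt-4o-mini", "gpt-4o"]),
    "Gemini": ("GEMINI_API_KEY", ["gemini-1.5-flash", "gemini-1.5-pro"]),
    "Anthropic": ("ANTHROPIC_API_KEY", ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]),
}
for provider, (env_var, default_models) in providers.items():
    if os.getenv(env_var):
        print(f"Would auto-add {provider} chat models: {', '.join(default_models)}")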

Resolves #882
Debanjum Singh Solanky 2024-09-19 12:58:22 -07:00
parent f177723711
commit 91c76d4152
5 changed files with 185 additions and 73 deletions

View file

@@ -44,10 +44,19 @@ services:
- KHOJ_DEBUG=False
- KHOJ_ADMIN_EMAIL=username@example.com
- KHOJ_ADMIN_PASSWORD=password
# Uncomment the following lines to make your instance publicly accessible.
# Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode.
# Uncomment lines below to use chat models by each provider.
# Ensure you set your provider specific API keys.
# ---
# - OPENAI_API_KEY=your_openai_api_key
# - GEMINI_API_KEY=your_gemini_api_key
# - ANTHROPIC_API_KEY=your_anthropic_api_key
# Uncomment the necessary lines below to make your instance publicly accessible.
# Replace the KHOJ_DOMAIN with either your domain or IP address (no http/https prefix).
# Proceed with caution, especially if you are using anonymous mode.
# ---
# - KHOJ_NO_HTTPS=True
# - KHOJ_DOMAIN=192.168.0.104
# - KHOJ_DOMAIN=khoj.example.com
command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode --non-interactive

View file

@@ -18,13 +18,20 @@ from khoj.utils.helpers import is_none_or_empty, merge_dicts
logger = logging.getLogger(__name__)
model_to_prompt_size = {
# OpenAI Models
"gpt-3.5-turbo": 12000,
"gpt-3.5-turbo-0125": 12000,
"gpt-4-0125-preview": 20000,
"gpt-4-turbo-preview": 20000,
"gpt-4o": 20000,
"gpt-4o-mini": 20000,
"o1-preview": 20000,
"o1-mini": 20000,
# Google Models
"gemini-1.5-flash": 20000,
"gemini-1.5-pro": 20000,
# Anthropic Models
"claude-3-5-sonnet-20240620": 20000,
"claude-3-opus-20240229": 20000,
# Offline Models
"TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
"NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -163,7 +170,7 @@ def generate_chatml_messages_with_context(
if loaded_model:
max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
else:
max_prompt_size = model_to_prompt_size.get(model_name, 2000)
max_prompt_size = model_to_prompt_size.get(model_name, 10000)
# Scale lookback turns proportional to max prompt size supported by model
lookback_turns = max_prompt_size // 750
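
A quick worked example of this scaling, using prompt sizes from the table
above (the new fallback of 10000 applies to models missing from the table):

# lookback_turns scales linearly with the model's max prompt size
print(20000 // 750)  # 26 turns for e.g. gpt-4o
print(10000 // 750)  # 13 turns for models not in model_to_prompt_size
print(3500 // 750)   # 4 turns for e.g. Mistral-7B-Instruct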

View file

@@ -8,8 +8,15 @@ empty_escape_sequences = "\n|\r|\t| "
app_env_filepath = "~/.khoj/env"
telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
content_directory = "~/.khoj/content/"
default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
default_online_chat_model = "gpt-4o-mini"
default_offline_chat_models = [
"bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
"bartowski/gemma-2-9b-it-GGUF",
"bartowski/gemma-2-2b-it-GGUF",
"bartowski/Phi-3.5-mini-instruct-GGUF",
]
default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]
empty_config = {
"search-type": {

View file

@@ -6,11 +6,17 @@ from khoj.database.models import (
ChatModelOptions,
KhojUser,
OpenAIProcessorConversationConfig,
ServerChatSettings,
SpeechToTextModelOptions,
TextToImageModelConfig,
)
from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
from khoj.utils.constants import (
default_anthropic_chat_models,
default_gemini_chat_models,
default_offline_chat_models,
default_openai_chat_models,
)
logger = logging.getLogger(__name__)
@@ -32,78 +38,44 @@ def initialization(interactive: bool = True):
def _create_chat_configuration():
logger.info(
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
"🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
)
try:
use_offline_model = "y" if not interactive else input("Use offline chat model? (y/n): ")
if use_offline_model == "y":
logger.info("🗣️ Setting up offline chat model")
if interactive:
offline_chat_model = input(
f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): "
)
else:
offline_chat_model = ""
if offline_chat_model == "":
ChatModelOptions.objects.create(
chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
)
else:
default_max_tokens = model_to_prompt_size.get(offline_chat_model, 4000)
max_tokens = input(
f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
)
max_tokens = max_tokens or default_max_tokens
default_tokenizer = model_to_tokenizer.get(
offline_chat_model, "hf-internal-testing/llama-tokenizer"
)
tokenizer = input(
f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
)
tokenizer = tokenizer or default_tokenizer
ChatModelOptions.objects.create(
chat_model=offline_chat_model,
model_type=ChatModelOptions.ModelType.OFFLINE,
max_prompt_size=max_tokens,
tokenizer=tokenizer,
)
except ModuleNotFoundError as e:
logger.warning("Offline models are not supported on this device.")
# Set up OpenAI's online models
default_openai_api_key = os.getenv("OPENAI_API_KEY")
default_use_openai_model = {True: "y", False: "n"}[default_openai_api_key != None]
use_openai_model = default_use_openai_model if not interactive else input("Use OpenAI models? (y/n): ")
if use_openai_model == "y":
logger.info("🗣 Setting up your OpenAI configuration")
use_model_provider = default_use_openai_model if not interactive else input("Add OpenAI models? (y/n): ")
if use_model_provider == "y":
logger.info("️💬 Setting up your OpenAI configuration")
if interactive:
api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
user_api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
api_key = user_api_key if user_api_key != "" else default_openai_api_key
else:
api_key = default_openai_api_key
OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="OpenAI")
if interactive:
openai_chat_model = input(
f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
chat_model_names = input(
f"Enter the OpenAI chat models you want to use (default: {','.join(default_openai_chat_models)}): "
)
openai_chat_model = openai_chat_model or default_online_chat_model
chat_models = chat_model_names.split(",") if chat_model_names != "" else default_openai_chat_models
chat_models = [model.strip() for model in chat_models]
else:
openai_chat_model = default_online_chat_model
default_max_tokens = model_to_prompt_size.get(openai_chat_model, 10000)
if interactive:
max_tokens = input(
f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
)
max_tokens = max_tokens or default_max_tokens
else:
max_tokens = default_max_tokens
ChatModelOptions.objects.create(
chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
)
chat_models = default_openai_chat_models
# Add OpenAI chat models
for chat_model in chat_models:
vision_enabled = chat_model in ["gpt-4o-mini", "gpt-4o"]
default_max_tokens = model_to_prompt_size.get(chat_model)
ChatModelOptions.objects.create(
chat_model=chat_model,
model_type=ChatModelOptions.ModelType.OPENAI,
max_prompt_size=default_max_tokens,
openai_config=chat_model_provider,
vision_enabled=vision_enabled,
)
# Add OpenAI speech to text model
default_speech2text_model = "whisper-1"
if interactive:
openai_speech2text_model = input(
@@ -116,6 +88,7 @@ def initialization(interactive: bool = True):
model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
)
# Add OpenAI text to image model
default_text_to_image_model = "dall-e-3"
if interactive:
openai_text_to_image_model = input(
@@ -128,9 +101,124 @@ def initialization(interactive: bool = True):
model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
)
if use_offline_model == "y" or use_openai_model == "y":
logger.info("🗣️ Chat model configuration complete")
# Set up Google's Gemini online chat models
default_gemini_api_key = os.getenv("GEMINI_API_KEY")
default_use_gemini_model = {True: "y", False: "n"}[default_gemini_api_key != None]
use_model_provider = default_use_gemini_model if not interactive else input("Add Google's chat models? (y/n): ")
if use_model_provider == "y":
logger.info("️💬 Setting up your Google Gemini configuration")
if interactive:
user_api_key = input(f"Enter your Gemini API key (default: {default_gemini_api_key}): ")
api_key = user_api_key if user_api_key != "" else default_gemini_api_key
else:
api_key = default_gemini_api_key
chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Gemini")
if interactive:
chat_model_names = input(
f"Enter the Gemini chat models you want to use (default: {','.join(default_gemini_chat_models)}): "
)
chat_models = chat_model_names.split(",") if chat_model_names != "" else default_gemini_chat_models
chat_models = [model.strip() for model in chat_models]
else:
chat_models = default_gemini_chat_models
# Add Gemini chat models
for chat_model in chat_models:
default_max_tokens = model_to_prompt_size.get(chat_model)
vision_enabled = False
ChatModelOptions.objects.create(
chat_model=chat_model,
model_type=ChatModelOptions.ModelType.GOOGLE,
max_prompt_size=default_max_tokens,
openai_config=chat_model_provider,
vision_enabled=False,
)
# Set up Anthropic's online chat models
default_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
default_use_anthropic_model = {True: "y", False: "n"}[default_anthropic_api_key != None]
use_model_provider = (
default_use_anthropic_model if not interactive else input("Add Anthropic's chat models? (y/n): ")
)
if use_model_provider == "y":
logger.info("️💬 Setting up your Anthropic configuration")
if interactive:
user_api_key = input(f"Enter your Anthropic API key (default: {default_anthropic_api_key}): ")
api_key = user_api_key if user_api_key != "" else default_anthropic_api_key
else:
api_key = default_anthropic_api_key
chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Anthropic")
if interactive:
chat_model_names = input(
f"Enter the Anthropic chat models you want to use (default: {','.join(default_anthropic_chat_models)}): "
)
chat_models = chat_model_names.split(",") if chat_model_names != "" else default_anthropic_chat_models
chat_models = [model.strip() for model in chat_models]
else:
chat_models = default_anthropic_chat_models
# Add Anthropic chat models
for chat_model in chat_models:
vision_enabled = False
default_max_tokens = model_to_prompt_size.get(chat_model)
ChatModelOptions.objects.create(
chat_model=chat_model,
model_type=ChatModelOptions.ModelType.ANTHROPIC,
max_prompt_size=default_max_tokens,
openai_config=chat_model_provider,
vision_enabled=False,
)
# Set up offline chat models
use_model_provider = "y" if not interactive else input("Add Offline chat models? (y/n): ")
if use_model_provider == "y":
logger.info("️💬 Setting up Offline chat models")
if interactive:
chat_model_names = input(
f"Enter the offline chat models you want to use. See HuggingFace for available GGUF models (default: {','.join(default_offline_chat_models)}): "
)
chat_models = chat_model_names.split(",") if chat_model_names != "" else default_offline_chat_models
chat_models = [model.strip() for model in chat_models]
else:
chat_models = default_offline_chat_models
# Add chat models
for chat_model in chat_models:
default_max_tokens = model_to_prompt_size.get(chat_model)
default_tokenizer = model_to_tokenizer.get(chat_model)
ChatModelOptions.objects.create(
chat_model=chat_model,
model_type=ChatModelOptions.ModelType.OFFLINE,
max_prompt_size=default_max_tokens,
tokenizer=default_tokenizer,
)
chat_models_configured = ChatModelOptions.objects.count()
# Explicitly set default chat model
if chat_models_configured > 0:
default_chat_model_name = ChatModelOptions.objects.first().chat_model
# If there are multiple chat models, ask the user to choose the default chat model
if chat_models_configured > 1 and interactive:
user_chat_model_name = input(
f"Enter the default chat model to use (default: {default_chat_model_name}): "
)
else:
user_chat_model_name = None
# If the user's choice is valid, set it as the default chat model
if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
default_chat_model_name = user_chat_model_name
# Create a server chat settings object with the default chat model
default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
ServerChatSettings.objects.create(chat_default=default_chat_model)
logger.info("🗣️ Chat model configuration complete")
# Set up offline speech to text model
use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
if use_offline_speech2text_model == "y":
logger.info("🗣️ Setting up offline speech to text model")
@@ -163,7 +251,8 @@ def initialization(interactive: bool = True):
try:
_create_chat_configuration()
break
# Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
# Some environments don't support interactive input. We catch the exception and return if that's the case.
# The admin can still configure their settings from the admin page.
except EOFError:
return
except Exception as e:

View file

@@ -19,12 +19,12 @@ from khoj.processor.conversation.offline.chat_model import (
from khoj.processor.conversation.offline.utils import download_model
from khoj.processor.conversation.utils import message_to_log
from khoj.routers.helpers import aget_relevant_output_modes
from khoj.utils.constants import default_offline_chat_model
from khoj.utils.constants import default_offline_chat_models
@pytest.fixture(scope="session")
def loaded_model():
return download_model(default_offline_chat_model, max_tokens=5000)
return download_model(default_offline_chat_models[0], max_tokens=5000)
freezegun.configure(extend_ignore_list=["transformers"])