Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-23 15:38:55 +01:00
Intelligently initialize a decent default set of chat model options
Given the LLM landscape is rapidly changing, providing a good default set of options should help reduce decision fatigue when getting started.

Improve initialization flow during first run:
- Set Google and Anthropic chat models too. Previously only Offline and OpenAI chat models could be set during init.
- Add multiple chat models for each LLM provider. Interactively set a comma-separated list of models for each provider.
- Auto-add default chat models for each provider in non-interactive mode if the {OPENAI,GEMINI,ANTHROPIC}_API_KEY env var is set.
- Do not ask for max_tokens or tokenizer for offline models during initialization. Use better defaults inferred in code instead.
- Explicitly set the default chat model to use. If unset, it implicitly defaults to the first chat model. Make it explicit to reduce this confusion.

Resolves #882
This commit is contained in: parent f177723711 · commit 91c76d4152
5 changed files with 185 additions and 73 deletions
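In non-interactive mode, the commit enables a provider exactly when its API key environment variable is set. A minimal sketch of that decision (helper and constant names are illustrative, not from the commit; the real logic lives inline in the initialization diff below):

```python
import os

# Illustrative: in --non-interactive mode a provider's default chat models
# are added iff its API key env var is set.
PROVIDER_ENV_KEYS = {
    "OpenAI": "OPENAI_API_KEY",
    "Gemini": "GEMINI_API_KEY",
    "Anthropic": "ANTHROPIC_API_KEY",
}

def providers_to_enable() -> list[str]:
    """Return the providers whose API key env var is set."""
    return [name for name, env_key in PROVIDER_ENV_KEYS.items() if os.getenv(env_key)]

# e.g. ["OpenAI"] when only OPENAI_API_KEY is exported
print(providers_to_enable())
```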
@@ -44,10 +44,19 @@ services:
       - KHOJ_DEBUG=False
       - KHOJ_ADMIN_EMAIL=username@example.com
       - KHOJ_ADMIN_PASSWORD=password
-      # Uncomment the following lines to make your instance publicly accessible.
-      # Replace the domain with your domain. Proceed with caution, especially if you are using anonymous mode.
+      # Uncomment lines below to use chat models by each provider.
+      # Ensure you set your provider specific API keys.
+      # ---
+      # - OPENAI_API_KEY=your_openai_api_key
+      # - GEMINI_API_KEY=your_gemini_api_key
+      # - ANTHROPIC_API_KEY=your_anthropic_api_key
+      # Uncomment the necessary lines below to make your instance publicly accessible.
+      # Replace the KHOJ_DOMAIN with either your domain or IP address (no http/https prefix).
+      # Proceed with caution, especially if you are using anonymous mode.
+      # ---
       # - KHOJ_NO_HTTPS=True
       # - KHOJ_DOMAIN=192.168.0.104
       # - KHOJ_DOMAIN=khoj.example.com
     command: --host="0.0.0.0" --port=42110 -vv --anonymous-mode --non-interactive
@@ -18,13 +18,20 @@ from khoj.utils.helpers import is_none_or_empty, merge_dicts

 logger = logging.getLogger(__name__)
 model_to_prompt_size = {
+    # OpenAI Models
     "gpt-3.5-turbo": 12000,
     "gpt-3.5-turbo-0125": 12000,
     "gpt-4-0125-preview": 20000,
     "gpt-4-turbo-preview": 20000,
     "gpt-4o": 20000,
     "gpt-4o-mini": 20000,
     "o1-preview": 20000,
     "o1-mini": 20000,
+    # Google Models
+    "gemini-1.5-flash": 20000,
+    "gemini-1.5-pro": 20000,
+    # Anthropic Models
+    "claude-3-5-sonnet-20240620": 20000,
+    "claude-3-opus-20240229": 20000,
+    # Offline Models
     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
     "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,

@@ -163,7 +170,7 @@ def generate_chatml_messages_with_context(
     if loaded_model:
         max_prompt_size = infer_max_tokens(loaded_model.n_ctx(), model_to_prompt_size.get(model_name, math.inf))
     else:
-        max_prompt_size = model_to_prompt_size.get(model_name, 2000)
+        max_prompt_size = model_to_prompt_size.get(model_name, 10000)

     # Scale lookback turns proportional to max prompt size supported by model
     lookback_turns = max_prompt_size // 750
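Two behaviors in this file are worth noting: offline models clamp the prompt budget to the loaded model's context window via infer_max_tokens (its body is not shown in this hunk), and unknown API models now fall back to a 10000-token budget, which in turn scales the conversation lookback. A hedged sketch, assuming infer_max_tokens is a simple clamp:

```python
import math

# Assumption: infer_max_tokens clamps the configured prompt size to the
# model's context window. Its real body is not visible in this diff.
def infer_max_tokens(model_context_window: int, configured_max_prompt_size: float) -> int:
    return int(min(model_context_window, configured_max_prompt_size))

# From the diff: unknown models fall back to a 10000-token prompt budget,
# and chat history lookback scales with that budget.
max_prompt_size = infer_max_tokens(8192, math.inf)  # -> 8192 for an 8k-context model
lookback_turns = max_prompt_size // 750             # -> 10 conversation turns
```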
@@ -8,8 +8,15 @@ empty_escape_sequences = "\n|\r|\t| "
 app_env_filepath = "~/.khoj/env"
 telemetry_server = "https://khoj.beta.haletic.com/v1/telemetry"
 content_directory = "~/.khoj/content/"
-default_offline_chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
-default_online_chat_model = "gpt-4o-mini"
+default_offline_chat_models = [
+    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
+    "bartowski/gemma-2-9b-it-GGUF",
+    "bartowski/gemma-2-2b-it-GGUF",
+    "bartowski/Phi-3.5-mini-instruct-GGUF",
+]
+default_openai_chat_models = ["gpt-4o-mini", "gpt-4o"]
+default_gemini_chat_models = ["gemini-1.5-flash", "gemini-1.5-pro"]
+default_anthropic_chat_models = ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"]

 empty_config = {
     "search-type": {
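Order matters in these lists: the first entry can stand in for the old single-model constants, which is how the test fixture in the last hunk below picks its model. A tiny illustration:

```python
default_offline_chat_models = [
    "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF",
    "bartowski/gemma-2-9b-it-GGUF",
    "bartowski/gemma-2-2b-it-GGUF",
    "bartowski/Phi-3.5-mini-instruct-GGUF",
]

# The first entry replaces the removed default_offline_chat_model constant.
assert default_offline_chat_models[0] == "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF"
```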
@@ -6,11 +6,17 @@ from khoj.database.models import (
     ChatModelOptions,
     KhojUser,
     OpenAIProcessorConversationConfig,
+    ServerChatSettings,
     SpeechToTextModelOptions,
     TextToImageModelConfig,
 )
 from khoj.processor.conversation.utils import model_to_prompt_size, model_to_tokenizer
-from khoj.utils.constants import default_offline_chat_model, default_online_chat_model
+from khoj.utils.constants import (
+    default_anthropic_chat_models,
+    default_gemini_chat_models,
+    default_offline_chat_models,
+    default_openai_chat_models,
+)

 logger = logging.getLogger(__name__)
@@ -32,78 +38,44 @@ def initialization(interactive: bool = True):

     def _create_chat_configuration():
         logger.info(
             "🗣️ Configure chat models available to your server. You can always update these at /server/admin using your admin account"
         )

-        try:
-            use_offline_model = "y" if not interactive else input("Use offline chat model? (y/n): ")
-            if use_offline_model == "y":
-                logger.info("🗣️ Setting up offline chat model")
-
-                if interactive:
-                    offline_chat_model = input(
-                        f"Enter the offline chat model you want to use. See HuggingFace for available GGUF models (default: {default_offline_chat_model}): "
-                    )
-                else:
-                    offline_chat_model = ""
-                if offline_chat_model == "":
-                    ChatModelOptions.objects.create(
-                        chat_model=default_offline_chat_model, model_type=ChatModelOptions.ModelType.OFFLINE
-                    )
-                else:
-                    default_max_tokens = model_to_prompt_size.get(offline_chat_model, 4000)
-                    max_tokens = input(
-                        f"Enter the maximum number of tokens to use for the offline chat model (default {default_max_tokens}):"
-                    )
-                    max_tokens = max_tokens or default_max_tokens
-
-                    default_tokenizer = model_to_tokenizer.get(
-                        offline_chat_model, "hf-internal-testing/llama-tokenizer"
-                    )
-                    tokenizer = input(
-                        f"Enter the tokenizer to use for the offline chat model (default: {default_tokenizer}):"
-                    )
-                    tokenizer = tokenizer or default_tokenizer
-
-                    ChatModelOptions.objects.create(
-                        chat_model=offline_chat_model,
-                        model_type=ChatModelOptions.ModelType.OFFLINE,
-                        max_prompt_size=max_tokens,
-                        tokenizer=tokenizer,
-                    )
-        except ModuleNotFoundError as e:
-            logger.warning("Offline models are not supported on this device.")
-
         # Set up OpenAI's online models
         default_openai_api_key = os.getenv("OPENAI_API_KEY")
         default_use_openai_model = {True: "y", False: "n"}[default_openai_api_key != None]
-        use_openai_model = default_use_openai_model if not interactive else input("Use OpenAI models? (y/n): ")
-        if use_openai_model == "y":
-            logger.info("🗣️ Setting up your OpenAI configuration")
+        use_model_provider = default_use_openai_model if not interactive else input("Add OpenAI models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your OpenAI configuration")
             if interactive:
-                api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
+                user_api_key = input(f"Enter your OpenAI API key (default: {default_openai_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_openai_api_key
             else:
                 api_key = default_openai_api_key
-            OpenAIProcessorConversationConfig.objects.create(api_key=api_key)
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="OpenAI")

             if interactive:
-                openai_chat_model = input(
-                    f"Enter the OpenAI chat model you want to use (default: {default_online_chat_model}): "
+                chat_model_names = input(
+                    f"Enter the OpenAI chat models you want to use (default: {','.join(default_openai_chat_models)}): "
                 )
-                openai_chat_model = openai_chat_model or default_online_chat_model
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_openai_chat_models
+                chat_models = [model.strip() for model in chat_models]
             else:
-                openai_chat_model = default_online_chat_model
-            default_max_tokens = model_to_prompt_size.get(openai_chat_model, 10000)
-            if interactive:
-                max_tokens = input(
-                    f"Enter the maximum number of tokens to use for the OpenAI chat model (default: {default_max_tokens}): "
-                )
-                max_tokens = max_tokens or default_max_tokens
-            else:
-                max_tokens = default_max_tokens
-            ChatModelOptions.objects.create(
-                chat_model=openai_chat_model, model_type=ChatModelOptions.ModelType.OPENAI, max_prompt_size=max_tokens
-            )
+                chat_models = default_openai_chat_models
+
+            # Add OpenAI chat models
+            for chat_model in chat_models:
+                vision_enabled = chat_model in ["gpt-4o-mini", "gpt-4o"]
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.OPENAI,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=vision_enabled,
+                )

             # Add OpenAI speech to text model
             default_speech2text_model = "whisper-1"
             if interactive:
                 openai_speech2text_model = input(
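Each provider block repeats the same prompt-and-parse step for comma-separated model names. Factored into a standalone function for clarity (parse_model_names is a hypothetical helper, not part of the commit):

```python
def parse_model_names(user_input: str, defaults: list[str]) -> list[str]:
    """Hypothetical helper mirroring the diff's parsing: fall back to the
    provider defaults on empty input, else split on commas and strip spaces."""
    if user_input == "":
        return defaults
    return [model.strip() for model in user_input.split(",")]

assert parse_model_names("", ["gpt-4o-mini", "gpt-4o"]) == ["gpt-4o-mini", "gpt-4o"]
assert parse_model_names("gpt-4o, o1-mini", []) == ["gpt-4o", "o1-mini"]
```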
@@ -116,6 +88,7 @@ def initialization(interactive: bool = True):
                 model_name=openai_speech2text_model, model_type=SpeechToTextModelOptions.ModelType.OPENAI
             )

+            # Add OpenAI text to image model
             default_text_to_image_model = "dall-e-3"
             if interactive:
                 openai_text_to_image_model = input(
@@ -128,9 +101,124 @@ def initialization(interactive: bool = True):
                 model_name=openai_text_to_image_model, model_type=TextToImageModelConfig.ModelType.OPENAI
             )

-        if use_offline_model == "y" or use_openai_model == "y":
-            logger.info("🗣️ Chat model configuration complete")
+        # Set up Google's Gemini online chat models
+        default_gemini_api_key = os.getenv("GEMINI_API_KEY")
+        default_use_gemini_model = {True: "y", False: "n"}[default_gemini_api_key != None]
+        use_model_provider = default_use_gemini_model if not interactive else input("Add Google's chat models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your Google Gemini configuration")
+            if interactive:
+                user_api_key = input(f"Enter your Gemini API key (default: {default_gemini_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_gemini_api_key
+            else:
+                api_key = default_gemini_api_key
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Gemini")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the Gemini chat models you want to use (default: {','.join(default_gemini_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_gemini_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_gemini_chat_models
+
+            # Add Gemini chat models
+            for chat_model in chat_models:
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                vision_enabled = False
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.GOOGLE,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=False,
+                )
+
+        # Set up Anthropic's online chat models
+        default_anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
+        default_use_anthropic_model = {True: "y", False: "n"}[default_anthropic_api_key != None]
+        use_model_provider = (
+            default_use_anthropic_model if not interactive else input("Add Anthropic's chat models? (y/n): ")
+        )
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up your Anthropic configuration")
+            if interactive:
+                user_api_key = input(f"Enter your Anthropic API key (default: {default_anthropic_api_key}): ")
+                api_key = user_api_key if user_api_key != "" else default_anthropic_api_key
+            else:
+                api_key = default_anthropic_api_key
+            chat_model_provider = OpenAIProcessorConversationConfig.objects.create(api_key=api_key, name="Anthropic")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the Anthropic chat models you want to use (default: {','.join(default_anthropic_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_anthropic_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_anthropic_chat_models
+
+            # Add Anthropic chat models
+            for chat_model in chat_models:
+                vision_enabled = False
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.ANTHROPIC,
+                    max_prompt_size=default_max_tokens,
+                    openai_config=chat_model_provider,
+                    vision_enabled=False,
+                )
+
+        # Set up offline chat models
+        use_model_provider = "y" if not interactive else input("Add Offline chat models? (y/n): ")
+        if use_model_provider == "y":
+            logger.info("️💬 Setting up Offline chat models")
+
+            if interactive:
+                chat_model_names = input(
+                    f"Enter the offline chat models you want to use. See HuggingFace for available GGUF models (default: {','.join(default_offline_chat_models)}): "
+                )
+                chat_models = chat_model_names.split(",") if chat_model_names != "" else default_offline_chat_models
+                chat_models = [model.strip() for model in chat_models]
+            else:
+                chat_models = default_offline_chat_models
+
+            # Add chat models
+            for chat_model in chat_models:
+                default_max_tokens = model_to_prompt_size.get(chat_model)
+                default_tokenizer = model_to_tokenizer.get(chat_model)
+                ChatModelOptions.objects.create(
+                    chat_model=chat_model,
+                    model_type=ChatModelOptions.ModelType.OFFLINE,
+                    max_prompt_size=default_max_tokens,
+                    tokenizer=default_tokenizer,
+                )
+
+        chat_models_configured = ChatModelOptions.objects.count()
+
+        # Explicitly set default chat model
+        if chat_models_configured > 0:
+            default_chat_model_name = ChatModelOptions.objects.first().chat_model
+            # If there are multiple chat models, ask the user to choose the default chat model
+            if chat_models_configured > 1 and interactive:
+                user_chat_model_name = input(
+                    f"Enter the default chat model to use (default: {default_chat_model_name}): "
+                )
+            else:
+                user_chat_model_name = None
+
+            # If the user's choice is valid, set it as the default chat model
+            if user_chat_model_name and ChatModelOptions.objects.filter(chat_model=user_chat_model_name).exists():
+                default_chat_model_name = user_chat_model_name
+
+            # Create a server chat settings object with the default chat model
+            default_chat_model = ChatModelOptions.objects.filter(chat_model=default_chat_model_name).first()
+            ServerChatSettings.objects.create(chat_default=default_chat_model)
+            logger.info("🗣️ Chat model configuration complete")

         # Set up offline speech to text model
         use_offline_speech2text_model = "n" if not interactive else input("Use offline speech to text model? (y/n): ")
         if use_offline_speech2text_model == "y":
             logger.info("🗣️ Setting up offline speech to text model")
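Stripped of the Django ORM, the default-model selection above follows a simple rule: start from the first configured model, let an interactive user propose an override, and accept the override only if it names a configured model. A self-contained sketch over plain lists instead of ChatModelOptions querysets:

```python
from typing import Optional

def choose_default_chat_model(configured: list[str], user_choice: Optional[str]) -> Optional[str]:
    """Sketch of the default-selection rule from the diff, using plain lists."""
    if not configured:
        return None
    default = configured[0]  # implicit default: first configured model
    if user_choice and user_choice in configured:
        default = user_choice  # valid user override wins
    return default

assert choose_default_chat_model(["gpt-4o-mini", "gpt-4o"], "gpt-4o") == "gpt-4o"
assert choose_default_chat_model(["gpt-4o-mini", "gpt-4o"], "typo") == "gpt-4o-mini"
```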
@@ -163,7 +251,8 @@ def initialization(interactive: bool = True):
         try:
             _create_chat_configuration()
             break
-        # Some environments don't support interactive input. We catch the exception and return if that's the case. The admin can still configure their settings from the admin page.
+        # Some environments don't support interactive input. We catch the exception and return if that's the case.
+        # The admin can still configure their settings from the admin page.
         except EOFError:
             return
         except Exception as e:
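The surrounding retry loop, only partially visible in this hunk, keeps re-prompting on failure but bails out cleanly where stdin is unavailable. Roughly (the body of the final except is cut off in the diff; the sketch assumes it logs and retries):

```python
from typing import Callable

def run_initialization(create_configuration: Callable[[], None]) -> None:
    """Hedged sketch of the retry pattern around _create_chat_configuration."""
    while True:
        try:
            create_configuration()
            break
        except EOFError:
            # No interactive stdin (e.g. some containers); settings can
            # still be configured later from the admin page.
            return
        except Exception as e:
            # Assumed behavior: report the error and prompt again.
            print(f"Chat model configuration failed, retrying: {e}")
```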
@@ -19,12 +19,12 @@ from khoj.processor.conversation.offline.chat_model import (
 from khoj.processor.conversation.offline.utils import download_model
 from khoj.processor.conversation.utils import message_to_log
 from khoj.routers.helpers import aget_relevant_output_modes
-from khoj.utils.constants import default_offline_chat_model
+from khoj.utils.constants import default_offline_chat_models


 @pytest.fixture(scope="session")
 def loaded_model():
-    return download_model(default_offline_chat_model, max_tokens=5000)
+    return download_model(default_offline_chat_models[0], max_tokens=5000)


 freezegun.configure(extend_ignore_list=["transformers"])