Bump default max prompt size for commercial chat models
Some checks are pending
dockerize / Publish Khoj Docker Images (push) Waiting to run
dockerize / manifest (push) Blocked by required conditions
build and deploy github pages for documentation / deploy (push) Waiting to run
pre-commit / Setup Application and Lint (push) Waiting to run
pypi / Publish Python Package to PyPI (push) Waiting to run
test / Run Tests (push) Waiting to run

This commit is contained in:
Debanjum 2024-12-21 17:31:05 -08:00
parent 37ae48d9cf
commit 8d129c4675

View file

@@ -34,7 +34,6 @@ from khoj.search_filter.word_filter import WordFilter
 from khoj.utils import state
 from khoj.utils.helpers import (
     ConversationCommand,
-    in_debug_mode,
     is_none_or_empty,
     is_promptrace_enabled,
     merge_dicts,
@@ -47,28 +46,27 @@ logger = logging.getLogger(__name__)
 try:
     from git import Repo
 except ImportError:
-    if in_debug_mode():
-        logger.warning("GitPython not installed. `pip install gitpython` to enable prompt tracer.")
+    if is_promptrace_enabled():
+        logger.warning("GitPython not installed. `pip install gitpython` to use prompt tracer.")

 model_to_prompt_size = {
     # OpenAI Models
-    "gpt-4o": 20000,
-    "gpt-4o-mini": 20000,
+    "gpt-4o": 60000,
+    "gpt-4o-mini": 60000,
     "o1": 20000,
-    "o1-mini": 20000,
+    "o1-mini": 60000,
     # Google Models
-    "gemini-1.5-flash": 20000,
-    "gemini-1.5-pro": 20000,
+    "gemini-1.5-flash": 60000,
+    "gemini-1.5-pro": 60000,
     # Anthropic Models
-    "claude-3-5-sonnet-20241022": 20000,
-    "claude-3-5-haiku-20241022": 20000,
+    "claude-3-5-sonnet-20241022": 60000,
+    "claude-3-5-haiku-20241022": 60000,
     # Offline Models
+    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
     "bartowski/Llama-3.2-3B-Instruct-GGUF": 20000,
     "bartowski/gemma-2-9b-it-GGUF": 6000,
     "bartowski/gemma-2-2b-it-GGUF": 6000,
-    "Qwen/Qwen2.5-14B-Instruct-GGUF": 20000,
 }
 model_to_tokenizer: Dict[str, str] = {}