Default to gpt-4o-mini instead of gpt-3.5-turbo in tests, func args

GPT-4o-mini is cheaper and smarter than GPT-3.5-turbo, and it can hold
more context. We already default to gpt-4o-mini in production, so it
makes sense to upgrade the defaults and tests to work with it as well.
Debanjum Singh Solanky 2024-08-22 19:04:49 -07:00
parent 8a4c20d59a
commit 9986c183ea
5 changed files with 7 additions and 6 deletions


@@ -112,7 +112,7 @@ def converse(
     user_query,
     online_results: Optional[Dict[str, Dict]] = None,
     conversation_log={},
-    model: str = "gpt-3.5-turbo",
+    model: str = "gpt-4o-mini",
     api_key: Optional[str] = None,
     api_base_url: Optional[str] = None,
     temperature: float = 0.2,
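
The practical effect for callers: omitting the model argument now routes
the conversation through gpt-4o-mini. A minimal, hypothetical call site
(the parameter list is abridged to what this hunk shows; the full
signature likely takes more arguments):

    # Hypothetical call site; only parameters visible in this hunk are used.
    response = converse(
        user_query="Summarize my notes from last week",
        api_key="sk-...",  # OpenAI API key
        temperature=0.2,
        # model omitted -> now defaults to "gpt-4o-mini"
    )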


@@ -23,6 +23,7 @@ model_to_prompt_size = {
     "gpt-3.5-turbo-0125": 12000,
     "gpt-4-0125-preview": 20000,
     "gpt-4-turbo-preview": 20000,
+    "gpt-4o-mini": 20000,
     "TheBloke/Mistral-7B-Instruct-v0.2-GGUF": 3500,
     "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF": 3500,
     "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF": 20000,
@@ -136,7 +137,7 @@ def generate_chatml_messages_with_context(
     user_message,
     system_message=None,
     conversation_log={},
-    model_name="gpt-3.5-turbo",
+    model_name="gpt-4o-mini",
     loaded_model: Optional[Llama] = None,
     max_prompt_size=None,
     tokenizer_name=None,
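
Two changes land in this file: gpt-4o-mini gets a model_to_prompt_size
entry capped at 20000 tokens, matching the other GPT-4-class entries
(well below the model's raw context window), and
generate_chatml_messages_with_context picks it up as its default
model_name. A sketch of how such a lookup plausibly resolves the prompt
budget; the helper name and fallback value are assumptions, not from
this diff:

    DEFAULT_MAX_PROMPT_SIZE = 2000  # assumed fallback, not shown in the diff

    def resolve_max_prompt_size(model_name: str, max_prompt_size=None) -> int:
        # An explicit caller override wins; otherwise consult the per-model table.
        if max_prompt_size is not None:
            return max_prompt_size
        return model_to_prompt_size.get(model_name, DEFAULT_MAX_PROMPT_SIZE)

    assert resolve_max_prompt_size("gpt-4o-mini") == 20000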


@@ -89,7 +89,7 @@ class SearchConfig(ConfigBase):

 class OpenAIProcessorConfig(ConfigBase):
     api_key: str
-    chat_model: Optional[str] = "gpt-3.5-turbo"
+    chat_model: Optional[str] = "gpt-4o-mini"


 class OfflineChatProcessorConfig(ConfigBase):
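
Since ConfigBase is a Pydantic-style model, the new default should
surface anywhere a config is built without an explicit chat model. A
minimal sketch, assuming standard Pydantic defaulting:

    # Only api_key is required; chat_model falls back to the new default.
    config = OpenAIProcessorConfig(api_key="sk-...")
    assert config.chat_model == "gpt-4o-mini"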


@@ -192,7 +192,7 @@ def offline_agent():
 @pytest.mark.django_db
 @pytest.fixture
 def openai_agent():
-    chat_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
+    chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai")
     return Agent.objects.create(
         name="Accountant",
         chat_model=chat_model,
@@ -301,7 +301,7 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa
     # Initialize Processor from Config
     if os.getenv("OPENAI_API_KEY"):
-        chat_model = ChatModelOptionsFactory(chat_model="gpt-3.5-turbo", model_type="openai")
+        chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai")
         chat_model.openai_config = OpenAIProcessorConversationConfigFactory()
         UserConversationProcessorConfigFactory(user=user, setting=chat_model)
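
Tests consuming these fixtures see the new model transparently. A
hypothetical test against the updated openai_agent fixture (the
attribute path is assumed from the factory calls above, not verified):

    @pytest.mark.django_db
    def test_openai_agent_defaults_to_gpt_4o_mini(openai_agent):
        # Agent.chat_model is the ChatModelOptions row created by
        # ChatModelOptionsFactory in the fixture above.
        assert openai_agent.chat_model.chat_model == "gpt-4o-mini"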


@@ -6,7 +6,7 @@ from khoj.processor.conversation import utils

 class TestTruncateMessage:
     max_prompt_size = 10
-    model_name = "gpt-3.5-turbo"
+    model_name = "gpt-4o-mini"
     encoder = tiktoken.encoding_for_model(model_name)

     def test_truncate_message_all_small(self):
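
One caveat worth noting for this test change: tiktoken resolves
gpt-4o-mini to the o200k_base encoding, which tiktoken only ships from
version 0.7.0 onward, so older tiktoken installs would fail here with a
KeyError. For example:

    import tiktoken

    # gpt-4o-* models map to o200k_base in tiktoken >= 0.7.0;
    # older releases raise KeyError for this model name.
    encoder = tiktoken.encoding_for_model("gpt-4o-mini")
    tokens = encoder.encode("Default to gpt-4o-mini in tests")
    print(encoder.name, len(tokens))  # "o200k_base", token count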