From 2a76c69d0db16d46fa858bf206746237138a0920 Mon Sep 17 00:00:00 2001 From: Debanjum Date: Mon, 18 Nov 2024 14:19:09 -0800 Subject: [PATCH] Run online, offine chat actor, director tests for any supported provider - Previously online chat actors, director tests only worked with openai. This change allows running them for any supported onlnie provider including Google, Anthropic and Openai. - Enable online/offline chat actor, director in two ways: 1. Explicitly setting KHOJ_TEST_CHAT_PROVIDER environment variable to google, anthropic, openai, offline 2. Implicitly by the first API key found from openai, google or anthropic. - Default offline chat provider to use Llama 3.1 3B for faster, lower compute test runs --- tests/conftest.py | 22 +++++++++--- tests/helpers.py | 36 ++++++++++++++++--- tests/test_offline_chat_actors.py | 5 ++- tests/test_offline_chat_director.py | 6 ++-- ...t_actors.py => test_online_chat_actors.py} | 15 ++++---- ...rector.py => test_online_chat_director.py} | 7 ++-- 6 files changed, 64 insertions(+), 27 deletions(-) rename tests/{test_openai_chat_actors.py => test_online_chat_actors.py} (98%) rename tests/{test_openai_chat_director.py => test_online_chat_director.py} (99%) diff --git a/tests/conftest.py b/tests/conftest.py index b91af758..7c233594 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ from khoj.configure import ( ) from khoj.database.models import ( Agent, + ChatModelOptions, GithubConfig, GithubRepoConfig, KhojApiUser, @@ -39,6 +40,8 @@ from tests.helpers import ( SubscriptionFactory, UserConversationProcessorConfigFactory, UserFactory, + get_chat_api_key, + get_chat_provider, ) @@ -307,10 +310,19 @@ def chat_client_builder(search_config, user, index_content=True, require_auth=Fa configure_content(user, all_files) # Initialize Processor from Config - if os.getenv("OPENAI_API_KEY"): - chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai") - chat_model.openai_config = OpenAIProcessorConversationConfigFactory() - UserConversationProcessorConfigFactory(user=user, setting=chat_model) + chat_provider = get_chat_provider() + online_chat_model: ChatModelOptionsFactory = None + if chat_provider == ChatModelOptions.ModelType.OPENAI: + online_chat_model = ChatModelOptionsFactory(chat_model="gpt-4o-mini", model_type="openai") + elif chat_provider == ChatModelOptions.ModelType.GOOGLE: + online_chat_model = ChatModelOptionsFactory(chat_model="gemini-1.5-flash", model_type="google") + elif chat_provider == ChatModelOptions.ModelType.ANTHROPIC: + online_chat_model = ChatModelOptionsFactory(chat_model="claude-3-5-haiku-20241022", model_type="anthropic") + if online_chat_model: + online_chat_model.openai_config = OpenAIProcessorConversationConfigFactory( + api_key=get_chat_api_key(chat_provider) + ) + UserConversationProcessorConfigFactory(user=user, setting=online_chat_model) state.anonymous_mode = not require_auth @@ -385,7 +397,7 @@ def client_offline_chat(search_config: SearchConfig, default_user2: KhojUser): # Initialize Processor from Config ChatModelOptionsFactory( - chat_model="bartowski/Meta-Llama-3.1-8B-Instruct-GGUF", + chat_model="bartowski/Meta-Llama-3.1-3B-Instruct-GGUF", tokenizer=None, max_prompt_size=None, model_type="offline", diff --git a/tests/helpers.py b/tests/helpers.py index ae5c7779..6c70536e 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -17,6 +17,32 @@ from khoj.database.models import ( ) +def get_chat_provider(default: ChatModelOptions.ModelType | None = ChatModelOptions.ModelType.OFFLINE): + provider = os.getenv("KHOJ_TEST_CHAT_PROVIDER") + if provider and provider in ChatModelOptions.ModelType: + return ChatModelOptions.ModelType(provider) + elif os.getenv("OPENAI_API_KEY"): + return ChatModelOptions.ModelType.OPENAI + elif os.getenv("GEMINI_API_KEY"): + return ChatModelOptions.ModelType.GOOGLE + elif os.getenv("ANTHROPIC_API_KEY"): + return ChatModelOptions.ModelType.ANTHROPIC + else: + return default + + +def get_chat_api_key(provider: ChatModelOptions.ModelType = None): + provider = provider or get_chat_provider() + if provider == ChatModelOptions.ModelType.OPENAI: + return os.getenv("OPENAI_API_KEY") + elif provider == ChatModelOptions.ModelType.GOOGLE: + return os.getenv("GEMINI_API_KEY") + elif provider == ChatModelOptions.ModelType.ANTHROPIC: + return os.getenv("ANTHROPIC_API_KEY") + else: + return os.getenv("OPENAI_API_KEY") or os.getenv("GEMINI_API_KEY") or os.getenv("ANTHROPIC_API_KEY") + + class UserFactory(factory.django.DjangoModelFactory): class Meta: model = KhojUser @@ -40,19 +66,19 @@ class OpenAIProcessorConversationConfigFactory(factory.django.DjangoModelFactory class Meta: model = OpenAIProcessorConversationConfig - api_key = os.getenv("OPENAI_API_KEY") + api_key = get_chat_api_key() class ChatModelOptionsFactory(factory.django.DjangoModelFactory): class Meta: model = ChatModelOptions - max_prompt_size = 3500 + max_prompt_size = 20000 tokenizer = None - chat_model = "bartowski/Meta-Llama-3.1-8B-Instruct-GGUF" - model_type = "offline" + chat_model = "bartowski/Meta-Llama-3.2-3B-Instruct-GGUF" + model_type = get_chat_provider() openai_config = factory.LazyAttribute( - lambda obj: OpenAIProcessorConversationConfigFactory() if os.getenv("OPENAI_API_KEY") else None + lambda obj: OpenAIProcessorConversationConfigFactory() if get_chat_api_key() else None ) diff --git a/tests/test_offline_chat_actors.py b/tests/test_offline_chat_actors.py index 83768834..de0bbab6 100644 --- a/tests/test_offline_chat_actors.py +++ b/tests/test_offline_chat_actors.py @@ -2,7 +2,10 @@ from datetime import datetime import pytest -SKIP_TESTS = True +from khoj.database.models import ChatModelOptions +from tests.helpers import get_chat_provider + +SKIP_TESTS = get_chat_provider(default=None) != ChatModelOptions.ModelType.OFFLINE pytestmark = pytest.mark.skipif( SKIP_TESTS, reason="Disable in CI to avoid long test runs.", diff --git a/tests/test_offline_chat_director.py b/tests/test_offline_chat_director.py index aa9bd5d1..bb57d1b6 100644 --- a/tests/test_offline_chat_director.py +++ b/tests/test_offline_chat_director.py @@ -4,13 +4,13 @@ import pytest from faker import Faker from freezegun import freeze_time -from khoj.database.models import Agent, Entry, KhojUser +from khoj.database.models import Agent, ChatModelOptions, Entry, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.utils import message_to_log from khoj.routers.helpers import aget_data_sources_and_output_format -from tests.helpers import ConversationFactory +from tests.helpers import ConversationFactory, get_chat_provider -SKIP_TESTS = True +SKIP_TESTS = get_chat_provider(default=None) != ChatModelOptions.ModelType.OFFLINE pytestmark = pytest.mark.skipif( SKIP_TESTS, reason="Disable in CI to avoid long test runs.", diff --git a/tests/test_openai_chat_actors.py b/tests/test_online_chat_actors.py similarity index 98% rename from tests/test_openai_chat_actors.py rename to tests/test_online_chat_actors.py index 87533ab4..449ed9b1 100644 --- a/tests/test_openai_chat_actors.py +++ b/tests/test_online_chat_actors.py @@ -14,14 +14,13 @@ from khoj.routers.helpers import ( should_notify, ) from khoj.utils.helpers import ConversationCommand -from khoj.utils.rawconfig import LocationData -from tests.conftest import default_user2 +from tests.helpers import get_chat_api_key # Initialize variables for tests -api_key = os.getenv("OPENAI_API_KEY") +api_key = get_chat_api_key() if api_key is None: pytest.skip( - reason="Set OPENAI_API_KEY environment variable to run tests below. Get OpenAI API key from https://platform.openai.com/account/api-keys", + reason="Set OPENAI_API_KEY, GEMINI_API_KEY or ANTHROPIC_API_KEY environment variable to run tests below.", allow_module_level=True, ) @@ -42,7 +41,6 @@ def test_extract_question_with_date_filter_from_relative_day(): ("dt>='1984-04-01'", "dt<'1984-04-02'"), ("dt>'1984-03-31'", "dt<'1984-04-02'"), ] - assert len(response) == 1 assert any([start in response[0] and end in response[0] for start, end in expected_responses]), ( "Expected date filter to limit to 1st April 1984 in response but got: " + response[0] ) @@ -105,9 +103,9 @@ def test_extract_multiple_implicit_questions_from_message(): expected_responses = [ ("morpheus", "neo"), ] - assert len(response) == 2 + assert len(response) > 1 assert any([start in response[0].lower() and end in response[1].lower() for start, end in expected_responses]), ( - "Expected two search queries in response but got: " + response[0] + "Expected more than one search query in response but got: " + response[0] ) @@ -153,8 +151,7 @@ def test_generate_search_query_using_question_and_answer_from_chat_history(): response = extract_questions("Who is their father?", conversation_log=populate_chat_history(message_list)) # Assert - assert len(response) == 1 - assert "Leia" in response[0] and "Luke" in response[0] + assert any(["Leia" in response or "Luke" in response for response in response]) # ---------------------------------------------------------------------------------------------------- diff --git a/tests/test_openai_chat_director.py b/tests/test_online_chat_director.py similarity index 99% rename from tests/test_openai_chat_director.py rename to tests/test_online_chat_director.py index 49cda98b..317384cb 100644 --- a/tests/test_openai_chat_director.py +++ b/tests/test_online_chat_director.py @@ -1,18 +1,17 @@ import os import urllib.parse -from urllib.parse import quote import pytest from freezegun import freeze_time -from khoj.database.models import Agent, Entry, KhojUser, LocalPdfConfig +from khoj.database.models import Agent, Entry, KhojUser from khoj.processor.conversation import prompts from khoj.processor.conversation.utils import message_to_log from khoj.routers.helpers import aget_data_sources_and_output_format -from tests.helpers import ConversationFactory +from tests.helpers import ConversationFactory, get_chat_api_key # Initialize variables for tests -api_key = os.getenv("OPENAI_API_KEY") +api_key = get_chat_api_key() if api_key is None: pytest.skip( reason="Set OPENAI_API_KEY environment variable to run tests below. Get OpenAI API key from https://platform.openai.com/account/api-keys",