khoj/tests/helpers.py
Debanjum Singh Solanky 8ca39a436c Use llama.cpp for offline chat models
- Benefits of moving to llama-cpp-python from gpt4all:
  - Support for all GGUF format chat models
  - Support for AMD, Nvidia, Mac, Vulkan GPU machines (instead of just Vulkan, Mac)
  - Supports models with more capabilities like tools, schema
    enforcement, speculative decoding, image gen, etc.
- Upgrade default chat model, prompt size, and tokenizer for the newly
  supported chat models

- Load offline chat model when present on disk without requiring internet
  - Load model onto GPU if not disabled and device has GPU
  - Load model onto CPU if loading model onto GPU fails
  - Create helper function to check for and load the model from disk when
    its model glob is present on disk (see the sketch below)

    `Llama.from_pretrained` needs internet to get repo info from
    HuggingFace. This isn't required if the model is already downloaded.

    Didn't find any existing HF or llama.cpp method that looks for a model
    glob on disk without internet.
2024-03-26 22:33:01 +05:30
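
A minimal sketch of the disk-first loading flow described in the commit
message above, assuming llama-cpp-python's `Llama` API; the helper name
and cache path are illustrative, not the actual Khoj code:

import glob
import logging
import os

from llama_cpp import Llama

def load_offline_model(repo_id: str, filename: str, use_gpu: bool = True) -> Llama:
    # Check for an already-downloaded GGUF matching the model glob, so no
    # internet round-trip to HuggingFace is needed for repo info.
    cache_dir = os.path.expanduser("~/.cache/huggingface/hub")  # assumed cache location
    matches = sorted(glob.glob(os.path.join(cache_dir, "**", filename), recursive=True))
    if matches:
        if use_gpu:
            try:
                # Offload all layers onto the GPU when one is available.
                return Llama(model_path=matches[0], n_gpu_layers=-1)
            except Exception:
                logging.warning("Loading model onto GPU failed; falling back to CPU")
        # CPU-only load, either by choice or after a failed GPU load.
        return Llama(model_path=matches[0], n_gpu_layers=0)
    # Model not on disk yet: download via HuggingFace (requires internet).
    return Llama.from_pretrained(repo_id=repo_id, filename=filename)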


import os
from datetime import datetime

import factory
from django.utils.timezone import make_aware

from khoj.database.models import (
    ChatModelOptions,
    Conversation,
    KhojApiUser,
    KhojUser,
    OfflineChatProcessorConversationConfig,
    OpenAIProcessorConversationConfig,
    SearchModelConfig,
    Subscription,
    UserConversationConfig,
)


class UserFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = KhojUser

    username = factory.Faker("name")
    email = factory.Faker("email")
    password = factory.Faker("password")
    uuid = factory.Faker("uuid4")


class ApiUserFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = KhojApiUser

    user = None
    name = factory.Faker("name")
    token = factory.Faker("password")


class ChatModelOptionsFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = ChatModelOptions

    max_prompt_size = 3500
    tokenizer = None
    chat_model = "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"
    model_type = "offline"


class UserConversationProcessorConfigFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = UserConversationConfig

    user = factory.SubFactory(UserFactory)
    setting = factory.SubFactory(ChatModelOptionsFactory)


class OfflineChatProcessorConversationConfigFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = OfflineChatProcessorConversationConfig

    enabled = True


class OpenAIProcessorConversationConfigFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = OpenAIProcessorConversationConfig

    api_key = os.getenv("OPENAI_API_KEY")


class ConversationFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = Conversation

    user = factory.SubFactory(UserFactory)


class SearchModelFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = SearchModelConfig

    name = "default"
    model_type = "text"
    bi_encoder = "thenlper/gte-small"
    cross_encoder = "cross-encoder/ms-marco-MiniLM-L-6-v2"


class SubscriptionFactory(factory.django.DjangoModelFactory):
    class Meta:
        model = Subscription

    user = factory.SubFactory(UserFactory)
    type = "standard"
    is_recurring = False
    renewal_date = make_aware(datetime.strptime("2100-04-01", "%Y-%m-%d"))
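
These factories are typically consumed from pytest; a minimal usage
sketch (the test below is illustrative and assumes pytest-django; it is
not part of this file):

import pytest

@pytest.mark.django_db
def test_default_offline_chat_config():
    # Build a user wired up to the default offline chat model options.
    user = UserFactory()
    OfflineChatProcessorConversationConfigFactory()
    config = UserConversationProcessorConfigFactory(user=user)
    assert config.setting.model_type == "offline"
    assert config.setting.chat_model == "NousResearch/Hermes-2-Pro-Mistral-7B-GGUF"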