Upgrade to latest GPT4All. Use Mistral as default offline chat model
GPT4All now supports GGUF llama.cpp chat models. The latest GPT4All with Mistral responds at least 3x faster: on a MacBook Pro, responses start in ~10s versus 30s-120s earlier. Mistral is also a better chat model overall, although it hallucinates more than Llama-2.
Parent: 6dc0df3afb
Commit: 0f1ebcae18
10 changed files with 84 additions and 11 deletions
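For context, a minimal sketch (not part of this commit) of what the dependency upgrade enables: loading a GGUF chat model through the gpt4all >= 2.0.0 Python bindings. The model name matches the new default set throughout this diff; generate() and its parameters come from the gpt4all 2.x bindings and are an assumption here, not code from this repository.

    # Hedged sketch: load the new default GGUF chat model with gpt4all 2.x bindings.
    from gpt4all import GPT4All

    # GPT4All downloads the model file on first use if it is not already cached.
    model = GPT4All(model_name="mistral-7b-instruct-v0.1.Q4_0.gguf")
    print(model.generate("What is a GGUF model?", max_tokens=128))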
@@ -60,8 +60,8 @@ dependencies = [
     "bs4 >= 0.0.1",
     "anyio == 3.7.1",
     "pymupdf >= 1.23.3",
-    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'",
 ]
 dynamic = ["version"]

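Since the requirement moves from a pinned 1.0.12 to any 2.x release, a runtime guard can catch stale environments early. This is an illustrative sketch, not code from the commit; only the version bound comes from the dependency change above.

    # Illustrative guard: fail fast if the installed gpt4all bindings predate
    # the GGUF support required by the new default chat model.
    from importlib.metadata import version as installed_version

    from packaging import version

    if version.parse(installed_version("gpt4all")) < version.parse("2.0.0"):
        raise RuntimeError("Offline chat now needs gpt4all >= 2.0.0 for GGUF models")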
src/khoj/migrations/migrate_offline_chat_default_model.py (new file, 69 lines)
@@ -0,0 +1,69 @@
"""
Current format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
  conversation:
    offline-chat:
        enable-offline-chat: false
        chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
    ...
search-type:
    ...

New format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
  conversation:
    offline-chat:
        enable-offline-chat: false
        chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
    ...
search-type:
    ...
"""
import logging

from packaging import version

from khoj.utils.yaml import load_config_from_file, save_config_to_file


logger = logging.getLogger(__name__)


def migrate_offline_chat_default_model(args):
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model.endswith(".bin"):
            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args

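The new migration only rewrites configs that still point at a .bin (GGML) chat model and stamps the schema version so it runs once. A rough usage sketch follows; it assumes load_config_from_file/save_config_to_file round-trip plain YAML and uses SimpleNamespace as a stand-in for the parsed CLI args, neither of which is shown in this diff.

    # Hedged sketch: exercise the migration against a throwaway khoj.yml.
    from types import SimpleNamespace

    import yaml

    from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model

    old_config = {
        "version": "0.12.3",
        "processor": {
            "conversation": {
                "offline-chat": {
                    "enable-offline-chat": False,
                    "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
                },
            },
        },
    }
    with open("/tmp/khoj.yml", "w") as f:
        yaml.safe_dump(old_config, f)

    migrate_offline_chat_default_model(SimpleNamespace(config_file="/tmp/khoj.yml"))

    with open("/tmp/khoj.yml") as f:
        migrated = yaml.safe_load(f)
    # Any .bin (GGML) model is replaced by the GGUF default; version is bumped to 0.12.4.
    print(migrated["processor"]["conversation"]["offline-chat"]["chat-model"])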
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 def extract_questions_offline(
     text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     conversation_log={},
     use_history: bool = True,

@@ -123,7 +123,7 @@ def converse_offline(
     references,
     user_query,
     conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_command=ConversationCommand.Default,

@@ -14,9 +14,9 @@ def download_model(model_name: str):
     # Use GPU for Chat Model, if available
     try:
         model = GPT4All(model_name=model_name, device="gpu")
-        logger.debug("Loaded chat model to GPU.")
+        logger.debug(f"Loaded {model_name} chat model to GPU.")
     except ValueError:
         model = GPT4All(model_name=model_name)
-        logger.debug("Loaded chat model to CPU.")
+        logger.debug(f"Loaded {model_name} chat model to CPU.")

     return model

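The loader tries the GPU first and falls back to CPU when GPT4All raises ValueError for an unavailable device. A short usage sketch, with the import path taken from the test file later in this diff; generate() is assumed from the gpt4all 2.x bindings rather than shown in this commit.

    # Sketch: reuse the project's loader, then query the model directly.
    from khoj.processor.conversation.gpt4all.utils import download_model

    chat_model = download_model("mistral-7b-instruct-v0.1.Q4_0.gguf")
    print(chat_model.generate("Say hello in one sentence.", max_tokens=64))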
@@ -20,9 +20,11 @@ model_to_prompt_size = {
     "gpt-4": 8192,
     "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
     "gpt-3.5-turbo-16k": 15000,
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": 1548,
 }
 model_to_tokenizer = {
     "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1",
 }

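These two maps pair each offline model with a prompt-size budget and a Hugging Face tokenizer. A hedged sketch of how such a lookup could be used to check a prompt against the budget; the dicts below mirror the entries added above (their module path is not given in this diff), and AutoTokenizer comes from the transformers library, not from this commit.

    # Hedged sketch: count tokens for the new default model via its mapped tokenizer.
    from transformers import AutoTokenizer

    model_to_prompt_size = {"mistral-7b-instruct-v0.1.Q4_0.gguf": 1548}
    model_to_tokenizer = {"mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1"}

    model_name = "mistral-7b-instruct-v0.1.Q4_0.gguf"
    tokenizer = AutoTokenizer.from_pretrained(model_to_tokenizer[model_name])

    prompt = "Summarize my notes on GGUF quantization."
    n_tokens = len(tokenizer.encode(prompt))
    if n_tokens > model_to_prompt_size[model_name]:
        print(f"Prompt ({n_tokens} tokens) exceeds the {model_to_prompt_size[model_name]}-token budget")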
@@ -10,6 +10,7 @@ from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
 from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
+from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model


 def cli(args=None):

@@ -61,6 +62,7 @@ def run_migrations(args):
         migrate_processor_conversation_schema,
         migrate_offline_model,
         migrate_offline_chat_schema,
+        migrate_offline_chat_default_model,
     ]
     for migration in migrations:
         args = migration(args)

@@ -55,7 +55,7 @@ empty_config = {
         },
         "offline-chat": {
             "enable-offline-chat": False,
-            "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+            "chat-model": "mistral-7b-instruct-v0.1.Q4_0.gguf",
         },
         "tokenizer": None,
         "max-prompt-size": None,

@@ -132,7 +132,7 @@ default_config = {
         },
         "offline-chat": {
             "enable-offline-chat": False,
-            "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
+            "chat-model": "mistral-7b-instruct-v0.1.Q4_0.gguf",
         },
         "tokenizer": None,
         "max-prompt-size": None,

@@ -93,7 +93,7 @@ class OpenAIProcessorConfig(ConfigBase):

 class OfflineChatProcessorConfig(ConfigBase):
     enable_offline_chat: Optional[bool] = False
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model: Optional[str] = "mistral-7b-instruct-v0.1.Q4_0.gguf"


 class ConversationProcessorConfig(ConfigBase):

@@ -206,7 +206,7 @@ def processor_config_offline_chat(tmp_path_factory):

     # Setup conversation processor
     processor_config = ProcessorConfig()
-    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True)
+    offline_chat = OfflineChatProcessorConfig(enable_offline_chat=True, chat_model="mistral-7b-instruct-v0.1.Q4_0.gguf")
     processor_config.conversation = ConversationProcessorConfig(
         offline_chat=offline_chat,
         conversation_logfile=processor_dir.joinpath("conversation_logs.json"),

@@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model

 from khoj.processor.conversation.utils import message_to_log

-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+MODEL_NAME = "mistral-7b-instruct-v0.1.Q4_0.gguf"


 @pytest.fixture(scope="session")