Merge branch 'master' of github.com:khoj-ai/khoj into features/multi-user-support-khoj

Merge changes to use latest GPT4All with GPU and GGUF model support into the khoj multi-user support rearchitecture branch.

Commit 345856e7be: 16 changed files with 107 additions and 18 deletions
@@ -10,7 +10,8 @@

 Offline chat stays completely private and works without internet. But it is slower, lower quality and more compute intensive.

 > **System Requirements**:
-> - Machine with at least **6 GB of RAM** and **4 GB of Disk** available
+> - Minimum 8 GB RAM. Recommend **16 GB VRAM**
+> - Minimum **5 GB of Disk** available
 > - A CPU supporting [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) is required
 > - A Mac M1+ or [Vulkan supported GPU](https://vulkan.gpuinfo.org/) should significantly speed up chat response times

@@ -62,8 +62,8 @@ dependencies = [
     "pymupdf >= 1.23.5",
     "django == 4.2.5",
     "authlib == 1.2.1",
-    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'",
     "itsdangerous == 2.1.2",
     "httpx == 0.25.0",
     "pgvector == 0.2.3",

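The dependency change above swaps the pinned 1.x GPT4All bindings, which only load the older GGML .bin models, for the 2.x series, which loads GGUF models and adds GPU offload. A rough sketch of what the newer bindings look like in use; the model name matches the new default elsewhere in this diff, but the exact constructor and generate arguments are assumptions about the gpt4all 2.x API, not khoj code:

    # Illustrative sketch, not khoj code: load the new default GGUF chat model
    # with the gpt4all >= 2.0.0 Python bindings and generate a reply.
    from gpt4all import GPT4All

    model = GPT4All("mistral-7b-instruct-v0.1.Q4_0.gguf")  # downloads the model on first use
    print(model.generate("Summarize what Khoj does in one sentence.", max_tokens=128))
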
@@ -38,7 +38,7 @@ export class KhojChatModal extends Modal {
         await this.getChatHistory();

         // Add chat input field
-        contentEl.createEl("input",
+        const chatInput = contentEl.createEl("input",
             {
                 attr: {
                     type: "text",

@@ -48,10 +48,11 @@ export class KhojChatModal extends Modal {
                     class: "khoj-chat-input option"
                 }
             })
-            .addEventListener('change', (event) => { this.result = (<HTMLInputElement>event.target).value });
+        chatInput.addEventListener('change', (event) => { this.result = (<HTMLInputElement>event.target).value });

         // Scroll to bottom of modal, till the send message input box
         this.modalEl.scrollTop = this.modalEl.scrollHeight;
+        chatInput.focus();
     }

     generateReference(messageEl: any, reference: string, index: number) {

@@ -122,6 +122,7 @@ def set_state(args):
     state.demo = args.demo
     state.anonymous_mode = args.anonymous_mode
     state.khoj_version = version("khoj-assistant")
+    state.chat_on_gpu = args.chat_on_gpu


 def start_server(app, host=None, port=None, socket=None):

src/khoj/migrations/migrate_offline_chat_default_model.py (new file, 69 lines)

"""
Current format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
    conversation:
        offline-chat:
            enable-offline-chat: false
            chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
    ...
search-type:
    ...

New format of khoj.yml
---
app:
    ...
content-type:
    ...
processor:
    conversation:
        offline-chat:
            enable-offline-chat: false
            chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
    ...
search-type:
    ...
"""
import logging

from packaging import version

from khoj.utils.yaml import load_config_from_file, save_config_to_file


logger = logging.getLogger(__name__)


def migrate_offline_chat_default_model(args):
    schema_version = "0.12.4"
    raw_config = load_config_from_file(args.config_file)
    previous_version = raw_config.get("version")

    if "processor" not in raw_config:
        return args
    if raw_config["processor"] is None:
        return args
    if "conversation" not in raw_config["processor"]:
        return args
    if "offline-chat" not in raw_config["processor"]["conversation"]:
        return args
    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
        return args

    if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"):
        logger.info(
            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
        )
        raw_config["version"] = schema_version

        # Update offline chat model to mistral in GGUF format to use latest GPT4All
        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
        if offline_chat_model.endswith(".bin"):
            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"

        save_config_to_file(raw_config, args.config_file)
    return args

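For reference, khoj migrations receive the parsed CLI args and read the config path from them (see the run_migrations hunk further down). A hypothetical standalone invocation, assuming only that the args object carries a config_file attribute; the path shown is illustrative:

    # Hypothetical invocation sketch; khoj normally runs this via run_migrations().
    from types import SimpleNamespace
    from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model

    args = SimpleNamespace(config_file="/home/user/.khoj/khoj.yml")  # path is illustrative
    migrate_offline_chat_default_model(args)  # rewrites chat-model only if it still points at a .bin model
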
@@ -9,7 +9,7 @@ processor:
     conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
     max-prompt-size: null
     offline-chat:
-      chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+      chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
       enable-offline-chat: false
     openai:
       api-key: sk-blah

@@ -46,7 +46,7 @@ processor:
       - chat-model: gpt-3.5-turbo
         tokenizer: null
         type: openai
-      - chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+      - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
         tokenizer: null
         type: offline
   search-type:

@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)


 def extract_questions_offline(
     text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     conversation_log={},
     use_history: bool = True,

@@ -123,7 +123,7 @@ def converse_offline(
     references,
     user_query,
     conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_command=ConversationCommand.Default,

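Both offline-chat entry points now default to the Mistral GGUF model, so callers that omit the model argument pick up the new default automatically. A hedged sketch of a call relying on that default; the module path and the list-of-questions return type are assumptions based on the import paths and function names visible in this diff:

    # Sketch only; module path and return type are assumed, argument values are made up.
    from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline

    questions = extract_questions_offline(
        "What did I write about GPU support last week?",
        use_history=False,  # skip conversation history for a one-off query
    )
    print(questions)  # presumably a list of search queries for the indexed notes
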
@@ -1,5 +1,6 @@
 import logging

+from khoj.utils import state

 logger = logging.getLogger(__name__)


@@ -16,8 +17,13 @@ def download_model(model_name: str):

     # Decide whether to load model to GPU or CPU
     try:
-        # Check if machine has GPU and GPU has enough free memory to load the chat model
-        device = "gpu" if gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu"
+        # Try load chat model to GPU if:
+        # 1. Loading chat model to GPU isn't disabled via CLI and
+        # 2. Machine has GPU
+        # 3. GPU has enough free memory to load the chat model
+        device = (
+            "gpu" if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu"
+        )
     except ValueError:
         device = "cpu"

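The new device selection honours the chat_on_gpu flag and still falls back to the CPU when the GPU probe raises. The same pattern can be expressed against the public gpt4all 2.x constructor; a minimal sketch under the assumption that GPT4All() accepts a device argument and raises ValueError when no usable GPU is found (khoj itself uses the lower-level pyllmodel probe shown above):

    # Minimal sketch of the GPU-with-CPU-fallback pattern; not khoj's actual code.
    from gpt4all import GPT4All

    def load_chat_model(model_name: str, chat_on_gpu: bool = True) -> GPT4All:
        if chat_on_gpu:
            try:
                return GPT4All(model_name, device="gpu")  # Vulkan/Metal offload when available
            except ValueError:
                pass  # no supported GPU, or not enough free VRAM for the model
        return GPT4All(model_name, device="cpu")

    model = load_chat_model("mistral-7b-instruct-v0.1.Q4_0.gguf", chat_on_gpu=True)
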
@@ -20,9 +20,11 @@ model_to_prompt_size = {
     "gpt-4": 8192,
     "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
     "gpt-3.5-turbo-16k": 15000,
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": 1548,
 }
 model_to_tokenizer = {
     "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1",
 }


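These two maps drive prompt truncation for the offline model: the prompt budget stays at 1548 tokens, while token counting now goes through the upstream Mistral tokenizer. A rough sketch of how such a lookup could be used, assuming the transformers package is installed and the maps above are in scope; the helper name is illustrative, not khoj's actual truncation code:

    # Illustrative sketch; not khoj's actual truncation helper.
    from transformers import AutoTokenizer

    model_name = "mistral-7b-instruct-v0.1.Q4_0.gguf"
    max_prompt_size = model_to_prompt_size[model_name]  # 1548 tokens
    tokenizer = AutoTokenizer.from_pretrained(model_to_tokenizer[model_name])

    def fits_in_prompt(text: str) -> bool:
        # Count tokens with the model's tokenizer and compare against its prompt budget
        return len(tokenizer.encode(text)) <= max_prompt_size
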
@@ -14,6 +14,7 @@ from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
 from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
+from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model
 from khoj.migrations.migrate_server_pg import migrate_server_pg


@@ -38,6 +39,9 @@ def cli(args=None):
         help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock",
     )
     parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
+    parser.add_argument(
+        "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
+    )
     parser.add_argument("--demo", action="store_true", default=False, help="Run Khoj in demo mode")
     parser.add_argument(
         "--anonymous-mode",

@@ -50,6 +54,9 @@ def cli(args=None):

     logger.debug(f"Ignoring unknown commandline args: {remaining_args}")

+    # Set default values for arguments
+    args.chat_on_gpu = not args.disable_chat_on_gpu
+
     args.version_no = version("khoj-assistant")
     if args.version:
         # Show version of khoj installed and exit

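Taken together with the set_state and download_model hunks above, the new flag flows end to end: --disable-chat-on-gpu flips args.chat_on_gpu to False, set_state() copies it onto state.chat_on_gpu, and download_model() then skips the GPU probe entirely. For example, to force the offline chat model onto the CPU (invocation shown for illustration, assuming the standard khoj console entry point):

    khoj --disable-chat-on-gpu
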
@@ -76,6 +83,7 @@ def run_migrations(args):
         migrate_processor_conversation_schema,
         migrate_offline_model,
         migrate_offline_chat_schema,
+        migrate_offline_chat_default_model,
         migrate_server_pg,
     ]
     for migration in migrations:

@@ -84,7 +84,7 @@ class OpenAIProcessorConfig(ConfigBase):

 class OfflineChatProcessorConfig(ConfigBase):
     enable_offline_chat: Optional[bool] = False
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model: Optional[str] = "mistral-7b-instruct-v0.1.Q4_0.gguf"


 class ConversationProcessorConfig(ConfigBase):

@@ -33,5 +33,6 @@ SearchType = utils_config.SearchType
 telemetry: List[Dict[str, str]] = []
 demo: bool = False
 khoj_version: str = None
-anonymous_mode: bool = False
 device = get_device()
+chat_on_gpu: bool = True
+anonymous_mode: bool = False

@@ -169,7 +169,7 @@ def md_content_config():
     return markdown_config


-@pytest.fixture(scope="function")
+@pytest.fixture(scope="session")
 def chat_client(search_config: SearchConfig, default_user2: KhojUser):
     # Initialize app state
     state.config.search_type = search_config

@@ -211,7 +211,7 @@ def chat_client_no_background(search_config: SearchConfig, default_user2: KhojUser):

     # Initialize Processor from Config
     if os.getenv("OPENAI_API_KEY"):
-        OpenAIProcessorConversationConfigFactory(user=default_user2)
+        OpenAIProcessorConversationConfigFactory()

     state.anonymous_mode = True

tests/data/config.yml (vendored, 2 changes)

@@ -14,4 +14,4 @@ search-type:
   asymmetric:
     cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
     encoder: sentence-transformers/msmarco-MiniLM-L-6-v3
-version: 0.10.1
+version: 0.14.0

@@ -37,7 +37,7 @@ class ChatModelOptionsFactory(factory.django.DjangoModelFactory):

     max_prompt_size = 2000
     tokenizer = None
-    chat_model = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
     model_type = "offline"


@@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model

 from khoj.processor.conversation.utils import message_to_log

-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+MODEL_NAME = "mistral-7b-instruct-v0.1.Q4_0.gguf"


 @pytest.fixture(scope="session")