Mirror of https://github.com/khoj-ai/khoj.git (synced 2024-11-27 17:35:07 +01:00)

Merge branch 'master' of github.com:khoj-ai/khoj into features/multi-user-support-khoj

Merge changes to use the latest GPT4All, with GPU and GGUF model support, into the khoj multi-user support rearchitecture branch.

Commit 345856e7be: 16 changed files with 107 additions and 18 deletions.
@@ -10,7 +10,8 @@
 Offline chat stays completely private and works without internet. But it is slower, lower quality and more compute intensive.
 
 > **System Requirements**:
-> - Machine with at least **6 GB of RAM** and **4 GB of Disk** available
+> - Minimum 8 GB RAM. Recommend **16 GB VRAM**
+> - Minimum **5 GB of Disk** available
 > - A CPU supporting [AVX or AVX2 instructions](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) is required
 > - A Mac M1+ or [Vulkan supported GPU](https://vulkan.gpuinfo.org/) should significantly speed up chat response times
 
@@ -62,8 +62,8 @@ dependencies = [
     "pymupdf >= 1.23.5",
     "django == 4.2.5",
     "authlib == 1.2.1",
-    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all >= 2.0.0; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all >= 2.0.0; platform_system == 'Windows' or platform_system == 'Darwin'",
     "itsdangerous == 2.1.2",
     "httpx == 0.25.0",
     "pgvector == 0.2.3",
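As a quick sanity check (not part of the commit), the installed gpt4all version can be read back with importlib.metadata; the only assumption carried over from the diff is the >= 2.0.0 floor that the platform markers above now require.

```python
# Illustrative check, not from this commit: report which gpt4all build got installed.
from importlib.metadata import PackageNotFoundError, version

try:
    print("gpt4all", version("gpt4all"))  # expected to be >= 2.0.0 after this dependency bump
except PackageNotFoundError:
    print("gpt4all is not installed in this environment")
```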
@@ -38,7 +38,7 @@ export class KhojChatModal extends Modal {
         await this.getChatHistory();
 
         // Add chat input field
-        contentEl.createEl("input",
+        const chatInput = contentEl.createEl("input",
             {
                 attr: {
                     type: "text",
@@ -48,10 +48,11 @@ export class KhojChatModal extends Modal {
                     class: "khoj-chat-input option"
                 }
             })
-            .addEventListener('change', (event) => { this.result = (<HTMLInputElement>event.target).value });
+        chatInput.addEventListener('change', (event) => { this.result = (<HTMLInputElement>event.target).value });
 
         // Scroll to bottom of modal, till the send message input box
         this.modalEl.scrollTop = this.modalEl.scrollHeight;
+        chatInput.focus();
     }
 
     generateReference(messageEl: any, reference: string, index: number) {
@@ -122,6 +122,7 @@ def set_state(args):
     state.demo = args.demo
     state.anonymous_mode = args.anonymous_mode
     state.khoj_version = version("khoj-assistant")
+    state.chat_on_gpu = args.chat_on_gpu
 
 
 def start_server(app, host=None, port=None, socket=None):
src/khoj/migrations/migrate_offline_chat_default_model.py (new file, 69 lines)

@@ -0,0 +1,69 @@
+"""
+Current format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    offline-chat:
+        enable-offline-chat: false
+        chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+    ...
+search-type:
+    ...
+
+New format of khoj.yml
+---
+app:
+    ...
+content-type:
+    ...
+processor:
+  conversation:
+    offline-chat:
+        enable-offline-chat: false
+        chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
+    ...
+search-type:
+    ...
+"""
+import logging
+
+from packaging import version
+
+from khoj.utils.yaml import load_config_from_file, save_config_to_file
+
+
+logger = logging.getLogger(__name__)
+
+
+def migrate_offline_chat_default_model(args):
+    schema_version = "0.12.4"
+    raw_config = load_config_from_file(args.config_file)
+    previous_version = raw_config.get("version")
+
+    if "processor" not in raw_config:
+        return args
+    if raw_config["processor"] is None:
+        return args
+    if "conversation" not in raw_config["processor"]:
+        return args
+    if "offline-chat" not in raw_config["processor"]["conversation"]:
+        return args
+    if "chat-model" not in raw_config["processor"]["conversation"]["offline-chat"]:
+        return args
+
+    if previous_version is None or version.parse(previous_version) < version.parse("0.12.4"):
+        logger.info(
+            f"Upgrading config schema to {schema_version} from {previous_version} to change default (offline) chat model to mistral GGUF"
+        )
+        raw_config["version"] = schema_version
+
+        # Update offline chat model to mistral in GGUF format to use latest GPT4All
+        offline_chat_model = raw_config["processor"]["conversation"]["offline-chat"]["chat-model"]
+        if offline_chat_model.endswith(".bin"):
+            raw_config["processor"]["conversation"]["offline-chat"]["chat-model"] = "mistral-7b-instruct-v0.1.Q4_0.gguf"
+
+        save_config_to_file(raw_config, args.config_file)
+
+    return args
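For illustration only, here is a minimal sketch of exercising the new migration against a throwaway config file. The temporary path, the SimpleNamespace stand-in for the CLI args, and the PyYAML round-trip are assumptions; the function name, config keys, and target schema version come from the file above, and khoj's load_config_from_file/save_config_to_file are assumed to read and write plain YAML.

```python
# Hypothetical smoke test for the migration above; the path and args object are stand-ins.
from types import SimpleNamespace

import yaml

from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model

config_file = "/tmp/khoj-migration-test.yml"  # throwaway config for this sketch
old_config = {
    "version": "0.12.3",
    "processor": {
        "conversation": {
            "offline-chat": {
                "enable-offline-chat": False,
                "chat-model": "llama-2-7b-chat.ggmlv3.q4_0.bin",
            }
        }
    },
}
with open(config_file, "w") as f:
    yaml.safe_dump(old_config, f)

migrate_offline_chat_default_model(SimpleNamespace(config_file=config_file))

with open(config_file) as f:
    migrated = yaml.safe_load(f)
assert migrated["processor"]["conversation"]["offline-chat"]["chat-model"] == "mistral-7b-instruct-v0.1.Q4_0.gguf"
assert migrated["version"] == "0.12.4"
```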
@@ -9,7 +9,7 @@ processor:
     conversation-logfile: ~/.khoj/processor/conversation/conversation_logs.json
     max-prompt-size: null
     offline-chat:
-      chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+      chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
       enable-offline-chat: false
     openai:
       api-key: sk-blah
@@ -46,7 +46,7 @@ processor:
   - chat-model: gpt-3.5-turbo
     tokenizer: null
     type: openai
-  - chat-model: llama-2-7b-chat.ggmlv3.q4_0.bin
+  - chat-model: mistral-7b-instruct-v0.1.Q4_0.gguf
     tokenizer: null
     type: offline
 search-type:
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
 
 def extract_questions_offline(
     text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     conversation_log={},
     use_history: bool = True,
@@ -123,7 +123,7 @@ def converse_offline(
     references,
     user_query,
     conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
+    model: str = "mistral-7b-instruct-v0.1.Q4_0.gguf",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_command=ConversationCommand.Default,
@@ -1,5 +1,6 @@
 import logging
 
+from khoj.utils import state
 
 logger = logging.getLogger(__name__)
 
@@ -16,8 +17,13 @@ def download_model(model_name: str):
 
     # Decide whether to load model to GPU or CPU
     try:
-        # Check if machine has GPU and GPU has enough free memory to load the chat model
-        device = "gpu" if gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu"
+        # Try load chat model to GPU if:
+        # 1. Loading chat model to GPU isn't disabled via CLI and
+        # 2. Machine has GPU
+        # 3. GPU has enough free memory to load the chat model
+        device = (
+            "gpu" if state.chat_on_gpu and gpt4all.pyllmodel.LLModel().list_gpu(chat_model_config["path"]) else "cpu"
+        )
     except ValueError:
         device = "cpu"
 
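Not from the commit itself: a sketch of how the selected device plugs into model loading with the GPT4All 2.x Python bindings. It assumes the bindings accept a device keyword and raise ValueError when no usable GPU backend is found, which is the same exception the hunk above catches; the model name and fallback behaviour are illustrative.

```python
# Illustrative sketch of the device-selection flow above; not code from this commit.
import gpt4all

from khoj.utils import state

model_name = "mistral-7b-instruct-v0.1.Q4_0.gguf"
try:
    # Respect --disable-chat-on-gpu (surfaced as state.chat_on_gpu) before trying the GPU
    device = "gpu" if state.chat_on_gpu else "cpu"
    model = gpt4all.GPT4All(model_name, device=device)
except ValueError:
    # Fall back to CPU when no supported GPU, or not enough free VRAM, is available
    model = gpt4all.GPT4All(model_name, device="cpu")
```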
@@ -20,9 +20,11 @@ model_to_prompt_size = {
     "gpt-4": 8192,
     "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
     "gpt-3.5-turbo-16k": 15000,
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": 1548,
 }
 model_to_tokenizer = {
     "llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer",
+    "mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1",
 }
 
 
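A hedged sketch of how these lookup tables are typically consumed when trimming chat history to a model's context budget; the truncation helper below is an assumption for illustration, not khoj's actual implementation, and the dict entries are copied from the hunk above.

```python
# Assumed usage pattern for the lookup tables above; truncate_prompt is illustrative only.
from transformers import AutoTokenizer

# Entries copied from the hunk above
model_to_prompt_size = {"mistral-7b-instruct-v0.1.Q4_0.gguf": 1548}
model_to_tokenizer = {"mistral-7b-instruct-v0.1.Q4_0.gguf": "mistralai/Mistral-7B-Instruct-v0.1"}

chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
max_prompt_size = model_to_prompt_size[chat_model]
tokenizer = AutoTokenizer.from_pretrained(model_to_tokenizer[chat_model])


def truncate_prompt(prompt: str) -> str:
    """Keep only the most recent tokens that fit within the model's prompt budget."""
    tokens = tokenizer.encode(prompt)
    if len(tokens) <= max_prompt_size:
        return prompt
    return tokenizer.decode(tokens[-max_prompt_size:])
```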
@@ -14,6 +14,7 @@ from khoj.migrations.migrate_version import migrate_config_to_version
 from khoj.migrations.migrate_processor_config_openai import migrate_processor_conversation_schema
 from khoj.migrations.migrate_offline_model import migrate_offline_model
 from khoj.migrations.migrate_offline_chat_schema import migrate_offline_chat_schema
+from khoj.migrations.migrate_offline_chat_default_model import migrate_offline_chat_default_model
 from khoj.migrations.migrate_server_pg import migrate_server_pg
 
 
@@ -38,6 +39,9 @@ def cli(args=None):
         help="Path to UNIX socket for server. Use to run server behind reverse proxy. Default: /tmp/uvicorn.sock",
     )
     parser.add_argument("--version", "-V", action="store_true", help="Print the installed Khoj version and exit")
+    parser.add_argument(
+        "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
+    )
     parser.add_argument("--demo", action="store_true", default=False, help="Run Khoj in demo mode")
     parser.add_argument(
         "--anonymous-mode",
@@ -50,6 +54,9 @@ def cli(args=None):
 
     logger.debug(f"Ignoring unknown commandline args: {remaining_args}")
 
+    # Set default values for arguments
+    args.chat_on_gpu = not args.disable_chat_on_gpu
+
     args.version_no = version("khoj-assistant")
     if args.version:
         # Show version of khoj installed and exit
@@ -76,6 +83,7 @@ def run_migrations(args):
         migrate_processor_conversation_schema,
         migrate_offline_model,
         migrate_offline_chat_schema,
+        migrate_offline_chat_default_model,
         migrate_server_pg,
     ]
     for migration in migrations:
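To tie the CLI hunks together, here is a minimal, standalone sketch of the new flag's flow from argparse into the value that set_state() later copies onto state.chat_on_gpu; the tiny parser is illustrative rather than khoj's full cli().

```python
# Minimal, illustrative reproduction of the flag flow added in this commit.
import argparse

parser = argparse.ArgumentParser(prog="khoj")
parser.add_argument(
    "--disable-chat-on-gpu", action="store_true", default=False, help="Disable using GPU for the offline chat model"
)

args = parser.parse_args(["--disable-chat-on-gpu"])
args.chat_on_gpu = not args.disable_chat_on_gpu  # derived in cli()
# set_state(args) then copies this onto state.chat_on_gpu, which download_model() consults
assert args.chat_on_gpu is False
```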
@@ -84,7 +84,7 @@ class OpenAIProcessorConfig(ConfigBase):
 
 class OfflineChatProcessorConfig(ConfigBase):
     enable_offline_chat: Optional[bool] = False
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model: Optional[str] = "mistral-7b-instruct-v0.1.Q4_0.gguf"
 
 
 class ConversationProcessorConfig(ConfigBase):
@@ -33,5 +33,6 @@ SearchType = utils_config.SearchType
 telemetry: List[Dict[str, str]] = []
 demo: bool = False
 khoj_version: str = None
-anonymous_mode: bool = False
 device = get_device()
+chat_on_gpu: bool = True
+anonymous_mode: bool = False
@@ -169,7 +169,7 @@ def md_content_config():
     return markdown_config
 
 
-@pytest.fixture(scope="function")
+@pytest.fixture(scope="session")
 def chat_client(search_config: SearchConfig, default_user2: KhojUser):
     # Initialize app state
     state.config.search_type = search_config
@@ -211,7 +211,7 @@ def chat_client_no_background(search_config: SearchConfig, default_user2: KhojUser):
 
     # Initialize Processor from Config
     if os.getenv("OPENAI_API_KEY"):
-        OpenAIProcessorConversationConfigFactory(user=default_user2)
+        OpenAIProcessorConversationConfigFactory()
 
     state.anonymous_mode = True
 
tests/data/config.yml (vendored, 2 changed lines)

@@ -14,4 +14,4 @@ search-type:
   asymmetric:
     cross-encoder: cross-encoder/ms-marco-MiniLM-L-6-v2
     encoder: sentence-transformers/msmarco-MiniLM-L-6-v3
-version: 0.10.1
+version: 0.14.0
@@ -37,7 +37,7 @@ class ChatModelOptionsFactory(factory.django.DjangoModelFactory):
 
     max_prompt_size = 2000
     tokenizer = None
-    chat_model = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+    chat_model = "mistral-7b-instruct-v0.1.Q4_0.gguf"
     model_type = "offline"
 
 
@@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model
 
 from khoj.processor.conversation.utils import message_to_log
 
-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"
+MODEL_NAME = "mistral-7b-instruct-v0.1.Q4_0.gguf"
 
 
 @pytest.fixture(scope="session")