Use default Llama 2 supported by GPT4All

Remove the custom logic for downloading a custom Llama 2 model.
It was added because GPT4All did not support Llama 2 when Llama 2 was first
integrated into Khoj; GPT4All now provides a Llama 2 chat model directly, so
Khoj uses that default (the q4_0 build) instead.
Debanjum Singh Solanky 2023-10-03 16:29:46 -07:00
parent 4a5ed7f06c
commit 13b16a4364
6 changed files with 7 additions and 79 deletions
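
In effect, the commit hands model download and caching over to the gpt4all package itself. A minimal sketch of that built-in path, using the new default model name from this diff (the generate() call and its arguments are illustrative, not from this commit):

    from gpt4all import GPT4All

    # GPT4All resolves the model name against its own registry and downloads
    # the weights into its cache directory on first use, so Khoj no longer
    # needs custom download or checksum logic.
    model = GPT4All(model_name="llama-2-7b-chat.ggmlv3.q4_0.bin")
    print(model.generate("Summarize why offline chat is useful.", max_tokens=128))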

src/khoj/processor/conversation/gpt4all/chat_model.py

@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)

 def extract_questions_offline(
     text: str,
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
     loaded_model: Union[Any, None] = None,
     conversation_log={},
     use_history: bool = True,
@@ -123,7 +123,7 @@ def converse_offline(
     references,
     user_query,
     conversation_log={},
-    model: str = "llama-2-7b-chat.ggmlv3.q4_K_S.bin",
+    model: str = "llama-2-7b-chat.ggmlv3.q4_0.bin",
     loaded_model: Union[Any, None] = None,
     completion_func=None,
     conversation_command=ConversationCommand.Default,
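
For illustration, a hypothetical call into these functions after the change; only the default model value changes, and callers that pass loaded_model are unaffected (the chat_model module path is an assumption):

    from khoj.processor.conversation.gpt4all.chat_model import extract_questions_offline
    from khoj.processor.conversation.gpt4all.utils import download_model

    # Load the model once and reuse it across calls instead of
    # reloading the weights for every query.
    model = download_model("llama-2-7b-chat.ggmlv3.q4_0.bin")
    questions = extract_questions_offline("What did I note about Llama 2?", loaded_model=model)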

src/khoj/processor/conversation/gpt4all/model_metadata.py (deleted)

@@ -1,3 +0,0 @@
-model_name_to_url = {
-    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": "https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/resolve/main/llama-2-7b-chat.ggmlv3.q4_K_S.bin"
-}

src/khoj/processor/conversation/gpt4all/utils.py

@@ -1,24 +1,8 @@
 import os
 import logging
-import requests
-import hashlib
-from tqdm import tqdm
-
-from khoj.processor.conversation.gpt4all import model_metadata

 logger = logging.getLogger(__name__)

-expected_checksum = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "cfa87b15d92fb15a2d7c354b0098578b"}
-
-
-def get_md5_checksum(filename: str):
-    hash_md5 = hashlib.md5()
-    with open(filename, "rb") as f:
-        for chunk in iter(lambda: f.read(8192), b""):
-            hash_md5.update(chunk)
-    return hash_md5.hexdigest()
-
-
 def download_model(model_name: str):
     try:
@@ -27,57 +11,4 @@ def download_model(model_name: str):
         logger.info("There was an error importing GPT4All. Please run pip install gpt4all in order to install it.")
         raise e

-    url = model_metadata.model_name_to_url.get(model_name)
-    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
-    if not url:
-        logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
-        return GPT4All(model_name=model_name, model_path=model_path)
-
-    filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
-    if os.path.exists(filename):
-        # Check if the user is connected to the internet
-        try:
-            requests.get("https://www.google.com/", timeout=5)
-        except:
-            logger.debug("User is offline. Disabling allowed download flag")
-            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
-        return GPT4All(model_name=model_name, model_path=model_path)
-
-    # Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
-    tmp_filename = filename + ".tmp"
-
-    try:
-        os.makedirs(os.path.dirname(tmp_filename), exist_ok=True)
-        logger.debug(f"Downloading model {model_name} from {url} to {filename}...")
-        with requests.get(url, stream=True) as r:
-            r.raise_for_status()
-            total_size = int(r.headers.get("content-length", 0))
-            with open(tmp_filename, "wb") as f, tqdm(
-                unit="B",  # unit string to be displayed.
-                unit_scale=True,  # let tqdm to determine the scale in kilo, mega..etc.
-                unit_divisor=1024,  # is used when unit_scale is true
-                total=total_size,  # the total iteration.
-                desc=model_name,  # prefix to be displayed on progress bar.
-            ) as progress_bar:
-                for chunk in r.iter_content(chunk_size=8192):
-                    f.write(chunk)
-                    progress_bar.update(len(chunk))
-
-        # Verify the checksum
-        if expected_checksum.get(model_name) != get_md5_checksum(tmp_filename):
-            logger.error(
-                f"Checksum verification failed for {filename}. Removing the tmp file. Offline model will not be available."
-            )
-            os.remove(tmp_filename)
-            raise ValueError(f"Checksum verification failed for downloading {model_name} from {url}.")
-
-        # Move the tmp file to the actual file
-        os.rename(tmp_filename, filename)
-        logger.debug(f"Successfully downloaded model {model_name} from {url} to {filename}")
-        return GPT4All(model_name)
-    except Exception as e:
-        logger.error(f"Failed to download model {model_name} from {url} to {filename}. Error: {e}", exc_info=True)
-        # Remove the tmp file if it exists
-        if os.path.exists(tmp_filename):
-            os.remove(tmp_filename)
-        return None
+
+    return GPT4All(model_name=model_name)
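
After this hunk, download_model reduces to roughly the following. The GPT4All import and the except clause are hunk context not shown above, so their exact form here is an assumption:

    import logging

    logger = logging.getLogger(__name__)


    def download_model(model_name: str):
        try:
            from gpt4all import GPT4All
        except ModuleNotFoundError as e:
            logger.info("There was an error importing GPT4All. Please run pip install gpt4all in order to install it.")
            raise e

        # gpt4all itself now handles downloading, caching and loading the model
        return GPT4All(model_name=model_name)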

src/khoj/processor/conversation/utils.py

@@ -17,10 +17,10 @@ logger = logging.getLogger(__name__)
 max_prompt_size = {
     "gpt-3.5-turbo": 4096,
     "gpt-4": 8192,
-    "llama-2-7b-chat.ggmlv3.q4_K_S.bin": 1548,
+    "llama-2-7b-chat.ggmlv3.q4_0.bin": 1548,
     "gpt-3.5-turbo-16k": 15000,
 }
-tokenizer = {"llama-2-7b-chat.ggmlv3.q4_K_S.bin": "hf-internal-testing/llama-tokenizer"}
+tokenizer = {"llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer"}

 class ThreadedGenerator:
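
These two maps drive prompt truncation for offline chat: the model's context budget comes from max_prompt_size, and token counting for the Llama model uses the mapped Hugging Face tokenizer. A rough sketch of that lookup, not Khoj's exact truncation code (truncate_prompt is a hypothetical helper):

    from transformers import AutoTokenizer

    max_prompt_size = {"llama-2-7b-chat.ggmlv3.q4_0.bin": 1548}
    tokenizer = {"llama-2-7b-chat.ggmlv3.q4_0.bin": "hf-internal-testing/llama-tokenizer"}


    def truncate_prompt(prompt: str, model: str) -> str:
        # Keep only the last max_prompt_size tokens so the newest
        # conversation context survives truncation.
        enc = AutoTokenizer.from_pretrained(tokenizer[model])
        tokens = enc.encode(prompt)[-max_prompt_size[model] :]
        return enc.decode(tokens)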

src/khoj/utils/config.py

@@ -84,7 +84,7 @@ class SearchModels:

 @dataclass
 class GPT4AllProcessorConfig:
-    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
+    chat_model: Optional[str] = "llama-2-7b-chat.ggmlv3.q4_0.bin"
     loaded_model: Union[Any, None] = None
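
An illustrative wiring of this config at startup, not taken from this commit: load the model once via download_model so chat requests share one GPT4All instance.

    from khoj.processor.conversation.gpt4all.utils import download_model

    config = GPT4AllProcessorConfig()
    config.loaded_model = download_model(config.chat_model)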

tests/test_gpt4all_chat_actors.py

@@ -24,7 +24,7 @@ from khoj.processor.conversation.gpt4all.utils import download_model
 from khoj.processor.conversation.utils import message_to_log

-MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_K_S.bin"
+MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"


 @pytest.fixture(scope="session")
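
The fixture body is cut off above; a plausible session-scoped fixture consistent with these imports would look like the following (the fixture name loaded_model is a guess):

    import pytest

    from khoj.processor.conversation.gpt4all.utils import download_model

    MODEL_NAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"


    @pytest.fixture(scope="session")
    def loaded_model():
        # Download (if needed) and load the default model once per test session
        return download_model(MODEL_NAME)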