Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-30 10:53:02 +01:00
Resolve issues with GPT4All and fix the yesterday date filter example in the extract questions prompt (#483)

- GPT4All integration had stopped working against the 0.1.7 specification. Update the pin to 1.0.12. At a later date, we should also use first-party support for Llama v2 via gpt4all.
- Update the system prompt for the extract_questions flow to add start and end dates to the yesterday date filter example.
- Update all setup data in conftest.py to use the new client-server indexing pattern.
This commit is contained in:
parent 8141be97f6, commit 2dd15e9f63
4 changed files with 16 additions and 34 deletions
@@ -59,8 +59,8 @@ dependencies = [
     "bs4 >= 0.0.1",
     "anyio == 3.7.1",
     "pymupdf >= 1.23.3",
-    "gpt4all == 0.1.9; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 0.1.9; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
 ]
 dynamic = ["version"]
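The dependency pin moves from gpt4all 0.1.9 to 1.0.12, whose Python bindings address models by name and accept an optional cache directory. A minimal sketch of the 1.x-style API shape the rest of this commit builds on (the model file name and prompt below are illustrative, not taken from the commit):

from gpt4all import GPT4All

# gpt4all >= 1.0 addresses models by file name; the name below is illustrative.
model = GPT4All(model_name="llama-2-7b-chat.ggmlv3.q4_0.bin", allow_download=True)

# generate() returns the completion as a plain string.
response = model.generate("Summarize yesterday's notes in one sentence.")
print(response)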
@@ -28,9 +28,10 @@ def download_model(model_name: str):
         raise e

     url = model_metadata.model_name_to_url.get(model_name)
+    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
     if not url:
         logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
-        return GPT4All(model_name)
+        return GPT4All(model_name=model_name, model_path=model_path)

     filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
     if os.path.exists(filename):
@@ -39,8 +40,8 @@ def download_model(model_name: str):
             requests.get("https://www.google.com/", timeout=5)
         except:
             logger.debug("User is offline. Disabling allowed download flag")
-            return GPT4All(model_name, allow_download=False)
-        return GPT4All(model_name)
+            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
+        return GPT4All(model_name=model_name, model_path=model_path)

     # Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
     tmp_filename = filename + ".tmp"
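Read together, the two hunks above make the helper pass the cache directory explicitly instead of relying on gpt4all's default model resolution. A rough sketch of how the updated helper reads after this change, reconstructed from the diff (the import paths and the surrounding scaffolding are assumptions):

import logging
import os

import requests
from gpt4all import GPT4All

from khoj.processor.conversation.gpt4all import model_metadata  # assumed module path

logger = logging.getLogger(__name__)


def download_model(model_name: str):
    url = model_metadata.model_name_to_url.get(model_name)
    # gpt4all 1.x resolves models relative to model_path, so point it at the shared cache
    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
    if not url:
        logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
        return GPT4All(model_name=model_name, model_path=model_path)

    filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
    if os.path.exists(filename):
        # Model is already cached; only let gpt4all re-download when we are online
        try:
            requests.get("https://www.google.com/", timeout=5)
        except requests.exceptions.RequestException:
            logger.debug("User is offline. Disabling allowed download flag")
            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
        return GPT4All(model_name=model_name, model_path=model_path)

    # Otherwise download the model to a tmp file and move it into place once complete
    tmp_filename = filename + ".tmp"
    ...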
@@ -209,7 +209,7 @@ A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} yea

 Q: What does yesterday's note say?

-["Note from {yesterday_date} dt='{yesterday_date}'"]
+["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]

 A: Yesterday's note contains the following information: ...
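The prompt example now bounds "yesterday" with both a start and an end date, so the model learns to emit a half-open day range rather than a single-date equality. A small sketch of how the two placeholders could be filled in (the YYYY-MM-DD format and the helper name are assumptions, not taken from the commit):

from datetime import datetime, timedelta

def yesterday_filter_example() -> str:
    # Hypothetical helper: fills the prompt placeholders for the yesterday example.
    current_date = datetime.now().strftime("%Y-%m-%d")
    yesterday_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    # Half-open range [yesterday, today) selects exactly one day of notes.
    return f"[\"Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'\"]"

print(yesterday_filter_example())
# e.g. ["Note from 2023-09-20 dt>='2023-09-20' dt<'2023-09-21'"]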
@@ -26,7 +26,8 @@ from khoj.utils.rawconfig import (
     TextSearchConfig,
     ImageSearchConfig,
 )
-from khoj.utils import state
+from khoj.utils import state, fs_syncer
+from khoj.routers.indexer import configure_content
 from khoj.processor.jsonl.jsonl_to_jsonl import JsonlToJsonl
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.search_filter.date_filter import DateFilter
@@ -220,15 +221,10 @@ def chat_client(md_content_config: ContentConfig, search_config: SearchConfig, p
     state.SearchType = configure_search_types(state.config)

     # Index Markdown Content for Search
-    filters = [DateFilter(), WordFilter(), FileFilter()]
     state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
-    state.content_index.markdown = text_search.setup(
-        MarkdownToJsonl,
-        get_sample_data("markdown"),
-        md_content_config.markdown,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-        filters=filters,
+    all_files = fs_syncer.collect_files(state.config.content_type)
+    state.content_index = configure_content(
+        state.content_index, state.config.content_type, all_files, state.search_models
     )

     # Initialize Processor from Config
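Both test fixtures now follow the same client-server indexing pattern: collect the raw files for every configured content type, then hand them to the indexer in a single call. A condensed sketch of that pattern (the fixture name, import paths, and surrounding scaffolding are assumptions; the collect_files and configure_content calls come from the diff):

import pytest

from khoj.routers.indexer import configure_content
from khoj.search_type import text_search
from khoj.utils import fs_syncer, state


@pytest.fixture(scope="function")
def indexed_content(search_config):
    # Hypothetical fixture; the real ones also configure search types and the processor.
    state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)

    # Gather files for all configured content types, then index them in one pass.
    all_files = fs_syncer.collect_files(state.config.content_type)
    state.content_index = configure_content(
        state.content_index, state.config.content_type, all_files, state.search_models
    )
    return state.content_index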
@@ -273,7 +269,7 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor

 @pytest.fixture(scope="function")
 def client_offline_chat(
-    md_content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
+    search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig, content_config: ContentConfig
 ):
     # Initialize app state
     state.config.content_type = md_content_config
@@ -281,27 +277,12 @@ def client_offline_chat(
     state.SearchType = configure_search_types(state.config)

     # Index Markdown Content for Search
-    filters = [DateFilter(), WordFilter(), FileFilter()]
     state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
     state.search_models.image_search = image_search.initialize_model(search_config.image)
-    state.content_index.org = text_search.setup(
-        OrgToJsonl,
-        get_sample_data("org"),
-        content_config.org,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-    )
-    state.content_index.image = image_search.setup(
-        content_config.image, state.search_models.image_search, regenerate=False
-    )
-
-    state.content_index.markdown = text_search.setup(
-        MarkdownToJsonl,
-        get_sample_data("markdown"),
-        md_content_config.markdown,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-        filters=filters,
+    all_files = fs_syncer.collect_files(content_config.content_type)
+    state.content_index = configure_content(
+        state.content_index, state.config.content_type, all_files, state.search_models
     )

     # Initialize Processor from Config