Resolve issues with GPT4All and fix prompt for yesterday extract questions date filter (#483)

- GPT4All integration had ceased working with 0.1.7 specification. Update to use 1.0.12. At a later date, we should also use first party support for llama v2 via gpt4all
- Update the system prompt for the extract_questions flow to add start and end date to the yesterday date filter example.
- Update all setup data in conftest.py to use new client-server indexing pattern
This commit is contained in:
sabaimran 2023-09-18 14:41:26 -07:00 committed by GitHub
parent 8141be97f6
commit 2dd15e9f63
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 16 additions and 34 deletions

View file

@@ -59,8 +59,8 @@ dependencies = [
     "bs4 >= 0.0.1",
     "anyio == 3.7.1",
     "pymupdf >= 1.23.3",
-    "gpt4all == 0.1.9; platform_system == 'Linux' and platform_machine == 'x86_64'",
-    "gpt4all == 0.1.9; platform_system == 'Windows' or platform_system == 'Darwin'",
+    "gpt4all == 1.0.12; platform_system == 'Linux' and platform_machine == 'x86_64'",
+    "gpt4all == 1.0.12; platform_system == 'Windows' or platform_system == 'Darwin'",
 ]
 dynamic = ["version"]

View file

@@ -28,9 +28,10 @@ def download_model(model_name: str):
         raise e
     url = model_metadata.model_name_to_url.get(model_name)
+    model_path = os.path.expanduser(f"~/.cache/gpt4all/")
     if not url:
         logger.debug(f"Model {model_name} not found in model metadata. Skipping download.")
-        return GPT4All(model_name)
+        return GPT4All(model_name=model_name, model_path=model_path)

     filename = os.path.expanduser(f"~/.cache/gpt4all/{model_name}")
     if os.path.exists(filename):
@@ -39,8 +40,8 @@ def download_model(model_name: str):
             requests.get("https://www.google.com/", timeout=5)
         except:
             logger.debug("User is offline. Disabling allowed download flag")
-            return GPT4All(model_name, allow_download=False)
-        return GPT4All(model_name)
+            return GPT4All(model_name=model_name, model_path=model_path, allow_download=False)
+        return GPT4All(model_name=model_name, model_path=model_path)

     # Download the model to a tmp file. Once the download is completed, move the tmp file to the actual file
     tmp_filename = filename + ".tmp"

View file

@@ -209,7 +209,7 @@ A: Bob is {bob_tom_age_difference} years older than Tom. As Bob is {bob_age} yea
 Q: What does yesterday's note say?
-["Note from {yesterday_date} dt='{yesterday_date}'"]
+["Note from {yesterday_date} dt>='{yesterday_date}' dt<'{current_date}'"]
 A: Yesterday's note contains the following information: ...

View file

@@ -26,7 +26,8 @@ from khoj.utils.rawconfig import (
     TextSearchConfig,
     ImageSearchConfig,
 )
-from khoj.utils import state
+from khoj.utils import state, fs_syncer
+from khoj.routers.indexer import configure_content
 from khoj.processor.jsonl.jsonl_to_jsonl import JsonlToJsonl
 from khoj.processor.org_mode.org_to_jsonl import OrgToJsonl
 from khoj.search_filter.date_filter import DateFilter
@@ -220,15 +221,10 @@ def chat_client(md_content_config: ContentConfig, search_config: SearchConfig, p
     state.SearchType = configure_search_types(state.config)

     # Index Markdown Content for Search
-    filters = [DateFilter(), WordFilter(), FileFilter()]
     state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
-    state.content_index.markdown = text_search.setup(
-        MarkdownToJsonl,
-        get_sample_data("markdown"),
-        md_content_config.markdown,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-        filters=filters,
-    )
+    all_files = fs_syncer.collect_files(state.config.content_type)
+    state.content_index = configure_content(
+        state.content_index, state.config.content_type, all_files, state.search_models
+    )

     # Initialize Processor from Config
@@ -273,7 +269,7 @@ def client(content_config: ContentConfig, search_config: SearchConfig, processor
 @pytest.fixture(scope="function")
 def client_offline_chat(
-    md_content_config: ContentConfig, search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig
+    search_config: SearchConfig, processor_config_offline_chat: ProcessorConfig, content_config: ContentConfig
 ):
     # Initialize app state
     state.config.content_type = md_content_config
@@ -281,27 +277,12 @@ def client_offline_chat(
     state.SearchType = configure_search_types(state.config)

     # Index Markdown Content for Search
-    filters = [DateFilter(), WordFilter(), FileFilter()]
     state.search_models.text_search = text_search.initialize_model(search_config.asymmetric)
     state.search_models.image_search = image_search.initialize_model(search_config.image)
-    state.content_index.org = text_search.setup(
-        OrgToJsonl,
-        get_sample_data("org"),
-        content_config.org,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-    )
-    state.content_index.image = image_search.setup(
-        content_config.image, state.search_models.image_search, regenerate=False
-    )
-    state.content_index.markdown = text_search.setup(
-        MarkdownToJsonl,
-        get_sample_data("markdown"),
-        md_content_config.markdown,
-        state.search_models.text_search.bi_encoder,
-        regenerate=False,
-        filters=filters,
-    )
+    all_files = fs_syncer.collect_files(content_config.content_type)
+    state.content_index = configure_content(
+        state.content_index, state.config.content_type, all_files, state.search_models
+    )

     # Initialize Processor from Config