mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Improve error handling when document search fails by logging the stack trace
- Stop extracting OCR content from PDFs - Only use agent knowledge base when user is not provided
This commit is contained in:
parent
2b68d61fef
commit
046de57571
3 changed files with 17 additions and 9 deletions
|
@ -1463,12 +1463,15 @@ class EntryAdapters:
|
|||
file_filters = EntryAdapters.file_filter.get_filter_terms(query)
|
||||
date_filters = EntryAdapters.date_filter.get_query_date_range(query)
|
||||
|
||||
user_or_agent = Q(user=user)
|
||||
owner_filter = Q()
|
||||
|
||||
if user != None:
|
||||
owner_filter = Q(user=user)
|
||||
if agent != None:
|
||||
user_or_agent |= Q(agent=agent)
|
||||
owner_filter |= Q(agent=agent)
|
||||
|
||||
if len(word_filters) == 0 and len(file_filters) == 0 and len(date_filters) == 0:
|
||||
return Entry.objects.filter(user_or_agent)
|
||||
return Entry.objects.filter(owner_filter)
|
||||
|
||||
for term in word_filters:
|
||||
if term.startswith("+"):
|
||||
|
@ -1504,7 +1507,7 @@ class EntryAdapters:
|
|||
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
|
||||
q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
|
||||
|
||||
relevant_entries = Entry.objects.filter(user_or_agent).filter(q_filter_terms)
|
||||
relevant_entries = Entry.objects.filter(owner_filter).filter(q_filter_terms)
|
||||
if file_type_filter:
|
||||
relevant_entries = relevant_entries.filter(file_type=file_type_filter)
|
||||
return relevant_entries
|
||||
|
@ -1519,13 +1522,18 @@ class EntryAdapters:
|
|||
max_distance: float = math.inf,
|
||||
agent: Agent = None,
|
||||
):
|
||||
user_or_agent = Q(user=user)
|
||||
owner_filter = Q()
|
||||
|
||||
if user != None:
|
||||
owner_filter = Q(user=user)
|
||||
if agent != None:
|
||||
user_or_agent |= Q(agent=agent)
|
||||
owner_filter |= Q(agent=agent)
|
||||
|
||||
if owner_filter == Q():
|
||||
return Entry.objects.none()
|
||||
|
||||
relevant_entries = EntryAdapters.apply_filters(user, raw_query, file_type_filter, agent)
|
||||
relevant_entries = relevant_entries.filter(user_or_agent).annotate(
|
||||
relevant_entries = relevant_entries.filter(owner_filter).annotate(
|
||||
distance=CosineDistance("embeddings", embeddings)
|
||||
)
|
||||
relevant_entries = relevant_entries.filter(distance__lte=max_distance)
|
||||
|
|
|
@ -67,7 +67,7 @@ class PdfToEntries(TextToEntries):
|
|||
bytes = pdf_files[pdf_file]
|
||||
f.write(bytes)
|
||||
try:
|
||||
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
|
||||
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=False)
|
||||
pdf_entries_per_file = [page.page_content for page in loader.load()]
|
||||
except ImportError:
|
||||
loader = PyMuPDFLoader(f"{tmp_file}")
|
||||
|
|
|
@ -859,7 +859,7 @@ async def chat(
|
|||
defiltered_query = result[2]
|
||||
except Exception as e:
|
||||
error_message = f"Error searching knowledge base: {e}. Attempting to respond without document references."
|
||||
logger.warning(error_message)
|
||||
logger.error(error_message, exc_info=True)
|
||||
async for result in send_event(
|
||||
ChatEvent.STATUS, "Document search failed. I'll try respond without document references"
|
||||
):
|
||||
|
|
Loading…
Reference in a new issue