Improve error handling when document search fails by logging the stack trace

- Stop extracting OCR content from PDFs
- Use only the agent knowledge base when no user is provided
This commit is contained in:
sabaimran 2024-10-20 18:03:14 -07:00
parent 2b68d61fef
commit 046de57571
3 changed files with 17 additions and 9 deletions

View file

@ -1463,12 +1463,15 @@ class EntryAdapters:
file_filters = EntryAdapters.file_filter.get_filter_terms(query)
date_filters = EntryAdapters.date_filter.get_query_date_range(query)
user_or_agent = Q(user=user)
owner_filter = Q()
if user != None:
owner_filter = Q(user=user)
if agent != None:
user_or_agent |= Q(agent=agent)
owner_filter |= Q(agent=agent)
if len(word_filters) == 0 and len(file_filters) == 0 and len(date_filters) == 0:
return Entry.objects.filter(user_or_agent)
return Entry.objects.filter(owner_filter)
for term in word_filters:
if term.startswith("+"):
@ -1504,7 +1507,7 @@ class EntryAdapters:
formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d")
q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date)
relevant_entries = Entry.objects.filter(user_or_agent).filter(q_filter_terms)
relevant_entries = Entry.objects.filter(owner_filter).filter(q_filter_terms)
if file_type_filter:
relevant_entries = relevant_entries.filter(file_type=file_type_filter)
return relevant_entries
@ -1519,13 +1522,18 @@ class EntryAdapters:
max_distance: float = math.inf,
agent: Agent = None,
):
user_or_agent = Q(user=user)
owner_filter = Q()
if user != None:
owner_filter = Q(user=user)
if agent != None:
user_or_agent |= Q(agent=agent)
owner_filter |= Q(agent=agent)
if owner_filter == Q():
return Entry.objects.none()
relevant_entries = EntryAdapters.apply_filters(user, raw_query, file_type_filter, agent)
relevant_entries = relevant_entries.filter(user_or_agent).annotate(
relevant_entries = relevant_entries.filter(owner_filter).annotate(
distance=CosineDistance("embeddings", embeddings)
)
relevant_entries = relevant_entries.filter(distance__lte=max_distance)

View file

@ -67,7 +67,7 @@ class PdfToEntries(TextToEntries):
bytes = pdf_files[pdf_file]
f.write(bytes)
try:
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True)
loader = PyMuPDFLoader(f"{tmp_file}", extract_images=False)
pdf_entries_per_file = [page.page_content for page in loader.load()]
except ImportError:
loader = PyMuPDFLoader(f"{tmp_file}")

View file

@ -859,7 +859,7 @@ async def chat(
defiltered_query = result[2]
except Exception as e:
error_message = f"Error searching knowledge base: {e}. Attempting to respond without document references."
logger.warning(error_message)
logger.error(error_message, exc_info=True)
async for result in send_event(
ChatEvent.STATUS, "Document search failed. I'll try respond without document references"
):