diff --git a/src/khoj/database/adapters/__init__.py b/src/khoj/database/adapters/__init__.py index 28946557..e704b18f 100644 --- a/src/khoj/database/adapters/__init__.py +++ b/src/khoj/database/adapters/__init__.py @@ -1463,12 +1463,15 @@ class EntryAdapters: file_filters = EntryAdapters.file_filter.get_filter_terms(query) date_filters = EntryAdapters.date_filter.get_query_date_range(query) - user_or_agent = Q(user=user) + owner_filter = Q() + + if user != None: + owner_filter = Q(user=user) if agent != None: - user_or_agent |= Q(agent=agent) + owner_filter |= Q(agent=agent) if len(word_filters) == 0 and len(file_filters) == 0 and len(date_filters) == 0: - return Entry.objects.filter(user_or_agent) + return Entry.objects.filter(owner_filter) for term in word_filters: if term.startswith("+"): @@ -1504,7 +1507,7 @@ class EntryAdapters: formatted_max_date = date.fromtimestamp(max_date).strftime("%Y-%m-%d") q_filter_terms &= Q(embeddings_dates__date__lte=formatted_max_date) - relevant_entries = Entry.objects.filter(user_or_agent).filter(q_filter_terms) + relevant_entries = Entry.objects.filter(owner_filter).filter(q_filter_terms) if file_type_filter: relevant_entries = relevant_entries.filter(file_type=file_type_filter) return relevant_entries @@ -1519,13 +1522,18 @@ class EntryAdapters: max_distance: float = math.inf, agent: Agent = None, ): - user_or_agent = Q(user=user) + owner_filter = Q() + if user != None: + owner_filter = Q(user=user) if agent != None: - user_or_agent |= Q(agent=agent) + owner_filter |= Q(agent=agent) + + if owner_filter == Q(): + return Entry.objects.none() relevant_entries = EntryAdapters.apply_filters(user, raw_query, file_type_filter, agent) - relevant_entries = relevant_entries.filter(user_or_agent).annotate( + relevant_entries = relevant_entries.filter(owner_filter).annotate( distance=CosineDistance("embeddings", embeddings) ) relevant_entries = relevant_entries.filter(distance__lte=max_distance) diff --git a/src/khoj/processor/content/pdf/pdf_to_entries.py b/src/khoj/processor/content/pdf/pdf_to_entries.py index 59ffc388..063d1e74 100644 --- a/src/khoj/processor/content/pdf/pdf_to_entries.py +++ b/src/khoj/processor/content/pdf/pdf_to_entries.py @@ -67,7 +67,7 @@ class PdfToEntries(TextToEntries): bytes = pdf_files[pdf_file] f.write(bytes) try: - loader = PyMuPDFLoader(f"{tmp_file}", extract_images=True) + loader = PyMuPDFLoader(f"{tmp_file}", extract_images=False) pdf_entries_per_file = [page.page_content for page in loader.load()] except ImportError: loader = PyMuPDFLoader(f"{tmp_file}") diff --git a/src/khoj/routers/api_chat.py b/src/khoj/routers/api_chat.py index d57b5530..0d7320ff 100644 --- a/src/khoj/routers/api_chat.py +++ b/src/khoj/routers/api_chat.py @@ -859,7 +859,7 @@ async def chat( defiltered_query = result[2] except Exception as e: error_message = f"Error searching knowledge base: {e}. Attempting to respond without document references." - logger.warning(error_message) + logger.error(error_message, exc_info=True) async for result in send_event( ChatEvent.STATUS, "Document search failed. I'll try respond without document references" ):