mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 08:04:21 +00:00
Properly filter out empty PDFs for indexing
This commit is contained in:
parent
62a89f79b7
commit
d20746613a
1 changed file with 1 addition and 1 deletion
|
@@ -28,7 +28,7 @@ class PdfToEntries(TextToEntries):
|
|||
) -> Tuple[int, int]:
|
||||
# Extract required fields from config
|
||||
if not full_corpus:
|
||||
- deletion_file_names = set([file for file in files if files[file] == ""])
|
||||
+ deletion_file_names = set([file for file in files if files[file] == b""])
|
||||
files_to_process = set(files) - deletion_file_names
|
||||
files = {file: files[file] for file in files_to_process}
|
||||
else:
|
||||
|
|
Loading…
Add table
Reference in a new issue