mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Skip indexing single bad markdown, plaintext file (#460)
This commit is contained in:
parent
84bd579077
commit
e64357698d
3 changed files with 16 additions and 7 deletions
|
@@ -105,10 +105,14 @@ class MarkdownToJsonl(TextToJsonl):
|
|||
entry_to_file_map = []
|
||||
for markdown_file in markdown_files:
|
||||
with open(markdown_file, "r", encoding="utf8") as f:
|
||||
markdown_content = f.read()
|
||||
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
||||
markdown_content, markdown_file, entries, entry_to_file_map
|
||||
)
|
||||
try:
|
||||
markdown_content = f.read()
|
||||
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
|
||||
markdown_content, markdown_file, entries, entry_to_file_map
|
||||
)
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to process file: {markdown_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return entries, dict(entry_to_file_map)
|
||||
|
||||
|
|
|
@@ -100,7 +100,8 @@ class OrgToJsonl(TextToJsonl):
|
|||
entry_to_file_map += zip(org_file_entries, [org_file] * len(org_file_entries))
|
||||
entries.extend(org_file_entries)
|
||||
except Exception as e:
|
||||
logger.error(f"Error processing file: {org_file} with error: {e}", exc_info=True)
|
||||
logger.warning(f"Unable to process file: {org_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return entries, dict(entry_to_file_map)
|
||||
|
||||
|
|
|
@@ -91,8 +91,12 @@ class PlaintextToJsonl(TextToJsonl):
|
|||
|
||||
for plaintext_file in plaintext_files:
|
||||
with open(plaintext_file, "r") as f:
|
||||
plaintext_content = f.read()
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
try:
|
||||
plaintext_content = f.read()
|
||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||
except Exception as e:
|
||||
logger.warning(f"Unable to process file: {plaintext_file}. This file will not be indexed.")
|
||||
logger.warning(e, exc_info=True)
|
||||
|
||||
return dict(entry_to_file_map)
|
||||
|
||||
|
|
Loading…
Reference in a new issue