Skip indexing single bad markdown, plaintext file (#460)

This commit is contained in:
sabaimran 2023-08-23 15:34:56 -07:00 committed by GitHub
parent 84bd579077
commit e64357698d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 7 deletions

View file

@@ -105,10 +105,14 @@ class MarkdownToJsonl(TextToJsonl):
entry_to_file_map = []
for markdown_file in markdown_files:
with open(markdown_file, "r", encoding="utf8") as f:
markdown_content = f.read()
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
markdown_content, markdown_file, entries, entry_to_file_map
)
try:
markdown_content = f.read()
entries, entry_to_file_map = MarkdownToJsonl.process_single_markdown_file(
markdown_content, markdown_file, entries, entry_to_file_map
)
except Exception as e:
logger.warning(f"Unable to process file: {markdown_file}. This file will not be indexed.")
logger.warning(e, exc_info=True)
return entries, dict(entry_to_file_map)

View file

@@ -100,7 +100,8 @@ class OrgToJsonl(TextToJsonl):
entry_to_file_map += zip(org_file_entries, [org_file] * len(org_file_entries))
entries.extend(org_file_entries)
except Exception as e:
logger.error(f"Error processing file: {org_file} with error: {e}", exc_info=True)
logger.warning(f"Unable to process file: {org_file}. This file will not be indexed.")
logger.warning(e, exc_info=True)
return entries, dict(entry_to_file_map)

View file

@@ -91,8 +91,12 @@ class PlaintextToJsonl(TextToJsonl):
for plaintext_file in plaintext_files:
with open(plaintext_file, "r") as f:
plaintext_content = f.read()
entry_to_file_map.append((plaintext_content, plaintext_file))
try:
plaintext_content = f.read()
entry_to_file_map.append((plaintext_content, plaintext_file))
except Exception as e:
logger.warning(f"Unable to process file: {plaintext_file}. This file will not be indexed.")
logger.warning(e, exc_info=True)
return dict(entry_to_file_map)