mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Skip plaintext file indexing if there's a parsing issue and log the file
This commit is contained in:
parent
74409c2c64
commit
92cbfef7ab
1 changed file with 7 additions and 4 deletions
|
@@ -91,10 +91,13 @@ class PlaintextToJsonl(TextToJsonl):
|
||||||
|
|
||||||
for plaintext_file in plaintext_files:
|
for plaintext_file in plaintext_files:
|
||||||
with open(plaintext_file, "r") as f:
|
with open(plaintext_file, "r") as f:
|
||||||
plaintext_content = f.read()
|
try:
|
||||||
if plaintext_file.endswith(("html", "htm", "xml")):
|
plaintext_content = f.read()
|
||||||
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
if plaintext_file.endswith(("html", "htm", "xml")):
|
||||||
entry_to_file_map.append((plaintext_content, plaintext_file))
|
plaintext_content = PlaintextToJsonl.extract_html_content(plaintext_content)
|
||||||
|
entry_to_file_map.append((plaintext_content, plaintext_file))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file: {plaintext_file} - {e}", exc_info=True)
|
||||||
|
|
||||||
return dict(entry_to_file_map)
|
return dict(entry_to_file_map)
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue