mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 15:38:55 +01:00
Handle size calculation more gracefully for converted documents, depending on type
This commit is contained in:
parent
b6714c202f
commit
8ec1764e42
1 changed file with 11 additions and 1 deletion
|
@@ -422,8 +422,18 @@ async def convert_documents(
|
|||
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
||||
]
|
||||
extracted_content = "\n".join(annotated_pages)
|
||||
else:
|
||||
# Convert content to string
|
||||
extracted_content = extracted_content.decode("utf-8")
|
||||
|
||||
size_in_bytes = len(extracted_content.encode("utf-8"))
|
||||
# Calculate size in bytes. Some of the content might be in bytes, some in str.
|
||||
if isinstance(extracted_content, str):
|
||||
size_in_bytes = len(extracted_content.encode("utf-8"))
|
||||
elif isinstance(extracted_content, bytes):
|
||||
size_in_bytes = len(extracted_content)
|
||||
else:
|
||||
size_in_bytes = 0
|
||||
logger.warning(f"Unexpected content type: {type(extracted_content)}")
|
||||
|
||||
converted_files.append(
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue