Handle size calculation more gracefully for converted documents, depending on type

This commit is contained in:
sabaimran 2024-11-12 02:00:29 -08:00
parent b6714c202f
commit 8ec1764e42

View file

@@ -422,8 +422,18 @@ async def convert_documents(
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
]
extracted_content = "\n".join(annotated_pages)
else:
# Convert content to string
extracted_content = extracted_content.decode("utf-8")
size_in_bytes = len(extracted_content.encode("utf-8"))
# Calculate size in bytes. Some of the content might be in bytes, some in str.
if isinstance(extracted_content, str):
size_in_bytes = len(extracted_content.encode("utf-8"))
elif isinstance(extracted_content, bytes):
size_in_bytes = len(extracted_content)
else:
size_in_bytes = 0
logger.warning(f"Unexpected content type: {type(extracted_content)}")
converted_files.append(
{