mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-23 23:48:56 +01:00
Handle size calculation more gracefully for converted documents, depending on type
This commit is contained in:
parent
b6714c202f
commit
8ec1764e42
1 changed file with 11 additions and 1 deletion
|
@@ -422,8 +422,18 @@ async def convert_documents(
|
||||||
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
||||||
]
|
]
|
||||||
extracted_content = "\n".join(annotated_pages)
|
extracted_content = "\n".join(annotated_pages)
|
||||||
|
else:
|
||||||
|
# Convert content to string
|
||||||
|
extracted_content = extracted_content.decode("utf-8")
|
||||||
|
|
||||||
size_in_bytes = len(extracted_content.encode("utf-8"))
|
# Calculate size in bytes. Some of the content might be in bytes, some in str.
|
||||||
|
if isinstance(extracted_content, str):
|
||||||
|
size_in_bytes = len(extracted_content.encode("utf-8"))
|
||||||
|
elif isinstance(extracted_content, bytes):
|
||||||
|
size_in_bytes = len(extracted_content)
|
||||||
|
else:
|
||||||
|
size_in_bytes = 0
|
||||||
|
logger.warning(f"Unexpected content type: {type(extracted_content)}")
|
||||||
|
|
||||||
converted_files.append(
|
converted_files.append(
|
||||||
{
|
{
|
||||||
|
|
Loading…
Reference in a new issue