Enforce limits on file size when converting to text

This commit is contained in:
sabaimran 2024-11-08 15:27:28 -08:00
parent 4695174149
commit 7159b0b735
2 changed files with 25 additions and 0 deletions

View file

@@ -237,6 +237,16 @@ export const ChatInputArea = forwardRef<HTMLTextAreaElement, ChatInputProps>((pr
? Array.from(nonImageFiles).concat(Array.from(attachedFiles || []))
: Array.from(attachedFiles || []);
// Ensure files are below size limit (10 MB)
for (let i = 0; i < newFiles.length; i++) {
if (newFiles[i].size > 10 * 1024 * 1024) {
setWarning(
`File ${newFiles[i].name} is too large. Please upload files smaller than 10 MB.`,
);
return;
}
}
const dataTransfer = new DataTransfer();
newFiles.forEach((file) => dataTransfer.items.add(file));
setAttachedFiles(dataTransfer.files);

View file

@@ -384,10 +384,25 @@ async def convert_documents(
files: List[UploadFile],
client: Optional[str] = None,
):
MAX_FILE_SIZE_MB = 10 # 10MB limit
MAX_FILE_SIZE_BYTES = MAX_FILE_SIZE_MB * 1024 * 1024
converted_files = []
supported_files = ["org", "markdown", "pdf", "plaintext", "docx"]
for file in files:
# Check file size first
file_size = 0
content = await file.read()
file_size = len(content)
await file.seek(0) # Reset file pointer
if file_size > MAX_FILE_SIZE_BYTES:
logger.warning(
f"Skipped converting oversized file ({file_size / 1024 / 1024:.1f}MB) sent by {client} client: {file.filename}"
)
continue
file_data = get_file_content(file)
if file_data.file_type in supported_files:
extracted_content = (