mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
Display better error messages from document processor (#243)
Pass messages to the frontend on success/failure. Resolves #242.
This commit is contained in:
parent
3e78476739
commit
a505928934
7 changed files with 60 additions and 6 deletions
collector/scripts/watch
frontend/src/components/Modals/MangeWorkspace/Upload/FileUploadProgress
|
@ -16,6 +16,10 @@ def as_docx(**kwargs):
|
|||
data = loader.load()[0]
|
||||
content = data.page_content
|
||||
|
||||
if len(content) == 0:
|
||||
print(f"Resulting text content was empty for {filename}{ext}.")
|
||||
return(False, f"No text content found in {filename}{ext}")
|
||||
|
||||
print(f"-- Working {fullpath} --")
|
||||
data = {
|
||||
'id': guid(),
|
||||
|
@ -33,7 +37,9 @@ def as_docx(**kwargs):
|
|||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
||||
|
||||
def as_odt(**kwargs):
|
||||
parent_dir = kwargs.get('directory', 'hotdir')
|
||||
|
@ -46,6 +52,10 @@ def as_odt(**kwargs):
|
|||
data = loader.load()[0]
|
||||
content = data.page_content
|
||||
|
||||
if len(content) == 0:
|
||||
print(f"Resulting text content was empty for {filename}{ext}.")
|
||||
return(False, f"No text content found in {filename}{ext}")
|
||||
|
||||
print(f"-- Working {fullpath} --")
|
||||
data = {
|
||||
'id': guid(),
|
||||
|
@ -63,4 +73,6 @@ def as_odt(**kwargs):
|
|||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
|
@ -16,6 +16,10 @@ def as_markdown(**kwargs):
|
|||
data = loader.load()[0]
|
||||
content = data.page_content
|
||||
|
||||
if len(content) == 0:
|
||||
print(f"Resulting page content was empty - no text could be extracted from {filename}{ext}.")
|
||||
return(False, f"No text could be extracted from {filename}{ext}.")
|
||||
|
||||
print(f"-- Working {fullpath} --")
|
||||
data = {
|
||||
'id': guid(),
|
||||
|
@ -33,4 +37,6 @@ def as_markdown(**kwargs):
|
|||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
||||
|
|
|
@ -55,5 +55,7 @@ def as_mbox(**kwargs):
|
|||
}
|
||||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
||||
|
|
|
@ -19,7 +19,7 @@ def as_pdf(**kwargs):
|
|||
|
||||
if len(pages) == 0:
|
||||
print(f"{fullpath} parsing resulted in no pages - nothing to do.")
|
||||
return False
|
||||
return(False, f"No pages found for {filename}{ext}!")
|
||||
|
||||
# Set doc to the first page so we can still get the metadata from PyMuPDF but without all the unicode issues.
|
||||
doc = pages[0]
|
||||
|
@ -31,6 +31,10 @@ def as_pdf(**kwargs):
|
|||
print(f"-- Parsing content from pg {page.number} --")
|
||||
page_content += unidecode(page.get_text('text'))
|
||||
|
||||
if len(page_content) == 0:
|
||||
print(f"Resulting page content was empty - no text could be extracted from the document.")
|
||||
return(False, f"No text content could be extracted from {filename}{ext}!")
|
||||
|
||||
title = doc.metadata.get('title')
|
||||
author = doc.metadata.get('author')
|
||||
subject = doc.metadata.get('subject')
|
||||
|
@ -50,4 +54,6 @@ def as_pdf(**kwargs):
|
|||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
||||
|
|
|
@ -12,6 +12,10 @@ def as_text(**kwargs):
|
|||
fullpath = f"{parent_dir}/{filename}{ext}"
|
||||
content = open(fullpath).read()
|
||||
|
||||
if len(content) == 0:
|
||||
print(f"Resulting text content was empty for {filename}{ext}.")
|
||||
return(False, f"No text content found in {filename}{ext}")
|
||||
|
||||
print(f"-- Working {fullpath} --")
|
||||
data = {
|
||||
'id': guid(),
|
||||
|
@ -28,4 +32,6 @@ def as_text(**kwargs):
|
|||
|
||||
write_to_server_documents(data, f"{slugify(filename)}-{data.get('id')}")
|
||||
move_source(parent_dir, f"{filename}{ext}", remove=remove)
|
||||
|
||||
print(f"[SUCCESS]: {filename}{ext} converted & ready for embedding.\n")
|
||||
return(True, None)
|
||||
|
|
|
@ -25,11 +25,11 @@ def process_single(directory, target_doc):
|
|||
move_source(new_destination_filename=target_doc, failed=True, remove=True)
|
||||
return (False, f"{fileext} not a supported file type for conversion. It will not be processed.")
|
||||
|
||||
FILETYPES[fileext](
|
||||
# Returns Tuple of (Boolean, String|None) of success status and possible error message.
|
||||
# Error message will display to user.
|
||||
return FILETYPES[fileext](
|
||||
directory=directory,
|
||||
filename=filename,
|
||||
ext=fileext,
|
||||
remove_on_complete=True # remove source document to save disk space.
|
||||
)
|
||||
|
||||
return (True, None)
|
||||
)
|
|
@ -14,7 +14,8 @@ function FileUploadProgressComponent({
|
|||
onUploadError,
|
||||
}) {
|
||||
const [timerMs, setTimerMs] = useState(10);
|
||||
const [status, setStatus] = useState(file?.rejected ? "uploading" : "failed");
|
||||
const [status, setStatus] = useState("pending");
|
||||
const [error, setError] = useState("");
|
||||
|
||||
useEffect(() => {
|
||||
async function uploadFile() {
|
||||
|
@ -31,6 +32,7 @@ function FileUploadProgressComponent({
|
|||
setStatus("failed");
|
||||
clearInterval(timer);
|
||||
onUploadError(data.error);
|
||||
setError(data.error);
|
||||
} else {
|
||||
setStatus("complete");
|
||||
clearInterval(timer);
|
||||
|
@ -58,6 +60,24 @@ function FileUploadProgressComponent({
|
|||
);
|
||||
}
|
||||
|
||||
if (status === "failed") {
|
||||
return (
|
||||
<div className="w-fit px-2 py-2 flex items-center gap-x-4 rounded-lg bg-blue-100 border-blue-600 dark:bg-stone-800 bg-opacity-50 border dark:border-stone-600">
|
||||
<div className="w-6 h-6">
|
||||
<XCircle className="w-6 h-6 stroke-white bg-red-500 rounded-full p-1 w-full h-full" />
|
||||
</div>
|
||||
<div className="flex flex-col">
|
||||
<p className="text-black dark:text-stone-200 text-sm font-mono overflow-x-scroll">
|
||||
{truncate(file.name, 30)}
|
||||
</p>
|
||||
<p className="text-red-700 dark:text-red-400 text-xs font-mono">
|
||||
{error}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="w-fit px-2 py-2 flex items-center gap-x-4 rounded-lg bg-blue-100 border-blue-600 dark:bg-stone-800 bg-opacity-50 border dark:border-stone-600">
|
||||
<div className="w-6 h-6">
|
||||
|
@ -77,6 +97,8 @@ function FileUploadProgressComponent({
|
|||
</div>
|
||||
</div>
|
||||
);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
export default memo(FileUploadProgressComponent);
|
||||
|
|
Loading…
Add table
Reference in a new issue