mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Label pages when extracting text from PDF and DOCX content. Fix scroll area in doc preview.
This commit is contained in:
parent
ee062d1c48
commit
ad46b0e718
3 changed files with 16 additions and 3 deletions
|
@ -262,6 +262,11 @@ export const ChatInputArea = forwardRef<HTMLTextAreaElement, ChatInputProps>((pr
|
|||
|
||||
return await response.json();
|
||||
} catch (error) {
|
||||
setError(
|
||||
"Error converting files. " +
|
||||
error +
|
||||
". Please try again, or contact team@khoj.dev if the issue persists.",
|
||||
);
|
||||
console.error("Error converting files:", error);
|
||||
return [];
|
||||
}
|
||||
|
|
|
@ -728,7 +728,9 @@ const ChatMessage = forwardRef<HTMLDivElement, ChatMessageProps>((props, ref) =>
|
|||
<DialogTitle>{file.name}</DialogTitle>
|
||||
</DialogHeader>
|
||||
<DialogDescription>
|
||||
<ScrollArea className="max-h-96">{file.content}</ScrollArea>
|
||||
<ScrollArea className="h-72 w-full rounded-md">
|
||||
{file.content}
|
||||
</ScrollArea>
|
||||
</DialogDescription>
|
||||
</DialogContent>
|
||||
</Dialog>
|
||||
|
|
|
@ -396,11 +396,17 @@ async def convert_documents(
|
|||
|
||||
if file_data.file_type == "docx":
|
||||
entries_per_page = DocxToEntries.extract_text(file_data.content)
|
||||
extracted_content = "\n".join(entries_per_page)
|
||||
annotated_pages = [
|
||||
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
||||
]
|
||||
extracted_content = "\n".join(annotated_pages)
|
||||
|
||||
elif file_data.file_type == "pdf":
|
||||
entries_per_page = PdfToEntries.extract_text(file_data.content)
|
||||
extracted_content = "\n".join(entries_per_page)
|
||||
annotated_pages = [
|
||||
f"Page {index} of {file_data.name}:\n\n{entry}" for index, entry in enumerate(entries_per_page)
|
||||
]
|
||||
extracted_content = "\n".join(annotated_pages)
|
||||
|
||||
size_in_bytes = len(extracted_content.encode("utf-8"))
|
||||
|
||||
|
|
Loading…
Reference in a new issue