mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 09:25:06 +01:00
Fix sending file attachments in save_to_conversation method
- When files are attached but the upload fails, don't update the state variables
- Make removing null characters in PDF extraction more space efficient
This commit is contained in:
parent
ba2471dc02
commit
dd36303bb7
3 changed files with 26 additions and 26 deletions
|
@ -237,26 +237,28 @@ export const ChatInputArea = forwardRef<HTMLTextAreaElement, ChatInputProps>((pr
|
||||||
? Array.from(nonImageFiles).concat(Array.from(attachedFiles || []))
|
? Array.from(nonImageFiles).concat(Array.from(attachedFiles || []))
|
||||||
: Array.from(attachedFiles || []);
|
: Array.from(attachedFiles || []);
|
||||||
|
|
||||||
// Ensure files are below size limit (10 MB)
|
if (newFiles.length > 0) {
|
||||||
for (let i = 0; i < newFiles.length; i++) {
|
// Ensure files are below size limit (10 MB)
|
||||||
if (newFiles[i].size > 10 * 1024 * 1024) {
|
for (let i = 0; i < newFiles.length; i++) {
|
||||||
setWarning(
|
if (newFiles[i].size > 10 * 1024 * 1024) {
|
||||||
`File ${newFiles[i].name} is too large. Please upload files smaller than 10 MB.`,
|
setWarning(
|
||||||
);
|
`File ${newFiles[i].name} is too large. Please upload files smaller than 10 MB.`,
|
||||||
return;
|
);
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const dataTransfer = new DataTransfer();
|
||||||
|
newFiles.forEach((file) => dataTransfer.items.add(file));
|
||||||
|
|
||||||
|
// Extract text from files
|
||||||
|
extractTextFromFiles(dataTransfer.files).then((data) => {
|
||||||
|
props.setUploadedFiles(data);
|
||||||
|
setAttachedFiles(dataTransfer.files);
|
||||||
|
setConvertedAttachedFiles(data);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const dataTransfer = new DataTransfer();
|
|
||||||
newFiles.forEach((file) => dataTransfer.items.add(file));
|
|
||||||
setAttachedFiles(dataTransfer.files);
|
|
||||||
|
|
||||||
// Extract text from files
|
|
||||||
extractTextFromFiles(dataTransfer.files).then((data) => {
|
|
||||||
props.setUploadedFiles(data);
|
|
||||||
setConvertedAttachedFiles(data);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Set focus to the input for user message after uploading files
|
// Set focus to the input for user message after uploading files
|
||||||
chatInputRef?.current?.focus();
|
chatInputRef?.current?.focus();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
import logging
|
import logging
|
||||||
import tempfile
|
import tempfile
|
||||||
from io import BytesIO
|
from typing import Dict, Final, List, Tuple
|
||||||
from typing import Dict, List, Tuple
|
|
||||||
|
|
||||||
from langchain_community.document_loaders import PyMuPDFLoader
|
from langchain_community.document_loaders import PyMuPDFLoader
|
||||||
|
|
||||||
|
@ -15,6 +14,9 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
class PdfToEntries(TextToEntries):
|
class PdfToEntries(TextToEntries):
|
||||||
|
# Class-level constant translation table
|
||||||
|
NULL_TRANSLATOR: Final = str.maketrans("", "", "\x00")
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
|
|
||||||
|
@ -112,8 +114,6 @@ class PdfToEntries(TextToEntries):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def clean_text(text: str) -> str:
|
def clean_text(text: str) -> str:
|
||||||
# Remove null bytes
|
"""Clean PDF text by removing null bytes and invalid Unicode characters."""
|
||||||
text = text.replace("\x00", "")
|
# Use faster translation table instead of replace
|
||||||
# Replace invalid Unicode
|
return text.translate(PdfToEntries.NULL_TRANSLATOR)
|
||||||
text = text.encode("utf-8", errors="ignore").decode("utf-8")
|
|
||||||
return text
|
|
||||||
|
|
|
@ -1133,7 +1133,6 @@ async def chat(
|
||||||
online_results=online_results,
|
online_results=online_results,
|
||||||
query_images=uploaded_images,
|
query_images=uploaded_images,
|
||||||
train_of_thought=train_of_thought,
|
train_of_thought=train_of_thought,
|
||||||
attached_file_context=attached_file_context,
|
|
||||||
raw_query_files=raw_query_files,
|
raw_query_files=raw_query_files,
|
||||||
tracer=tracer,
|
tracer=tracer,
|
||||||
)
|
)
|
||||||
|
@ -1194,7 +1193,6 @@ async def chat(
|
||||||
online_results=online_results,
|
online_results=online_results,
|
||||||
query_images=uploaded_images,
|
query_images=uploaded_images,
|
||||||
train_of_thought=train_of_thought,
|
train_of_thought=train_of_thought,
|
||||||
attached_file_context=attached_file_context,
|
|
||||||
raw_query_files=raw_query_files,
|
raw_query_files=raw_query_files,
|
||||||
tracer=tracer,
|
tracer=tracer,
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in a new issue