mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
Generic upload fallback as text file. (#808)
* Do not block any file upload; fall back to treating unknown/unsupported types as text if possible * reduce calls for frontend * patch
This commit is contained in:
parent
ca2e0f8e6f
commit
6d18d79bb7
7 changed files with 54 additions and 42 deletions
|
@ -4,7 +4,7 @@ const {
|
|||
WATCH_DIRECTORY,
|
||||
SUPPORTED_FILETYPE_CONVERTERS,
|
||||
} = require("../utils/constants");
|
||||
const { trashFile } = require("../utils/files");
|
||||
const { trashFile, isTextType } = require("../utils/files");
|
||||
const RESERVED_FILES = ["__HOTDIR__.md"];
|
||||
|
||||
async function processSingleFile(targetFilename) {
|
||||
|
@ -31,17 +31,25 @@ async function processSingleFile(targetFilename) {
|
|||
};
|
||||
}
|
||||
|
||||
if (!Object.keys(SUPPORTED_FILETYPE_CONVERTERS).includes(fileExtension)) {
|
||||
trashFile(fullFilePath);
|
||||
return {
|
||||
success: false,
|
||||
reason: `File extension ${fileExtension} not supported for parsing.`,
|
||||
documents: [],
|
||||
};
|
||||
let processFileAs = fileExtension;
|
||||
if (!SUPPORTED_FILETYPE_CONVERTERS.hasOwnProperty(fileExtension)) {
|
||||
if (isTextType(fullFilePath)) {
|
||||
console.log(
|
||||
`\x1b[33m[Collector]\x1b[0m The provided filetype of ${fileExtension} does not have a preset and will be processed as .txt.`
|
||||
);
|
||||
processFileAs = ".txt";
|
||||
} else {
|
||||
trashFile(fullFilePath);
|
||||
return {
|
||||
success: false,
|
||||
reason: `File extension ${fileExtension} not supported for parsing and cannot be assumed as text file type.`,
|
||||
documents: [],
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
const FileTypeProcessor = require(SUPPORTED_FILETYPE_CONVERTERS[
|
||||
fileExtension
|
||||
processFileAs
|
||||
]);
|
||||
return await FileTypeProcessor({
|
||||
fullFilePath,
|
||||
|
|
|
@ -1,5 +1,33 @@
|
|||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { getType } = require("mime");
|
||||
|
||||
/**
 * Decide whether a file can plausibly be handled as a plain-text document,
 * based on the MIME type that `getType` reports for its path.
 *
 * @param {string} filepath - Path of the file to check.
 * @returns {boolean} `true` when the file exists and its MIME type is on
 *   neither deny-list below; `false` otherwise, including when no MIME type
 *   is reported or the lookup throws.
 */
function isTextType(filepath) {
  if (!fs.existsSync(filepath)) return false;

  // Primary MIME classes that for sure cannot be forced into a text type.
  const nonTextTypes = ["multipart", "image", "model", "audio", "video"];

  // Full MIME types we for sure cannot parse or interpret as text documents.
  const BAD_MIMES = [
    "application/octet-stream",
    "application/zip",
    "application/pkcs8",
    "application/vnd.microsoft.portable-executable",
    "application/x-msdownload",
  ];

  try {
    const mime = getType(filepath);

    // No MIME type was reported: reject explicitly rather than depending on
    // the TypeError from calling `.split` on a non-string being swallowed by
    // the catch below.
    if (typeof mime !== "string") return false;
    if (BAD_MIMES.includes(mime)) return false;

    const [type] = mime.split("/");
    return !nonTextTypes.includes(type);
  } catch {
    // Best-effort check: any failure during lookup means "not text".
    return false;
  }
}
|
||||
|
||||
function trashFile(filepath) {
|
||||
if (!fs.existsSync(filepath)) return;
|
||||
|
@ -94,6 +122,7 @@ async function wipeCollectorStorage() {
|
|||
|
||||
module.exports = {
|
||||
trashFile,
|
||||
isTextType,
|
||||
createdDate,
|
||||
writeToServerDocuments,
|
||||
wipeCollectorStorage,
|
||||
|
|
|
@ -8,7 +8,6 @@ function Directory({
|
|||
files,
|
||||
loading,
|
||||
setLoading,
|
||||
fileTypes,
|
||||
workspace,
|
||||
fetchKeys,
|
||||
selectedItems,
|
||||
|
@ -135,9 +134,7 @@ function Directory({
|
|||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
||||
<UploadFile
|
||||
fileTypes={fileTypes}
|
||||
workspace={workspace}
|
||||
fetchKeys={fetchKeys}
|
||||
setLoading={setLoading}
|
||||
|
|
|
@ -61,7 +61,7 @@ function FileUploadProgressComponent({
|
|||
if (status === "failed") {
|
||||
return (
|
||||
<div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40 overflow-y-auto">
|
||||
<div className="w-6 h-6">
|
||||
<div className="w-6 h-6 flex-shrink-0">
|
||||
<XCircle className="w-6 h-6 stroke-white bg-red-500 rounded-full p-1 w-full h-full" />
|
||||
</div>
|
||||
<div className="flex flex-col">
|
||||
|
@ -76,7 +76,7 @@ function FileUploadProgressComponent({
|
|||
|
||||
return (
|
||||
<div className="h-14 px-2 py-2 flex items-center gap-x-4 rounded-lg bg-white/5 border border-white/40">
|
||||
<div className="w-6 h-6">
|
||||
<div className="w-6 h-6 flex-shrink-0">
|
||||
{status !== "complete" ? (
|
||||
<div className="flex items-center justify-center">
|
||||
<PreLoader size="6" />
|
||||
|
|
|
@ -7,12 +7,7 @@ import { v4 } from "uuid";
|
|||
import FileUploadProgress from "./FileUploadProgress";
|
||||
import Workspace from "../../../../../models/workspace";
|
||||
|
||||
export default function UploadFile({
|
||||
workspace,
|
||||
fileTypes,
|
||||
fetchKeys,
|
||||
setLoading,
|
||||
}) {
|
||||
export default function UploadFile({ workspace, fetchKeys, setLoading }) {
|
||||
const [ready, setReady] = useState(false);
|
||||
const [files, setFiles] = useState([]);
|
||||
const [fetchingUrl, setFetchingUrl] = useState(false);
|
||||
|
@ -76,9 +71,6 @@ export default function UploadFile({
|
|||
|
||||
const { getRootProps, getInputProps } = useDropzone({
|
||||
onDrop,
|
||||
accept: {
|
||||
...fileTypes,
|
||||
},
|
||||
disabled: !ready,
|
||||
});
|
||||
|
||||
|
@ -109,9 +101,7 @@ export default function UploadFile({
|
|||
Click to upload or drag and drop
|
||||
</div>
|
||||
<div className="text-white text-opacity-60 text-xs font-medium py-1">
|
||||
{Object.values(fileTypes ?? [])
|
||||
.flat()
|
||||
.join(" ")}
|
||||
supports text files, csv's, spreadsheets, audio files, and more!
|
||||
</div>
|
||||
</div>
|
||||
) : (
|
||||
|
|
|
@ -15,11 +15,7 @@ const MODEL_COSTS = {
|
|||
"text-embedding-3-large": 0.00000013, // $0.00013 / 1K tokens
|
||||
};
|
||||
|
||||
export default function DocumentSettings({
|
||||
workspace,
|
||||
fileTypes,
|
||||
systemSettings,
|
||||
}) {
|
||||
export default function DocumentSettings({ workspace, systemSettings }) {
|
||||
const [highlightWorkspace, setHighlightWorkspace] = useState(false);
|
||||
const [availableDocs, setAvailableDocs] = useState([]);
|
||||
const [loading, setLoading] = useState(true);
|
||||
|
@ -201,7 +197,6 @@ export default function DocumentSettings({
|
|||
loading={loading}
|
||||
loadingMessage={loadingMessage}
|
||||
setLoading={setLoading}
|
||||
fileTypes={fileTypes}
|
||||
workspace={workspace}
|
||||
fetchKeys={fetchKeys}
|
||||
selectedItems={selectedItems}
|
||||
|
|
|
@ -11,17 +11,14 @@ const noop = () => {};
|
|||
const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
|
||||
const { slug } = useParams();
|
||||
const [workspace, setWorkspace] = useState(null);
|
||||
const [fileTypes, setFileTypes] = useState(null);
|
||||
const [settings, setSettings] = useState({});
|
||||
|
||||
useEffect(() => {
|
||||
async function checkSupportedFiletypes() {
|
||||
const acceptedTypes = await System.acceptedDocumentTypes();
|
||||
async function getSettings() {
|
||||
const _settings = await System.keys();
|
||||
setFileTypes(acceptedTypes ?? {});
|
||||
setSettings(_settings ?? {});
|
||||
}
|
||||
checkSupportedFiletypes();
|
||||
getSettings();
|
||||
}, []);
|
||||
|
||||
useEffect(() => {
|
||||
|
@ -78,11 +75,7 @@ const ManageWorkspace = ({ hideModal = noop, providedSlug = null }) => {
|
|||
<X className="text-gray-300 text-lg" />
|
||||
</button>
|
||||
</div>
|
||||
<DocumentSettings
|
||||
workspace={workspace}
|
||||
fileTypes={fileTypes}
|
||||
systemSettings={settings}
|
||||
/>
|
||||
<DocumentSettings workspace={workspace} systemSettings={settings} />
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
Loading…
Add table
Reference in a new issue