mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-24 21:48:12 +00:00
Re-map some file mimes to support text (#842)
re-map some file mimes to support text
This commit is contained in:
parent
60fc5f715a
commit
ec90060d36
2 changed files with 43 additions and 18 deletions
|
@ -1,28 +1,16 @@
|
|||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { getType } = require("mime");
|
||||
const { MimeDetector } = require("./mime");
|
||||
|
||||
function isTextType(filepath) {
|
||||
if (!fs.existsSync(filepath)) return false;
|
||||
// These are types of mime primary classes that for sure
|
||||
// cannot also be forced into a text type.
|
||||
const nonTextTypes = ["multipart", "image", "model", "audio", "video"];
|
||||
// These are full-mimes we for sure cannot parse or interpret as text
|
||||
// documents
|
||||
const BAD_MIMES = [
|
||||
"application/octet-stream",
|
||||
"application/zip",
|
||||
"application/pkcs8",
|
||||
"application/vnd.microsoft.portable-executable",
|
||||
"application/x-msdownload",
|
||||
];
|
||||
|
||||
try {
|
||||
const mime = getType(filepath);
|
||||
if (BAD_MIMES.includes(mime)) return false;
|
||||
if (!fs.existsSync(filepath)) return false;
|
||||
const mimeLib = new MimeDetector();
|
||||
const mime = mimeLib.getType(filepath);
|
||||
if (mimeLib.badMimes.includes(mime)) return false;
|
||||
|
||||
const type = mime.split("/")[0];
|
||||
if (nonTextTypes.includes(type)) return false;
|
||||
if (mimeLib.nonTextTypes.includes(type)) return false;
|
||||
return true;
|
||||
} catch {
|
||||
return false;
|
||||
|
|
37
collector/utils/files/mime.js
Normal file
37
collector/utils/files/mime.js
Normal file
|
@ -0,0 +1,37 @@
|
|||
const MimeLib = require("mime");
|
||||
|
||||
/**
 * Wrapper around the `mime` library that installs a few extension
 * overrides and carries the lists used to decide whether a file's MIME type
 * rules it out as a text document.
 */
class MimeDetector {
  // Primary MIME classes (the part before the "/") that are not treated as text.
  nonTextTypes = ["multipart", "image", "model", "audio", "video"];

  // Full MIME types that are not parsed or interpreted as text documents.
  badMimes = [
    "application/octet-stream",
    "application/zip",
    "application/pkcs8",
    "application/vnd.microsoft.portable-executable",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // XLSX are binaries and need to be handled explicitly.
    "application/x-msdownload",
  ];

  /**
   * Binds the shared `mime` module to `this.lib` and installs the overrides.
   * The overrides are defined on that module object itself, so they are
   * visible to anything else that uses the same object.
   */
  constructor() {
    this.lib = MimeLib;
    this.setOverrides();
  }

  /**
   * Force-maps a handful of source/data file extensions to `text/plain`.
   */
  setOverrides() {
    // The .ts extension maps to video/mp2t because of https://en.wikipedia.org/wiki/MPEG_transport_stream
    // which has had this extension since long before TypeScript was invented, so the mapping must be forced.
    //
    // "txt" is listed first on purpose: with a forced define the mime library
    // uses the first listed extension as the default extension for the type,
    // so leading with "ts" would make getExtension("text/plain") return "ts".
    // "txt" already resolves to text/plain, so getType() results are unchanged.
    this.lib.define(
      {
        "text/plain": ["txt", "ts", "py", "opts", "lock", "jsonl"],
      },
      true
    );
  }

  /**
   * Looks up the MIME type for a file path via the wrapped library.
   * @param {string} filepath - Path or filename to resolve.
   * @returns {string|null} Whatever `lib.getType` returns; the mime package
   *   documents `null` for unrecognised extensions, so callers should guard.
   */
  getType(filepath) {
    return this.lib.getType(filepath);
  }
}
|
||||
|
||||
module.exports = {
|
||||
MimeDetector,
|
||||
};
|
Loading…
Add table
Reference in a new issue