mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-23 13:08:11 +00:00
Re-map some file mimes to support text (#842)
re-map some file mimes to support text
This commit is contained in:
parent
60fc5f715a
commit
ec90060d36
2 changed files with 43 additions and 18 deletions
|
@ -1,28 +1,16 @@
|
||||||
const fs = require("fs");
|
const fs = require("fs");
|
||||||
const path = require("path");
|
const path = require("path");
|
||||||
const { getType } = require("mime");
|
const { MimeDetector } = require("./mime");
|
||||||
|
|
||||||
function isTextType(filepath) {
|
function isTextType(filepath) {
|
||||||
if (!fs.existsSync(filepath)) return false;
|
|
||||||
// These are types of mime primary classes that for sure
|
|
||||||
// cannot also be forced into a text type.
|
|
||||||
const nonTextTypes = ["multipart", "image", "model", "audio", "video"];
|
|
||||||
// These are full-mimes we for sure cannot parse or interpret as text
|
|
||||||
// documents
|
|
||||||
const BAD_MIMES = [
|
|
||||||
"application/octet-stream",
|
|
||||||
"application/zip",
|
|
||||||
"application/pkcs8",
|
|
||||||
"application/vnd.microsoft.portable-executable",
|
|
||||||
"application/x-msdownload",
|
|
||||||
];
|
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const mime = getType(filepath);
|
if (!fs.existsSync(filepath)) return false;
|
||||||
if (BAD_MIMES.includes(mime)) return false;
|
const mimeLib = new MimeDetector();
|
||||||
|
const mime = mimeLib.getType(filepath);
|
||||||
|
if (mimeLib.badMimes.includes(mime)) return false;
|
||||||
|
|
||||||
const type = mime.split("/")[0];
|
const type = mime.split("/")[0];
|
||||||
if (nonTextTypes.includes(type)) return false;
|
if (mimeLib.nonTextTypes.includes(type)) return false;
|
||||||
return true;
|
return true;
|
||||||
} catch {
|
} catch {
|
||||||
return false;
|
return false;
|
||||||
|
|
37
collector/utils/files/mime.js
Normal file
37
collector/utils/files/mime.js
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
const MimeLib = require("mime");
|
||||||
|
|
||||||
|
/**
 * Small wrapper around the `mime` module that registers a few extra
 * extension -> `text/plain` mappings and carries the lists of MIME classes
 * and full MIME types that are treated as non-text.
 *
 * Note: `this.lib` is the module object returned by `require("mime")`, so the
 * overrides applied in the constructor are applied to that shared object, not
 * to a private copy.
 */
class MimeDetector {
  /** Primary MIME classes (the part before the "/") that are never text. */
  nonTextTypes = ["multipart", "image", "model", "audio", "video"];

  /** Full MIME types that cannot be parsed or interpreted as text documents. */
  badMimes = [
    "application/octet-stream",
    "application/zip",
    "application/pkcs8",
    "application/vnd.microsoft.portable-executable",
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", // XLSX are binaries and need to be handled explicitly.
    "application/x-msdownload",
  ];

  /** Binds the `mime` module and applies the extension overrides. */
  constructor() {
    this.lib = MimeLib;
    this.setOverrides();
  }

  /**
   * Force-maps a set of source/text extensions to `text/plain`.
   *
   * The .ts extension maps to video/mp2t because of
   * https://en.wikipedia.org/wiki/MPEG_transport_stream, which used this
   * extension long before TypeScript existed, so the mapping has to be
   * overridden with `force = true`.
   */
  setOverrides() {
    this.lib.define(
      {
        // "txt" is listed first on purpose: per the `mime` library's `define`
        // behavior, a forced define also makes the FIRST extension the default
        // one reported for the type, and that default should stay "txt"
        // rather than silently becoming "ts".
        "text/plain": ["txt", "ts", "py", "opts", "lock", "jsonl"],
      },
      true
    );
  }

  /**
   * Looks up the MIME type for a path or extension.
   *
   * @param {string} filepath - File path (or bare extension) to look up.
   * @returns {string|null} Whatever the `mime` module's `getType` returns for
   *   the input; its documentation specifies `null` when no type is known, so
   *   callers should not assume a string.
   */
  getType(filepath) {
    return this.lib.getType(filepath);
  }
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
MimeDetector,
|
||||||
|
};
|
Loading…
Add table
Reference in a new issue