mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-05-02 09:03:12 +00:00
Audio file validations (#2902)
* add audio file validations * patch sharp to support wavfile parsing --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
696af19c45
commit
dd017c6cbb
3 changed files with 194 additions and 1 deletions
collector
|
@ -38,6 +38,7 @@
|
|||
"openai": "4.38.5",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"puppeteer": "~21.5.2",
|
||||
"sharp": "^0.33.5",
|
||||
"slugify": "^1.6.6",
|
||||
"url-pattern": "^1.0.3",
|
||||
"uuid": "^9.0.0",
|
||||
|
|
|
@ -29,6 +29,37 @@ class LocalWhisper {
|
|||
console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
|
||||
}
|
||||
|
||||
#validateAudioFile(wavFile) {
|
||||
const sampleRate = wavFile.fmt.sampleRate;
|
||||
const duration = wavFile.data.samples / sampleRate;
|
||||
|
||||
// Most speech recognition systems expect minimum 8kHz
|
||||
// But we'll set it lower to be safe
|
||||
if (sampleRate < 4000) {
|
||||
// 4kHz minimum
|
||||
throw new Error(
|
||||
"Audio file sample rate is too low for accurate transcription. Minimum required is 4kHz."
|
||||
);
|
||||
}
|
||||
|
||||
// Typical audio file duration limits
|
||||
const MAX_DURATION_SECONDS = 4 * 60 * 60; // 4 hours
|
||||
if (duration > MAX_DURATION_SECONDS) {
|
||||
throw new Error("Audio file duration exceeds maximum limit of 4 hours.");
|
||||
}
|
||||
|
||||
// Check final sample count after upsampling to prevent memory issues
|
||||
const targetSampleRate = 16000;
|
||||
const upsampledSamples = duration * targetSampleRate;
|
||||
const MAX_SAMPLES = 230_400_000; // ~4 hours at 16kHz
|
||||
|
||||
if (upsampledSamples > MAX_SAMPLES) {
|
||||
throw new Error("Audio file exceeds maximum allowed length.");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
async #convertToWavAudioData(sourcePath) {
|
||||
try {
|
||||
let buffer;
|
||||
|
@ -81,6 +112,13 @@ class LocalWhisper {
|
|||
}
|
||||
|
||||
const wavFile = new wavefile.WaveFile(buffer);
|
||||
try {
|
||||
this.#validateAudioFile(wavFile);
|
||||
} catch (error) {
|
||||
this.#log(`Audio validation failed: ${error.message}`);
|
||||
throw new Error(`Invalid audio file: ${error.message}`);
|
||||
}
|
||||
|
||||
wavFile.toBitDepth("32f");
|
||||
wavFile.toSampleRate(16000);
|
||||
|
||||
|
|
|
@ -54,6 +54,13 @@
|
|||
enabled "2.0.x"
|
||||
kuler "^2.0.0"
|
||||
|
||||
"@emnapi/runtime@^1.2.0":
|
||||
version "1.3.1"
|
||||
resolved "https://registry.yarnpkg.com/@emnapi/runtime/-/runtime-1.3.1.tgz#0fcaa575afc31f455fd33534c19381cfce6c6f60"
|
||||
integrity sha512-kEBmG8KyqtxJZv+ygbEim+KCGtIq1fC22Ms3S4ziXmYKm8uyoLX0MHONVKwp+9opg390VaKRNt4a7A9NwmpNhw==
|
||||
dependencies:
|
||||
tslib "^2.4.0"
|
||||
|
||||
"@fastify/busboy@^2.0.0":
|
||||
version "2.1.1"
|
||||
resolved "https://registry.yarnpkg.com/@fastify/busboy/-/busboy-2.1.1.tgz#b9da6a878a371829a0502c9b6c1c143ef6663f4d"
|
||||
|
@ -64,6 +71,119 @@
|
|||
resolved "https://registry.yarnpkg.com/@huggingface/jinja/-/jinja-0.2.2.tgz#faeb205a9d6995089bef52655ddd8245d3190627"
|
||||
integrity sha512-/KPde26khDUIPkTGU82jdtTW9UAuvUTumCAbFs/7giR0SxsvZC4hru51PBvpijH6BVkHcROcvZM/lpy5h1jRRA==
|
||||
|
||||
"@img/sharp-darwin-arm64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-darwin-arm64/-/sharp-darwin-arm64-0.33.5.tgz#ef5b5a07862805f1e8145a377c8ba6e98813ca08"
|
||||
integrity sha512-UT4p+iz/2H4twwAoLCqfA9UH5pI6DggwKEGuaPy7nCVQ8ZsiY5PIcrRvD1DzuY3qYL07NtIQcWnBSY/heikIFQ==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-darwin-arm64" "1.0.4"
|
||||
|
||||
"@img/sharp-darwin-x64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-darwin-x64/-/sharp-darwin-x64-0.33.5.tgz#e03d3451cd9e664faa72948cc70a403ea4063d61"
|
||||
integrity sha512-fyHac4jIc1ANYGRDxtiqelIbdWkIuQaI84Mv45KvGRRxSAa7o7d1ZKAOBaYbnepLC1WqxfpimdeWfvqqSGwR2Q==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-darwin-x64" "1.0.4"
|
||||
|
||||
"@img/sharp-libvips-darwin-arm64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-darwin-arm64/-/sharp-libvips-darwin-arm64-1.0.4.tgz#447c5026700c01a993c7804eb8af5f6e9868c07f"
|
||||
integrity sha512-XblONe153h0O2zuFfTAbQYAX2JhYmDHeWikp1LM9Hul9gVPjFY427k6dFEcOL72O01QxQsWi761svJ/ev9xEDg==
|
||||
|
||||
"@img/sharp-libvips-darwin-x64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-darwin-x64/-/sharp-libvips-darwin-x64-1.0.4.tgz#e0456f8f7c623f9dbfbdc77383caa72281d86062"
|
||||
integrity sha512-xnGR8YuZYfJGmWPvmlunFaWJsb9T/AO2ykoP3Fz/0X5XV2aoYBPkX6xqCQvUTKKiLddarLaxpzNe+b1hjeWHAQ==
|
||||
|
||||
"@img/sharp-libvips-linux-arm64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linux-arm64/-/sharp-libvips-linux-arm64-1.0.4.tgz#979b1c66c9a91f7ff2893556ef267f90ebe51704"
|
||||
integrity sha512-9B+taZ8DlyyqzZQnoeIvDVR/2F4EbMepXMc/NdVbkzsJbzkUjhXv/70GQJ7tdLA4YJgNP25zukcxpX2/SueNrA==
|
||||
|
||||
"@img/sharp-libvips-linux-arm@1.0.5":
|
||||
version "1.0.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linux-arm/-/sharp-libvips-linux-arm-1.0.5.tgz#99f922d4e15216ec205dcb6891b721bfd2884197"
|
||||
integrity sha512-gvcC4ACAOPRNATg/ov8/MnbxFDJqf/pDePbBnuBDcjsI8PssmjoKMAz4LtLaVi+OnSb5FK/yIOamqDwGmXW32g==
|
||||
|
||||
"@img/sharp-libvips-linux-s390x@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linux-s390x/-/sharp-libvips-linux-s390x-1.0.4.tgz#f8a5eb1f374a082f72b3f45e2fb25b8118a8a5ce"
|
||||
integrity sha512-u7Wz6ntiSSgGSGcjZ55im6uvTrOxSIS8/dgoVMoiGE9I6JAfU50yH5BoDlYA1tcuGS7g/QNtetJnxA6QEsCVTA==
|
||||
|
||||
"@img/sharp-libvips-linux-x64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linux-x64/-/sharp-libvips-linux-x64-1.0.4.tgz#d4c4619cdd157774906e15770ee119931c7ef5e0"
|
||||
integrity sha512-MmWmQ3iPFZr0Iev+BAgVMb3ZyC4KeFc3jFxnNbEPas60e1cIfevbtuyf9nDGIzOaW9PdnDciJm+wFFaTlj5xYw==
|
||||
|
||||
"@img/sharp-libvips-linuxmusl-arm64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linuxmusl-arm64/-/sharp-libvips-linuxmusl-arm64-1.0.4.tgz#166778da0f48dd2bded1fa3033cee6b588f0d5d5"
|
||||
integrity sha512-9Ti+BbTYDcsbp4wfYib8Ctm1ilkugkA/uscUn6UXK1ldpC1JjiXbLfFZtRlBhjPZ5o1NCLiDbg8fhUPKStHoTA==
|
||||
|
||||
"@img/sharp-libvips-linuxmusl-x64@1.0.4":
|
||||
version "1.0.4"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-libvips-linuxmusl-x64/-/sharp-libvips-linuxmusl-x64-1.0.4.tgz#93794e4d7720b077fcad3e02982f2f1c246751ff"
|
||||
integrity sha512-viYN1KX9m+/hGkJtvYYp+CCLgnJXwiQB39damAO7WMdKWlIhmYTfHjwSbQeUK/20vY154mwezd9HflVFM1wVSw==
|
||||
|
||||
"@img/sharp-linux-arm64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linux-arm64/-/sharp-linux-arm64-0.33.5.tgz#edb0697e7a8279c9fc829a60fc35644c4839bb22"
|
||||
integrity sha512-JMVv+AMRyGOHtO1RFBiJy/MBsgz0x4AWrT6QoEVVTyh1E39TrCUpTRI7mx9VksGX4awWASxqCYLCV4wBZHAYxA==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linux-arm64" "1.0.4"
|
||||
|
||||
"@img/sharp-linux-arm@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linux-arm/-/sharp-linux-arm-0.33.5.tgz#422c1a352e7b5832842577dc51602bcd5b6f5eff"
|
||||
integrity sha512-JTS1eldqZbJxjvKaAkxhZmBqPRGmxgu+qFKSInv8moZ2AmT5Yib3EQ1c6gp493HvrvV8QgdOXdyaIBrhvFhBMQ==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linux-arm" "1.0.5"
|
||||
|
||||
"@img/sharp-linux-s390x@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linux-s390x/-/sharp-linux-s390x-0.33.5.tgz#f5c077926b48e97e4a04d004dfaf175972059667"
|
||||
integrity sha512-y/5PCd+mP4CA/sPDKl2961b+C9d+vPAveS33s6Z3zfASk2j5upL6fXVPZi7ztePZ5CuH+1kW8JtvxgbuXHRa4Q==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linux-s390x" "1.0.4"
|
||||
|
||||
"@img/sharp-linux-x64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linux-x64/-/sharp-linux-x64-0.33.5.tgz#d806e0afd71ae6775cc87f0da8f2d03a7c2209cb"
|
||||
integrity sha512-opC+Ok5pRNAzuvq1AG0ar+1owsu842/Ab+4qvU879ippJBHvyY5n2mxF1izXqkPYlGuP/M556uh53jRLJmzTWA==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linux-x64" "1.0.4"
|
||||
|
||||
"@img/sharp-linuxmusl-arm64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linuxmusl-arm64/-/sharp-linuxmusl-arm64-0.33.5.tgz#252975b915894fb315af5deea174651e208d3d6b"
|
||||
integrity sha512-XrHMZwGQGvJg2V/oRSUfSAfjfPxO+4DkiRh6p2AFjLQztWUuY/o8Mq0eMQVIY7HJ1CDQUJlxGGZRw1a5bqmd1g==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linuxmusl-arm64" "1.0.4"
|
||||
|
||||
"@img/sharp-linuxmusl-x64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-linuxmusl-x64/-/sharp-linuxmusl-x64-0.33.5.tgz#3f4609ac5d8ef8ec7dadee80b560961a60fd4f48"
|
||||
integrity sha512-WT+d/cgqKkkKySYmqoZ8y3pxx7lx9vVejxW/W4DOFMYVSkErR+w7mf2u8m/y4+xHe7yY9DAXQMWQhpnMuFfScw==
|
||||
optionalDependencies:
|
||||
"@img/sharp-libvips-linuxmusl-x64" "1.0.4"
|
||||
|
||||
"@img/sharp-wasm32@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-wasm32/-/sharp-wasm32-0.33.5.tgz#6f44f3283069d935bb5ca5813153572f3e6f61a1"
|
||||
integrity sha512-ykUW4LVGaMcU9lu9thv85CbRMAwfeadCJHRsg2GmeRa/cJxsVY9Rbd57JcMxBkKHag5U/x7TSBpScF4U8ElVzg==
|
||||
dependencies:
|
||||
"@emnapi/runtime" "^1.2.0"
|
||||
|
||||
"@img/sharp-win32-ia32@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-win32-ia32/-/sharp-win32-ia32-0.33.5.tgz#1a0c839a40c5351e9885628c85f2e5dfd02b52a9"
|
||||
integrity sha512-T36PblLaTwuVJ/zw/LaH0PdZkRz5rd3SmMHX8GSmR7vtNSP5Z6bQkExdSK7xGWyxLw4sUknBuugTelgw2faBbQ==
|
||||
|
||||
"@img/sharp-win32-x64@0.33.5":
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/@img/sharp-win32-x64/-/sharp-win32-x64-0.33.5.tgz#56f00962ff0c4e0eb93d34a047d29fa995e3e342"
|
||||
integrity sha512-MpY/o8/8kj+EcnxwvrP4aTJSWw/aZ7JIGR4aBeZkZw5B7/Jn+tY9/VNwtcoGmdT7GfggGIU4kygOMSbYnOrAbg==
|
||||
|
||||
"@langchain/community@^0.2.23":
|
||||
version "0.2.23"
|
||||
resolved "https://registry.yarnpkg.com/@langchain/community/-/community-0.2.23.tgz#20560e107bcc8432c42e499f1b9292d41a3732f2"
|
||||
|
@ -1038,7 +1158,7 @@ destroy@1.2.0:
|
|||
resolved "https://registry.yarnpkg.com/destroy/-/destroy-1.2.0.tgz#4803735509ad8be552934c67df614f94e66fa015"
|
||||
integrity sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==
|
||||
|
||||
detect-libc@^2.0.0, detect-libc@^2.0.2:
|
||||
detect-libc@^2.0.0, detect-libc@^2.0.2, detect-libc@^2.0.3:
|
||||
version "2.0.3"
|
||||
resolved "https://registry.yarnpkg.com/detect-libc/-/detect-libc-2.0.3.tgz#f0cd503b40f9939b894697d19ad50895e30cf700"
|
||||
integrity sha512-bwy0MGW55bG41VqxxypOsdSdGqLwXPI/focwgTYCFMbdUiBAxLg9CFzG08sz2aqzknwiX7Hkl0bQENjg8iLByw==
|
||||
|
@ -3035,6 +3155,35 @@ sharp@^0.32.0:
|
|||
tar-fs "^3.0.4"
|
||||
tunnel-agent "^0.6.0"
|
||||
|
||||
sharp@^0.33.5:
|
||||
version "0.33.5"
|
||||
resolved "https://registry.yarnpkg.com/sharp/-/sharp-0.33.5.tgz#13e0e4130cc309d6a9497596715240b2ec0c594e"
|
||||
integrity sha512-haPVm1EkS9pgvHrQ/F3Xy+hgcuMV0Wm9vfIBSiwZ05k+xgb0PkBQpGsAA/oWdDobNaZTH5ppvHtzCFbnSEwHVw==
|
||||
dependencies:
|
||||
color "^4.2.3"
|
||||
detect-libc "^2.0.3"
|
||||
semver "^7.6.3"
|
||||
optionalDependencies:
|
||||
"@img/sharp-darwin-arm64" "0.33.5"
|
||||
"@img/sharp-darwin-x64" "0.33.5"
|
||||
"@img/sharp-libvips-darwin-arm64" "1.0.4"
|
||||
"@img/sharp-libvips-darwin-x64" "1.0.4"
|
||||
"@img/sharp-libvips-linux-arm" "1.0.5"
|
||||
"@img/sharp-libvips-linux-arm64" "1.0.4"
|
||||
"@img/sharp-libvips-linux-s390x" "1.0.4"
|
||||
"@img/sharp-libvips-linux-x64" "1.0.4"
|
||||
"@img/sharp-libvips-linuxmusl-arm64" "1.0.4"
|
||||
"@img/sharp-libvips-linuxmusl-x64" "1.0.4"
|
||||
"@img/sharp-linux-arm" "0.33.5"
|
||||
"@img/sharp-linux-arm64" "0.33.5"
|
||||
"@img/sharp-linux-s390x" "0.33.5"
|
||||
"@img/sharp-linux-x64" "0.33.5"
|
||||
"@img/sharp-linuxmusl-arm64" "0.33.5"
|
||||
"@img/sharp-linuxmusl-x64" "0.33.5"
|
||||
"@img/sharp-wasm32" "0.33.5"
|
||||
"@img/sharp-win32-ia32" "0.33.5"
|
||||
"@img/sharp-win32-x64" "0.33.5"
|
||||
|
||||
side-channel@^1.0.4:
|
||||
version "1.0.6"
|
||||
resolved "https://registry.yarnpkg.com/side-channel/-/side-channel-1.0.6.tgz#abd25fb7cd24baf45466406b1096b7831c9215f2"
|
||||
|
@ -3343,6 +3492,11 @@ tslib@>=2, tslib@^2.0.1, tslib@^2.5.0, tslib@^2.6.2:
|
|||
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.6.2.tgz#703ac29425e7b37cd6fd456e92404d46d1f3e4ae"
|
||||
integrity sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==
|
||||
|
||||
tslib@^2.4.0:
|
||||
version "2.8.1"
|
||||
resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f"
|
||||
integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==
|
||||
|
||||
tunnel-agent@^0.6.0:
|
||||
version "0.6.0"
|
||||
resolved "https://registry.yarnpkg.com/tunnel-agent/-/tunnel-agent-0.6.0.tgz#27a5dea06b36b04a0a9966774b290868f0fc40fd"
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue