Mirror of https://github.com/Mintplex-Labs/anything-llm.git (synced 2025-05-02 09:03:12 +00:00)
Support external transcription providers (#909)
* Support External Transcription providers
* patch files
* update docs
* fix return data
parent 1352b18b5f
commit 0ada882991
19 changed files with 541 additions and 110 deletions
collector
docker
frontend/src
App.jsx
components
SettingsSidebar
TranscriptionSelection
pages/GeneralSettings/TranscriptionPreference
utils
server
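Before the hunks, a quick orientation: the server now forwards a transcription options object to the collector's /process route, and the collector threads it down to each file processor. A minimal sketch of the request the server issues after this change — the filename is a hypothetical example; the endpoint, headers, and payload shape are taken from the hunks below:

const payload = {
  filename: "podcast-episode.mp3", // hypothetical upload
  options: {
    whisperProvider: process.env.WHISPER_PROVIDER || "local",
    openAiKey: process.env.OPEN_AI_KEY || null,
  },
};

fetch("http://0.0.0.0:8888/process", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(payload),
})
  .then((res) => res.json())
  .then(({ success, reason, documents }) =>
    console.log(success, reason, documents.length)
  );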
@@ -25,7 +25,7 @@ app.use(
 );

 app.post("/process", async function (request, response) {
-  const { filename } = reqBody(request);
+  const { filename, options = {} } = reqBody(request);
   try {
     const targetFilename = path
       .normalize(filename)
@@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
       success,
       reason,
       documents = [],
-    } = await processSingleFile(targetFilename);
+    } = await processSingleFile(targetFilename, options);
     response
       .status(200)
       .json({ filename: targetFilename, success, reason, documents });
@@ -33,6 +33,7 @@
     "moment": "^2.29.4",
     "multer": "^1.4.5-lts.1",
     "officeparser": "^4.0.5",
+    "openai": "^3.2.1",
    "pdf-parse": "^1.1.1",
     "puppeteer": "~21.5.2",
     "slugify": "^1.6.6",
@@ -46,4 +47,4 @@
     "nodemon": "^2.0.22",
     "prettier": "^2.4.1"
   }
-}
+}
@@ -1,5 +1,3 @@
-const fs = require("fs");
 const path = require("path");
-const { v4 } = require("uuid");
 const {
   createdDate,
@@ -9,39 +7,35 @@ const {
 const { tokenizeString } = require("../../utils/tokenizer");
 const { default: slugify } = require("slugify");
 const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
+const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");

-async function asAudio({ fullFilePath = "", filename = "" }) {
-  const whisper = new LocalWhisper();
+const WHISPER_PROVIDERS = {
+  openai: OpenAiWhisper,
+  local: LocalWhisper,
+};
+
+async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
+  const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
+    options?.whisperProvider
+  )
+    ? WHISPER_PROVIDERS[options?.whisperProvider]
+    : WHISPER_PROVIDERS.local;
+
   console.log(`-- Working ${filename} --`);
-  const transcriberPromise = new Promise((resolve) =>
-    whisper.client().then((client) => resolve(client))
-  );
-  const audioDataPromise = new Promise((resolve) =>
-    convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
-  );
-  const [audioData, transcriber] = await Promise.all([
-    audioDataPromise,
-    transcriberPromise,
-  ]);
+  const whisper = new WhisperProvider({ options });
+  const { content, error } = await whisper.processFile(fullFilePath, filename);

-  if (!audioData) {
-    console.error(`Failed to parse content from ${filename}.`);
+  if (!!error) {
+    console.error(`Error encountered for parsing of ${filename}.`);
     trashFile(fullFilePath);
     return {
       success: false,
-      reason: `Failed to parse content from ${filename}.`,
+      reason: error,
       documents: [],
     };
   }

-  console.log(`[Model Working]: Transcribing audio data to text`);
-  const { text: content } = await transcriber(audioData, {
-    chunk_length_s: 30,
-    stride_length_s: 5,
-  });
-
-  if (!content.length) {
+  if (!content?.length) {
     console.error(`Resulting text content was empty for ${filename}.`);
     trashFile(fullFilePath);
     return {
@@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
   return { success: true, reason: null, documents: [document] };
 }

-async function convertToWavAudioData(sourcePath) {
-  try {
-    let buffer;
-    const wavefile = require("wavefile");
-    const ffmpeg = require("fluent-ffmpeg");
-    const outFolder = path.resolve(__dirname, `../../storage/tmp`);
-    if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
-
-    const fileExtension = path.extname(sourcePath).toLowerCase();
-    if (fileExtension !== ".wav") {
-      console.log(
-        `[Conversion Required] ${fileExtension} file detected - converting to .wav`
-      );
-      const outputFile = path.resolve(outFolder, `${v4()}.wav`);
-      const convert = new Promise((resolve) => {
-        ffmpeg(sourcePath)
-          .toFormat("wav")
-          .on("error", (error) => {
-            console.error(`[Conversion Error] ${error.message}`);
-            resolve(false);
-          })
-          .on("progress", (progress) =>
-            console.log(
-              `[Conversion Processing]: ${progress.targetSize}KB converted`
-            )
-          )
-          .on("end", () => {
-            console.log("[Conversion Complete]: File converted to .wav!");
-            resolve(true);
-          })
-          .save(outputFile);
-      });
-      const success = await convert;
-      if (!success)
-        throw new Error(
-          "[Conversion Failed]: Could not convert file to .wav format!"
-        );
-
-      const chunks = [];
-      const stream = fs.createReadStream(outputFile);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-      fs.rmSync(outputFile);
-    } else {
-      const chunks = [];
-      const stream = fs.createReadStream(sourcePath);
-      for await (let chunk of stream) chunks.push(chunk);
-      buffer = Buffer.concat(chunks);
-    }
-
-    const wavFile = new wavefile.WaveFile(buffer);
-    wavFile.toBitDepth("32f");
-    wavFile.toSampleRate(16000);
-
-    let audioData = wavFile.getSamples();
-    if (Array.isArray(audioData)) {
-      if (audioData.length > 1) {
-        const SCALING_FACTOR = Math.sqrt(2);
-
-        // Merge channels into first channel to save memory
-        for (let i = 0; i < audioData[0].length; ++i) {
-          audioData[0][i] =
-            (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
-        }
-      }
-      audioData = audioData[0];
-    }
-
-    return audioData;
-  } catch (error) {
-    console.error(`convertToWavAudioData`, error);
-    return null;
-  }
-}
-
 module.exports = asAudio;
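The WHISPER_PROVIDERS map above gives asAudio a safe fallback: any unrecognized or missing provider name resolves to the local whisper implementation. A minimal standalone sketch of that lookup, with strings standing in for the provider classes:

const WHISPER_PROVIDERS = { openai: "OpenAiWhisper", local: "LocalWhisper" };

function pickProvider(name) {
  // Unknown or undefined names fall back to the local provider.
  return WHISPER_PROVIDERS.hasOwnProperty(name)
    ? WHISPER_PROVIDERS[name]
    : WHISPER_PROVIDERS.local;
}

console.log(pickProvider("openai")); // "OpenAiWhisper"
console.log(pickProvider("some-future-provider")); // "LocalWhisper"
console.log(pickProvider(undefined)); // "LocalWhisper"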
@@ -7,7 +7,7 @@ const {
 const { trashFile, isTextType } = require("../utils/files");
 const RESERVED_FILES = ["__HOTDIR__.md"];

-async function processSingleFile(targetFilename) {
+async function processSingleFile(targetFilename, options = {}) {
   const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
   if (RESERVED_FILES.includes(targetFilename))
     return {
@@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
   return await FileTypeProcessor({
     fullFilePath,
     filename: targetFilename,
+    options,
   });
 }
collector/utils/WhisperProviders/OpenAiWhisper.js (new file, 44 lines)
@@ -0,0 +1,44 @@
+const fs = require("fs");
+
+class OpenAiWhisper {
+  constructor({ options }) {
+    const { Configuration, OpenAIApi } = require("openai");
+    if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
+
+    const config = new Configuration({
+      apiKey: options.openAiKey,
+    });
+    this.openai = new OpenAIApi(config);
+    this.model = "whisper-1";
+    this.temperature = 0;
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async processFile(fullFilePath) {
+    return await this.openai
+      .createTranscription(
+        fs.createReadStream(fullFilePath),
+        this.model,
+        undefined,
+        "text",
+        this.temperature
+      )
+      .then((res) => {
+        if (res.hasOwnProperty("data"))
+          return { content: res.data, error: null };
+        return { content: "", error: "No content was able to be transcribed." };
+      })
+      .catch((e) => {
+        this.#log(`Could not get any response from openai whisper`, e.message);
+        return { content: "", error: e.message };
+      });
+  }
+}
+
+module.exports = {
+  OpenAiWhisper,
+};
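A hedged usage sketch of the new provider, assuming a valid key in OPEN_AI_KEY and a repo-root working directory (the file path is hypothetical). Note that both providers resolve to the same { content, error } shape:

const { OpenAiWhisper } = require("./collector/utils/WhisperProviders/OpenAiWhisper");

const whisper = new OpenAiWhisper({
  options: { openAiKey: process.env.OPEN_AI_KEY },
});

whisper.processFile("/tmp/meeting-recording.mp3").then(({ content, error }) => {
  if (error) return console.error(`Transcription failed: ${error}`);
  console.log(content.slice(0, 120)); // first chunk of the transcript
});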
@@ -1,5 +1,6 @@
-const path = require("path");
 const fs = require("fs");
+const path = require("path");
+const { v4 } = require("uuid");

 class LocalWhisper {
   constructor() {
@@ -16,12 +17,94 @@ class LocalWhisper {
     // Make directory when it does not exist in existing installations
     if (!fs.existsSync(this.cacheDir))
       fs.mkdirSync(this.cacheDir, { recursive: true });
+
+    this.#log("Initialized.");
+  }
+
+  #log(text, ...args) {
+    console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
+  }
+
+  async #convertToWavAudioData(sourcePath) {
+    try {
+      let buffer;
+      const wavefile = require("wavefile");
+      const ffmpeg = require("fluent-ffmpeg");
+      const outFolder = path.resolve(__dirname, `../../storage/tmp`);
+      if (!fs.existsSync(outFolder))
+        fs.mkdirSync(outFolder, { recursive: true });
+
+      const fileExtension = path.extname(sourcePath).toLowerCase();
+      if (fileExtension !== ".wav") {
+        this.#log(
+          `File conversion required! ${fileExtension} file detected - converting to .wav`
+        );
+        const outputFile = path.resolve(outFolder, `${v4()}.wav`);
+        const convert = new Promise((resolve) => {
+          ffmpeg(sourcePath)
+            .toFormat("wav")
+            .on("error", (error) => {
+              this.#log(`Conversion Error! ${error.message}`);
+              resolve(false);
+            })
+            .on("progress", (progress) =>
+              this.#log(
+                `Conversion Processing! ${progress.targetSize}KB converted`
+              )
+            )
+            .on("end", () => {
+              this.#log(`Conversion Complete! File converted to .wav!`);
+              resolve(true);
+            })
+            .save(outputFile);
+        });
+        const success = await convert;
+        if (!success)
+          throw new Error(
+            "[Conversion Failed]: Could not convert file to .wav format!"
+          );
+
+        const chunks = [];
+        const stream = fs.createReadStream(outputFile);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+        fs.rmSync(outputFile);
+      } else {
+        const chunks = [];
+        const stream = fs.createReadStream(sourcePath);
+        for await (let chunk of stream) chunks.push(chunk);
+        buffer = Buffer.concat(chunks);
+      }
+
+      const wavFile = new wavefile.WaveFile(buffer);
+      wavFile.toBitDepth("32f");
+      wavFile.toSampleRate(16000);
+
+      let audioData = wavFile.getSamples();
+      if (Array.isArray(audioData)) {
+        if (audioData.length > 1) {
+          const SCALING_FACTOR = Math.sqrt(2);
+
+          // Merge channels into first channel to save memory
+          for (let i = 0; i < audioData[0].length; ++i) {
+            audioData[0][i] =
+              (SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
+          }
+        }
+        audioData = audioData[0];
+      }
+
+      return audioData;
+    } catch (error) {
+      console.error(`convertToWavAudioData`, error);
+      return null;
+    }
+  }

   async client() {
     if (!fs.existsSync(this.modelPath)) {
-      console.log(
-        "\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
+      this.#log(
+        `The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
       );
     }

@@ -48,10 +131,45 @@ class LocalWhisper {
           : {}),
       });
     } catch (error) {
-      console.error("Failed to load the native whisper model:", error);
+      this.#log("Failed to load the native whisper model:", error);
       throw error;
     }
   }

+  async processFile(fullFilePath, filename) {
+    try {
+      const transcriberPromise = new Promise((resolve) =>
+        this.client().then((client) => resolve(client))
+      );
+      const audioDataPromise = new Promise((resolve) =>
+        this.#convertToWavAudioData(fullFilePath).then((audioData) =>
+          resolve(audioData)
+        )
+      );
+      const [audioData, transcriber] = await Promise.all([
+        audioDataPromise,
+        transcriberPromise,
+      ]);
+
+      if (!audioData) {
+        this.#log(`Failed to parse content from ${filename}.`);
+        return {
+          content: null,
+          error: `Failed to parse content from ${filename}.`,
+        };
+      }
+
+      this.#log(`Transcribing audio data to text...`);
+      const { text } = await transcriber(audioData, {
+        chunk_length_s: 30,
+        stride_length_s: 5,
+      });
+
+      return { content: text, error: null };
+    } catch (error) {
+      return { content: null, error: error.message };
+    }
+  }
 }

 module.exports = {
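One design choice worth noting in processFile above: loading the whisper pipeline (this.client()) and converting the source audio to wav are independent, so they run concurrently under Promise.all rather than back to back. A reduced sketch of the same pattern, with stand-in async tasks and hypothetical timings:

async function loadModel() {
  return new Promise((resolve) => setTimeout(() => resolve("transcriber"), 300));
}

async function convertAudio() {
  return new Promise((resolve) => setTimeout(() => resolve("audioData"), 200));
}

(async () => {
  // Total wait is max(300ms, 200ms), not 300ms + 200ms.
  const [audioData, transcriber] = await Promise.all([
    convertAudio(),
    loadModel(),
  ]);
  console.log(audioData, transcriber); // "audioData" "transcriber"
})();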
@@ -372,6 +372,13 @@ asynckit@^0.4.0:
   resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
   integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==

+axios@^0.26.0:
+  version "0.26.1"
+  resolved "https://registry.yarnpkg.com/axios/-/axios-0.26.1.tgz#1ede41c51fcf51bbbd6fd43669caaa4f0495aaa9"
+  integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==
+  dependencies:
+    follow-redirects "^1.14.8"
+
 b4a@^1.6.4:
   version "1.6.4"
   resolved "https://registry.yarnpkg.com/b4a/-/b4a-1.6.4.tgz#ef1c1422cae5ce6535ec191baeed7567443f36c9"
@@ -1203,6 +1210,11 @@ fluent-ffmpeg@^2.1.2:
     async ">=0.2.9"
     which "^1.1.1"

+follow-redirects@^1.14.8:
+  version "1.15.6"
+  resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b"
+  integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==
+
 form-data-encoder@1.7.2:
   version "1.7.2"
   resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
@@ -2304,6 +2316,14 @@ onnxruntime-web@1.14.0:
     onnxruntime-common "~1.14.0"
     platform "^1.3.6"

+openai@^3.2.1:
+  version "3.3.0"
+  resolved "https://registry.yarnpkg.com/openai/-/openai-3.3.0.tgz#a6408016ad0945738e1febf43f2fccca83a3f532"
+  integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
+  dependencies:
+    axios "^0.26.0"
+    form-data "^4.0.0"
+
 openai@^4.19.0:
   version "4.20.1"
   resolved "https://registry.yarnpkg.com/openai/-/openai-4.20.1.tgz#afa0d496d125b5a0f6cebcb4b9aeabf71e00214e"
@@ -131,6 +131,16 @@ GID='1000'
 # ASTRA_DB_APPLICATION_TOKEN=
 # ASTRA_DB_ENDPOINT=

+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+# WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # DISABLE_TELEMETRY="false"
@@ -29,6 +29,9 @@ const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
 const GeneralLLMPreference = lazy(
   () => import("@/pages/GeneralSettings/LLMPreference")
 );
+const GeneralTranscriptionPreference = lazy(
+  () => import("@/pages/GeneralSettings/TranscriptionPreference")
+);
 const GeneralEmbeddingPreference = lazy(
   () => import("@/pages/GeneralSettings/EmbeddingPreference")
 );
@@ -76,6 +79,12 @@ export default function App() {
             path="/settings/llm-preference"
             element={<AdminRoute Component={GeneralLLMPreference} />}
           />
+          <Route
+            path="/settings/transcription-preference"
+            element={
+              <AdminRoute Component={GeneralTranscriptionPreference} />
+            }
+          />
           <Route
             path="/settings/embedding-preference"
             element={<AdminRoute Component={GeneralEmbeddingPreference} />}
@@ -19,6 +19,7 @@ import {
   Notepad,
   CodeBlock,
   Barcode,
+  ClosedCaptioning,
 } from "@phosphor-icons/react";
 import useUser from "@/hooks/useUser";
 import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@@ -278,9 +279,17 @@ const SidebarOptions = ({ user = null }) => (
       flex={true}
       allowedRole={["admin"]}
     />
+    <Option
+      href={paths.settings.transcriptionPreference()}
+      btnText="Transcription Model"
+      icon={<ClosedCaptioning className="h-5 w-5 flex-shrink-0" />}
+      user={user}
+      flex={true}
+      allowedRole={["admin"]}
+    />
     <Option
       href={paths.settings.embeddingPreference()}
-      btnText="Embedding Preference"
+      btnText="Embedding Model"
       icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
       user={user}
       flex={true}
@@ -0,0 +1,38 @@
+import { Gauge } from "@phosphor-icons/react";
+export default function NativeTranscriptionOptions() {
+  return (
+    <div className="w-full flex flex-col gap-y-4">
+      <div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
+        <div className="gap-x-2 flex items-center">
+          <Gauge size={25} />
+          <p className="text-sm">
+            Using the local whisper model on machines with limited RAM or CPU
+            can stall AnythingLLM when processing media files.
+            <br />
+            We recommend at least 2GB of RAM and upload files &lt;10Mb.
+            <br />
+            <br />
+            <i>
+              The built-in model will automatically download on the first use.
+            </i>
+          </p>
+        </div>
+      </div>
+      <div className="w-full flex items-center gap-4">
+        <div className="flex flex-col w-60">
+          <label className="text-white text-sm font-semibold block mb-4">
+            Model Selection
+          </label>
+          <select
+            disabled={true}
+            className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+          >
+            <option disabled={true} selected={true}>
+              Xenova/whisper-small
+            </option>
+          </select>
+        </div>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,41 @@
+import { useState } from "react";
+
+export default function OpenAiWhisperOptions({ settings }) {
+  const [inputValue, setInputValue] = useState(settings?.OpenAiKey);
+  const [_openAIKey, setOpenAIKey] = useState(settings?.OpenAiKey);
+
+  return (
+    <div className="flex gap-x-4">
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          API Key
+        </label>
+        <input
+          type="password"
+          name="OpenAiKey"
+          className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
+          placeholder="OpenAI API Key"
+          defaultValue={settings?.OpenAiKey ? "*".repeat(20) : ""}
+          required={true}
+          autoComplete="off"
+          spellCheck={false}
+          onChange={(e) => setInputValue(e.target.value)}
+          onBlur={() => setOpenAIKey(inputValue)}
+        />
+      </div>
+      <div className="flex flex-col w-60">
+        <label className="text-white text-sm font-semibold block mb-4">
+          Whisper Model
+        </label>
+        <select
+          disabled={true}
+          className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
+        >
+          <option disabled={true} selected={true}>
+            Whisper Large
+          </option>
+        </select>
+      </div>
+    </div>
+  );
+}
@@ -0,0 +1,180 @@
+import React, { useEffect, useState } from "react";
+import { isMobile } from "react-device-detect";
+import Sidebar from "@/components/SettingsSidebar";
+import System from "@/models/system";
+import showToast from "@/utils/toast";
+import PreLoader from "@/components/Preloader";
+
+import OpenAiLogo from "@/media/llmprovider/openai.png";
+import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
+import OpenAiWhisperOptions from "@/components/TranscriptionSelection/OpenAiOptions";
+import NativeTranscriptionOptions from "@/components/TranscriptionSelection/NativeTranscriptionOptions";
+import LLMItem from "@/components/LLMSelection/LLMItem";
+import { MagnifyingGlass } from "@phosphor-icons/react";
+
+export default function TranscriptionModelPreference() {
+  const [saving, setSaving] = useState(false);
+  const [hasChanges, setHasChanges] = useState(false);
+  const [settings, setSettings] = useState(null);
+  const [loading, setLoading] = useState(true);
+  const [searchQuery, setSearchQuery] = useState("");
+  const [filteredProviders, setFilteredProviders] = useState([]);
+  const [selectedProvider, setSelectedProvider] = useState(null);
+
+  const handleSubmit = async (e) => {
+    e.preventDefault();
+    const form = e.target;
+    const data = { WhisperProvider: selectedProvider };
+    const formData = new FormData(form);
+
+    for (var [key, value] of formData.entries()) data[key] = value;
+    const { error } = await System.updateSystem(data);
+    setSaving(true);
+
+    if (error) {
+      showToast(`Failed to save preferences: ${error}`, "error");
+    } else {
+      showToast("Transcription preferences saved successfully.", "success");
+    }
+    setSaving(false);
+    setHasChanges(!!error);
+  };
+
+  const updateProviderChoice = (selection) => {
+    setSelectedProvider(selection);
+    setHasChanges(true);
+  };
+
+  useEffect(() => {
+    async function fetchKeys() {
+      const _settings = await System.keys();
+      setSettings(_settings);
+      setSelectedProvider(_settings?.WhisperProvider || "local");
+      setLoading(false);
+    }
+    fetchKeys();
+  }, []);
+
+  useEffect(() => {
+    const filtered = PROVIDERS.filter((provider) =>
+      provider.name.toLowerCase().includes(searchQuery.toLowerCase())
+    );
+    setFilteredProviders(filtered);
+  }, [searchQuery, selectedProvider]);
+
+  const PROVIDERS = [
+    {
+      name: "OpenAI",
+      value: "openai",
+      logo: OpenAiLogo,
+      options: <OpenAiWhisperOptions settings={settings} />,
+      description:
+        "Leverage the OpenAI Whisper-large model using your API key.",
+    },
+    {
+      name: "AnythingLLM Built-In",
+      value: "local",
+      logo: AnythingLLMIcon,
+      options: <NativeTranscriptionOptions settings={settings} />,
+      description: "Run a built-in whisper model on this instance privately.",
+    },
+  ];
+
+  return (
+    <div className="w-screen h-screen overflow-hidden bg-sidebar flex">
+      <Sidebar />
+      {loading ? (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <div className="w-full h-full flex justify-center items-center">
+            <PreLoader />
+          </div>
+        </div>
+      ) : (
+        <div
+          style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
+          className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
+        >
+          <form onSubmit={handleSubmit} className="flex w-full">
+            <div className="flex flex-col w-full px-1 md:pl-6 md:pr-[86px] md:py-6 py-16">
+              <div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
+                <div className="flex gap-x-4 items-center">
+                  <p className="text-lg leading-6 font-bold text-white">
+                    Transcription Model Preference
+                  </p>
+                  {hasChanges && (
+                    <button
+                      type="submit"
+                      disabled={saving}
+                      className="flex items-center gap-x-2 px-4 py-2 rounded-lg bg-[#2C2F36] text-white text-sm hover:bg-[#3D4147] shadow-md border border-[#3D4147]"
+                    >
+                      {saving ? "Saving..." : "Save changes"}
+                    </button>
+                  )}
+                </div>
+                <p className="text-xs leading-[18px] font-base text-white text-opacity-60">
+                  These are the credentials and settings for your preferred
+                  transcription model provider. It's important these keys are
+                  current and correct or else media files and audio will not
+                  transcribe.
+                </p>
+              </div>
+              <div className="text-sm font-medium text-white mt-6 mb-4">
+                Transcription Providers
+              </div>
+              <div className="w-full">
+                <div className="w-full relative border-slate-300/20 shadow border-4 rounded-xl text-white">
+                  <div className="w-full p-4 absolute top-0 rounded-t-lg backdrop-blur-sm">
+                    <div className="w-full flex items-center sticky top-0">
+                      <MagnifyingGlass
+                        size={16}
+                        weight="bold"
+                        className="absolute left-4 z-30 text-white"
+                      />
+                      <input
+                        type="text"
+                        placeholder="Search audio transcription providers"
+                        className="bg-zinc-600 z-20 pl-10 h-[38px] rounded-full w-full px-4 py-1 text-sm border-2 border-slate-300/40 outline-none focus:border-white text-white"
+                        onChange={(e) => setSearchQuery(e.target.value)}
+                        autoComplete="off"
+                        onKeyDown={(e) => {
+                          if (e.key === "Enter") e.preventDefault();
+                        }}
+                      />
+                    </div>
+                  </div>
+                  <div className="px-4 pt-[70px] flex flex-col gap-y-1 max-h-[390px] overflow-y-auto no-scroll pb-4">
+                    {filteredProviders.map((provider) => {
+                      return (
+                        <LLMItem
+                          key={provider.name}
+                          name={provider.name}
+                          value={provider.value}
+                          image={provider.logo}
+                          description={provider.description}
+                          checked={selectedProvider === provider.value}
+                          onClick={() => updateProviderChoice(provider.value)}
+                        />
+                      );
+                    })}
+                  </div>
+                </div>
+                <div
+                  onChange={() => setHasChanges(true)}
+                  className="mt-4 flex flex-col gap-y-1"
+                >
+                  {selectedProvider &&
+                    PROVIDERS.find(
+                      (provider) => provider.value === selectedProvider
+                    )?.options}
+                </div>
+              </div>
+            </div>
+          </form>
+        </div>
+      )}
+    </div>
+  );
+}
@@ -92,6 +92,9 @@ export default {
     llmPreference: () => {
       return "/settings/llm-preference";
     },
+    transcriptionPreference: () => {
+      return "/settings/transcription-preference";
+    },
     embeddingPreference: () => {
       return "/settings/embedding-preference";
     },
@@ -128,6 +128,16 @@ VECTOR_DB="lancedb"
 # ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
 # ZILLIZ_API_TOKEN=api-token-here

+###########################################
+######## Audio Model Selection ############
+###########################################
+# (default) use built-in whisper-small model.
+WHISPER_PROVIDER="local"
+
+# use openai hosted whisper model.
+# WHISPER_PROVIDER="openai"
+# OPEN_AI_KEY=sk-xxxxxxxx
+
 # CLOUD DEPLOYMENT VARIRABLES ONLY
 # AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
 # STORAGE_DIR= # absolute filesystem path with no trailing slash
@@ -258,6 +258,7 @@ const SystemSettings = {
           AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
         }
       : {}),
+      WhisperProvider: process.env.WHISPER_PROVIDER || "local",
     };
   },
@@ -14,6 +14,9 @@ AnythingLLM allows you to upload various audio and video formats as source docu

 Once transcribed you can embed these transcriptions into your workspace like you would any other file!

+**Other external model/transcription providers are also live.**
+- [OpenAI Whisper via API key.](https://openai.com/research/whisper)
+
 ## Text generation (LLM selection)
 > [!IMPORTANT]
 > Use of a locally running LLM model is **experimental** and may behave unexpectedly, crash, or not function at all.
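For reference, pointing the documented default at the hosted provider is just the two environment variables this commit adds to the .env examples above (the key value is a placeholder):

WHISPER_PROVIDER="openai"
OPEN_AI_KEY=sk-xxxxxxxx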
@@ -5,13 +5,20 @@
 class CollectorApi {
   constructor() {
-    this.endpoint = "http://0.0.0.0:8888";
+    this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`;
   }

   log(text, ...args) {
     console.log(`\x1b[36m[CollectorApi]\x1b[0m ${text}`, ...args);
   }

+  #attachOptions() {
+    return {
+      whisperProvider: process.env.WHISPER_PROVIDER || "local",
+      openAiKey: process.env.OPEN_AI_KEY || null,
+    };
+  }
+
   async online() {
     return await fetch(this.endpoint)
       .then((res) => res.ok)
@@ -38,7 +45,10 @@ class CollectorApi {
       headers: {
         "Content-Type": "application/json",
       },
-      body: JSON.stringify({ filename }),
+      body: JSON.stringify({
+        filename,
+        options: this.#attachOptions(),
+      }),
     })
       .then((res) => {
         if (!res.ok) throw new Error("Response could not be completed");
@@ -269,6 +269,13 @@ const KEY_MAPPING = {
     checks: [isNotEmpty],
   },

+  // Whisper (transcription) providers
+  WhisperProvider: {
+    envKey: "WHISPER_PROVIDER",
+    checks: [isNotEmpty, supportedTranscriptionProvider],
+    postUpdate: [],
+  },
+
   // System Settings
   AuthToken: {
     envKey: "AUTH_TOKEN",
@@ -351,6 +358,13 @@ function supportedLLM(input = "") {
   return validSelection ? null : `${input} is not a valid LLM provider.`;
 }

+function supportedTranscriptionProvider(input = "") {
+  const validSelection = ["openai", "local"].includes(input);
+  return validSelection
+    ? null
+    : `${input} is not a valid transcription model provider.`;
+}
+
 function validGeminiModel(input = "") {
   const validModels = ["gemini-pro"];
   return validModels.includes(input)
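The new validator follows the same contract as the other checks in this file: null on success, an error string otherwise. A quick self-contained check (the invalid provider name below is made up):

function supportedTranscriptionProvider(input = "") {
  const validSelection = ["openai", "local"].includes(input);
  return validSelection
    ? null
    : `${input} is not a valid transcription model provider.`;
}

console.log(supportedTranscriptionProvider("local")); // null (valid)
console.log(supportedTranscriptionProvider("acme-stt")); // "acme-stt is not a valid transcription model provider."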