Support external transcription providers ()

* Support External Transcription providers

* patch files

* update docs

* fix return data
Timothy Carambat 2024-03-14 15:43:26 -07:00 committed by GitHub
parent 1352b18b5f
commit 0ada882991
19 changed files with 541 additions and 110 deletions
collector
docker
frontend/src
  App.jsx
  components
    SettingsSidebar
    TranscriptionSelection
      NativeTranscriptionOptions
      OpenAiOptions
  pages/GeneralSettings/TranscriptionPreference
  utils
server
  .env.example
  models
  storage/models
  utils
    collectorApi
    helpers
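
Read together, the diffs below thread a transcription options payload from the server to the collector: CollectorApi attaches whisperProvider and openAiKey from the environment, the collector's /process endpoint forwards them to processSingleFile, and the audio converter picks a whisper provider from that object. A minimal sketch of the request the server now sends, assuming the default endpoint and a hypothetical file in the hotdir:

// Hedged sketch only: payload shape taken from the CollectorApi and
// collector index diffs in this commit; "meeting.mp3" is a hypothetical file.
const payload = {
  filename: "meeting.mp3",
  options: {
    whisperProvider: process.env.WHISPER_PROVIDER || "local", // "local" or "openai"
    openAiKey: process.env.OPEN_AI_KEY || null, // only used by the "openai" provider
  },
};

fetch("http://0.0.0.0:8888/process", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify(payload),
})
  .then((res) => res.json())
  .then(({ success, reason, documents }) => {
    if (!success) console.error(reason);
    else console.log(`Parsed ${documents.length} document(s).`);
  });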

View file

@@ -25,7 +25,7 @@ app.use(
);
app.post("/process", async function (request, response) {
const { filename } = reqBody(request);
const { filename, options = {} } = reqBody(request);
try {
const targetFilename = path
.normalize(filename)
@@ -34,7 +34,7 @@ app.post("/process", async function (request, response) {
success,
reason,
documents = [],
} = await processSingleFile(targetFilename);
} = await processSingleFile(targetFilename, options);
response
.status(200)
.json({ filename: targetFilename, success, reason, documents });

View file

@@ -33,6 +33,7 @@
"moment": "^2.29.4",
"multer": "^1.4.5-lts.1",
"officeparser": "^4.0.5",
"openai": "^3.2.1",
"pdf-parse": "^1.1.1",
"puppeteer": "~21.5.2",
"slugify": "^1.6.6",
@@ -46,4 +47,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}

View file

@@ -1,5 +1,3 @@
const fs = require("fs");
const path = require("path");
const { v4 } = require("uuid");
const {
createdDate,
@@ -9,39 +7,35 @@ const {
const { tokenizeString } = require("../../utils/tokenizer");
const { default: slugify } = require("slugify");
const { LocalWhisper } = require("../../utils/WhisperProviders/localWhisper");
const { OpenAiWhisper } = require("../../utils/WhisperProviders/OpenAiWhisper");
async function asAudio({ fullFilePath = "", filename = "" }) {
const whisper = new LocalWhisper();
const WHISPER_PROVIDERS = {
openai: OpenAiWhisper,
local: LocalWhisper,
};
async function asAudio({ fullFilePath = "", filename = "", options = {} }) {
const WhisperProvider = WHISPER_PROVIDERS.hasOwnProperty(
options?.whisperProvider
)
? WHISPER_PROVIDERS[options?.whisperProvider]
: WHISPER_PROVIDERS.local;
console.log(`-- Working ${filename} --`);
const transcriberPromise = new Promise((resolve) =>
whisper.client().then((client) => resolve(client))
);
const audioDataPromise = new Promise((resolve) =>
convertToWavAudioData(fullFilePath).then((audioData) => resolve(audioData))
);
const [audioData, transcriber] = await Promise.all([
audioDataPromise,
transcriberPromise,
]);
const whisper = new WhisperProvider({ options });
const { content, error } = await whisper.processFile(fullFilePath, filename);
if (!audioData) {
console.error(`Failed to parse content from ${filename}.`);
if (!!error) {
console.error(`Error encountered for parsing of ${filename}.`);
trashFile(fullFilePath);
return {
success: false,
reason: `Failed to parse content from ${filename}.`,
reason: error,
documents: [],
};
}
console.log(`[Model Working]: Transcribing audio data to text`);
const { text: content } = await transcriber(audioData, {
chunk_length_s: 30,
stride_length_s: 5,
});
if (!content.length) {
if (!content?.length) {
console.error(`Resulting text content was empty for ${filename}.`);
trashFile(fullFilePath);
return {
@@ -76,79 +70,4 @@ async function asAudio({ fullFilePath = "", filename = "" }) {
return { success: true, reason: null, documents: [document] };
}
async function convertToWavAudioData(sourcePath) {
try {
let buffer;
const wavefile = require("wavefile");
const ffmpeg = require("fluent-ffmpeg");
const outFolder = path.resolve(__dirname, `../../storage/tmp`);
if (!fs.existsSync(outFolder)) fs.mkdirSync(outFolder, { recursive: true });
const fileExtension = path.extname(sourcePath).toLowerCase();
if (fileExtension !== ".wav") {
console.log(
`[Conversion Required] ${fileExtension} file detected - converting to .wav`
);
const outputFile = path.resolve(outFolder, `${v4()}.wav`);
const convert = new Promise((resolve) => {
ffmpeg(sourcePath)
.toFormat("wav")
.on("error", (error) => {
console.error(`[Conversion Error] ${error.message}`);
resolve(false);
})
.on("progress", (progress) =>
console.log(
`[Conversion Processing]: ${progress.targetSize}KB converted`
)
)
.on("end", () => {
console.log("[Conversion Complete]: File converted to .wav!");
resolve(true);
})
.save(outputFile);
});
const success = await convert;
if (!success)
throw new Error(
"[Conversion Failed]: Could not convert file to .wav format!"
);
const chunks = [];
const stream = fs.createReadStream(outputFile);
for await (let chunk of stream) chunks.push(chunk);
buffer = Buffer.concat(chunks);
fs.rmSync(outputFile);
} else {
const chunks = [];
const stream = fs.createReadStream(sourcePath);
for await (let chunk of stream) chunks.push(chunk);
buffer = Buffer.concat(chunks);
}
const wavFile = new wavefile.WaveFile(buffer);
wavFile.toBitDepth("32f");
wavFile.toSampleRate(16000);
let audioData = wavFile.getSamples();
if (Array.isArray(audioData)) {
if (audioData.length > 1) {
const SCALING_FACTOR = Math.sqrt(2);
// Merge channels into first channel to save memory
for (let i = 0; i < audioData[0].length; ++i) {
audioData[0][i] =
(SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
}
}
audioData = audioData[0];
}
return audioData;
} catch (error) {
console.error(`convertToWavAudioData`, error);
return null;
}
}
module.exports = asAudio;
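
For context, a minimal usage sketch of the reworked converter under the new options contract; the require path and file names below are assumptions for illustration, not part of the commit:

// Hedged usage sketch: path and filename are illustrative.
const asAudio = require("./processSingleFile/convert/asAudio");

asAudio({
  fullFilePath: "/app/collector/hotdir/interview.mp3",
  filename: "interview.mp3",
  options: { whisperProvider: "openai", openAiKey: process.env.OPEN_AI_KEY },
}).then(({ success, reason, documents }) => {
  if (!success) console.error(reason);
  else console.log(`Transcribed into ${documents.length} document(s).`);
});

Any unrecognized whisperProvider value falls back to the local provider, per the WHISPER_PROVIDERS lookup above.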

View file

@@ -7,7 +7,7 @@ const {
const { trashFile, isTextType } = require("../utils/files");
const RESERVED_FILES = ["__HOTDIR__.md"];
async function processSingleFile(targetFilename) {
async function processSingleFile(targetFilename, options = {}) {
const fullFilePath = path.resolve(WATCH_DIRECTORY, targetFilename);
if (RESERVED_FILES.includes(targetFilename))
return {
@@ -54,6 +54,7 @@ async function processSingleFile(targetFilename) {
return await FileTypeProcessor({
fullFilePath,
filename: targetFilename,
options,
});
}

View file

@@ -0,0 +1,44 @@
const fs = require("fs");
class OpenAiWhisper {
constructor({ options }) {
const { Configuration, OpenAIApi } = require("openai");
if (!options.openAiKey) throw new Error("No OpenAI API key was set.");
const config = new Configuration({
apiKey: options.openAiKey,
});
this.openai = new OpenAIApi(config);
this.model = "whisper-1";
this.temperature = 0;
this.#log("Initialized.");
}
#log(text, ...args) {
console.log(`\x1b[32m[OpenAiWhisper]\x1b[0m ${text}`, ...args);
}
async processFile(fullFilePath) {
return await this.openai
.createTranscription(
fs.createReadStream(fullFilePath),
this.model,
undefined,
"text",
this.temperature
)
.then((res) => {
if (res.hasOwnProperty("data"))
return { content: res.data, error: null };
return { content: "", error: "No content was able to be transcribed." };
})
.catch((e) => {
this.#log(`Could not get any response from openai whisper`, e.message);
return { content: "", error: e.message };
});
}
}
module.exports = {
OpenAiWhisper,
};
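
Both providers are constructed the same way by asAudio (new WhisperProvider({ options })) and expose a processFile() that resolves to { content, error }, which is what lets the converter swap them freely. A hedged sketch of that contract in use, with a placeholder key and file path:

// Hedged sketch of the shared provider contract; key and path are placeholders.
const { OpenAiWhisper } = require("./utils/WhisperProviders/OpenAiWhisper");

const whisper = new OpenAiWhisper({ options: { openAiKey: "sk-xxxxxxxx" } });
whisper
  .processFile("/app/collector/hotdir/clip.wav")
  .then(({ content, error }) => {
    if (error) return console.error(error);
    console.log(content);
  });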

View file

@@ -1,5 +1,6 @@
const path = require("path");
const fs = require("fs");
const path = require("path");
const { v4 } = require("uuid");
class LocalWhisper {
constructor() {
@@ -16,12 +17,94 @@ class LocalWhisper {
// Make directory when it does not exist in existing installations
if (!fs.existsSync(this.cacheDir))
fs.mkdirSync(this.cacheDir, { recursive: true });
this.#log("Initialized.");
}
#log(text, ...args) {
console.log(`\x1b[32m[LocalWhisper]\x1b[0m ${text}`, ...args);
}
async #convertToWavAudioData(sourcePath) {
try {
let buffer;
const wavefile = require("wavefile");
const ffmpeg = require("fluent-ffmpeg");
const outFolder = path.resolve(__dirname, `../../storage/tmp`);
if (!fs.existsSync(outFolder))
fs.mkdirSync(outFolder, { recursive: true });
const fileExtension = path.extname(sourcePath).toLowerCase();
if (fileExtension !== ".wav") {
this.#log(
`File conversion required! ${fileExtension} file detected - converting to .wav`
);
const outputFile = path.resolve(outFolder, `${v4()}.wav`);
const convert = new Promise((resolve) => {
ffmpeg(sourcePath)
.toFormat("wav")
.on("error", (error) => {
this.#log(`Conversion Error! ${error.message}`);
resolve(false);
})
.on("progress", (progress) =>
this.#log(
`Conversion Processing! ${progress.targetSize}KB converted`
)
)
.on("end", () => {
this.#log(`Conversion Complete! File converted to .wav!`);
resolve(true);
})
.save(outputFile);
});
const success = await convert;
if (!success)
throw new Error(
"[Conversion Failed]: Could not convert file to .wav format!"
);
const chunks = [];
const stream = fs.createReadStream(outputFile);
for await (let chunk of stream) chunks.push(chunk);
buffer = Buffer.concat(chunks);
fs.rmSync(outputFile);
} else {
const chunks = [];
const stream = fs.createReadStream(sourcePath);
for await (let chunk of stream) chunks.push(chunk);
buffer = Buffer.concat(chunks);
}
const wavFile = new wavefile.WaveFile(buffer);
wavFile.toBitDepth("32f");
wavFile.toSampleRate(16000);
let audioData = wavFile.getSamples();
if (Array.isArray(audioData)) {
if (audioData.length > 1) {
const SCALING_FACTOR = Math.sqrt(2);
// Merge channels into first channel to save memory
for (let i = 0; i < audioData[0].length; ++i) {
audioData[0][i] =
(SCALING_FACTOR * (audioData[0][i] + audioData[1][i])) / 2;
}
}
audioData = audioData[0];
}
return audioData;
} catch (error) {
console.error(`convertToWavAudioData`, error);
return null;
}
}
async client() {
if (!fs.existsSync(this.modelPath)) {
console.log(
"\x1b[34m[INFO]\x1b[0m The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)\n\n"
this.#log(
`The native whisper model has never been run and will be downloaded right now. Subsequent runs will be faster. (~250MB)`
);
}
@@ -48,10 +131,45 @@ class LocalWhisper {
: {}),
});
} catch (error) {
console.error("Failed to load the native whisper model:", error);
this.#log("Failed to load the native whisper model:", error);
throw error;
}
}
async processFile(fullFilePath, filename) {
try {
const transcriberPromise = new Promise((resolve) =>
this.client().then((client) => resolve(client))
);
const audioDataPromise = new Promise((resolve) =>
this.#convertToWavAudioData(fullFilePath).then((audioData) =>
resolve(audioData)
)
);
const [audioData, transcriber] = await Promise.all([
audioDataPromise,
transcriberPromise,
]);
if (!audioData) {
this.#log(`Failed to parse content from ${filename}.`);
return {
content: null,
error: `Failed to parse content from ${filename}.`,
};
}
this.#log(`Transcribing audio data to text...`);
const { text } = await transcriber(audioData, {
chunk_length_s: 30,
stride_length_s: 5,
});
return { content: text, error: null };
} catch (error) {
return { content: null, error: error.message };
}
}
}
module.exports = {

View file

@@ -372,6 +372,13 @@ asynckit@^0.4.0:
resolved "https://registry.yarnpkg.com/asynckit/-/asynckit-0.4.0.tgz#c79ed97f7f34cb8f2ba1bc9790bcc366474b4b79"
integrity sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==
axios@^0.26.0:
version "0.26.1"
resolved "https://registry.yarnpkg.com/axios/-/axios-0.26.1.tgz#1ede41c51fcf51bbbd6fd43669caaa4f0495aaa9"
integrity sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==
dependencies:
follow-redirects "^1.14.8"
b4a@^1.6.4:
version "1.6.4"
resolved "https://registry.yarnpkg.com/b4a/-/b4a-1.6.4.tgz#ef1c1422cae5ce6535ec191baeed7567443f36c9"
@@ -1203,6 +1210,11 @@ fluent-ffmpeg@^2.1.2:
async ">=0.2.9"
which "^1.1.1"
follow-redirects@^1.14.8:
version "1.15.6"
resolved "https://registry.yarnpkg.com/follow-redirects/-/follow-redirects-1.15.6.tgz#7f815c0cda4249c74ff09e95ef97c23b5fd0399b"
integrity sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==
form-data-encoder@1.7.2:
version "1.7.2"
resolved "https://registry.yarnpkg.com/form-data-encoder/-/form-data-encoder-1.7.2.tgz#1f1ae3dccf58ed4690b86d87e4f57c654fbab040"
@@ -2304,6 +2316,14 @@ onnxruntime-web@1.14.0:
onnxruntime-common "~1.14.0"
platform "^1.3.6"
openai@^3.2.1:
version "3.3.0"
resolved "https://registry.yarnpkg.com/openai/-/openai-3.3.0.tgz#a6408016ad0945738e1febf43f2fccca83a3f532"
integrity sha512-uqxI/Au+aPRnsaQRe8CojU0eCR7I0mBiKjD3sNMzY6DaC1ZVrc85u98mtJW6voDug8fgGN+DIZmTDxTthxb7dQ==
dependencies:
axios "^0.26.0"
form-data "^4.0.0"
openai@^4.19.0:
version "4.20.1"
resolved "https://registry.yarnpkg.com/openai/-/openai-4.20.1.tgz#afa0d496d125b5a0f6cebcb4b9aeabf71e00214e"

View file

@@ -131,6 +131,16 @@ GID='1000'
# ASTRA_DB_APPLICATION_TOKEN=
# ASTRA_DB_ENDPOINT=
###########################################
######## Audio Model Selection ############
###########################################
# (default) use built-in whisper-small model.
# WHISPER_PROVIDER="local"
# use openai hosted whisper model.
# WHISPER_PROVIDER="openai"
# OPEN_AI_KEY=sk-xxxxxxxx
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# DISABLE_TELEMETRY="false"

View file

@@ -29,6 +29,9 @@ const GeneralApiKeys = lazy(() => import("@/pages/GeneralSettings/ApiKeys"));
const GeneralLLMPreference = lazy(
() => import("@/pages/GeneralSettings/LLMPreference")
);
const GeneralTranscriptionPreference = lazy(
() => import("@/pages/GeneralSettings/TranscriptionPreference")
);
const GeneralEmbeddingPreference = lazy(
() => import("@/pages/GeneralSettings/EmbeddingPreference")
);
@@ -76,6 +79,12 @@ export default function App() {
path="/settings/llm-preference"
element={<AdminRoute Component={GeneralLLMPreference} />}
/>
<Route
path="/settings/transcription-preference"
element={
<AdminRoute Component={GeneralTranscriptionPreference} />
}
/>
<Route
path="/settings/embedding-preference"
element={<AdminRoute Component={GeneralEmbeddingPreference} />}

View file

@@ -19,6 +19,7 @@ import {
Notepad,
CodeBlock,
Barcode,
ClosedCaptioning,
} from "@phosphor-icons/react";
import useUser from "@/hooks/useUser";
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@@ -278,9 +279,17 @@ const SidebarOptions = ({ user = null }) => (
flex={true}
allowedRole={["admin"]}
/>
<Option
href={paths.settings.transcriptionPreference()}
btnText="Transcription Model"
icon={<ClosedCaptioning className="h-5 w-5 flex-shrink-0" />}
user={user}
flex={true}
allowedRole={["admin"]}
/>
<Option
href={paths.settings.embeddingPreference()}
btnText="Embedding Preference"
btnText="Embedding Model"
icon={<FileCode className="h-5 w-5 flex-shrink-0" />}
user={user}
flex={true}

View file

@@ -0,0 +1,38 @@
import { Gauge } from "@phosphor-icons/react";
export default function NativeTranscriptionOptions() {
return (
<div className="w-full flex flex-col gap-y-4">
<div className="flex flex-col md:flex-row md:items-center gap-x-2 text-white mb-4 bg-blue-800/30 w-fit rounded-lg px-4 py-2">
<div className="gap-x-2 flex items-center">
<Gauge size={25} />
<p className="text-sm">
Using the local whisper model on machines with limited RAM or CPU
can stall AnythingLLM when processing media files.
<br />
We recommend at least 2GB of RAM and upload files &lt;10Mb.
<br />
<br />
<i>
The built-in model will automatically download on the first use.
</i>
</p>
</div>
</div>
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Selection
</label>
<select
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
Xenova/whisper-small
</option>
</select>
</div>
</div>
</div>
);
}

View file

@@ -0,0 +1,41 @@
import { useState } from "react";
export default function OpenAiWhisperOptions({ settings }) {
const [inputValue, setInputValue] = useState(settings?.OpenAiKey);
const [_openAIKey, setOpenAIKey] = useState(settings?.OpenAiKey);
return (
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="OpenAiKey"
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="OpenAI API Key"
defaultValue={settings?.OpenAiKey ? "*".repeat(20) : ""}
required={true}
autoComplete="off"
spellCheck={false}
onChange={(e) => setInputValue(e.target.value)}
onBlur={() => setOpenAIKey(inputValue)}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Whisper Model
</label>
<select
disabled={true}
className="bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
Whisper Large
</option>
</select>
</div>
</div>
);
}

View file

@@ -0,0 +1,180 @@
import React, { useEffect, useState } from "react";
import { isMobile } from "react-device-detect";
import Sidebar from "@/components/SettingsSidebar";
import System from "@/models/system";
import showToast from "@/utils/toast";
import PreLoader from "@/components/Preloader";
import OpenAiLogo from "@/media/llmprovider/openai.png";
import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png";
import OpenAiWhisperOptions from "@/components/TranscriptionSelection/OpenAiOptions";
import NativeTranscriptionOptions from "@/components/TranscriptionSelection/NativeTranscriptionOptions";
import LLMItem from "@/components/LLMSelection/LLMItem";
import { MagnifyingGlass } from "@phosphor-icons/react";
export default function TranscriptionModelPreference() {
const [saving, setSaving] = useState(false);
const [hasChanges, setHasChanges] = useState(false);
const [settings, setSettings] = useState(null);
const [loading, setLoading] = useState(true);
const [searchQuery, setSearchQuery] = useState("");
const [filteredProviders, setFilteredProviders] = useState([]);
const [selectedProvider, setSelectedProvider] = useState(null);
const handleSubmit = async (e) => {
e.preventDefault();
const form = e.target;
const data = { WhisperProvider: selectedProvider };
const formData = new FormData(form);
for (var [key, value] of formData.entries()) data[key] = value;
const { error } = await System.updateSystem(data);
setSaving(true);
if (error) {
showToast(`Failed to save preferences: ${error}`, "error");
} else {
showToast("Transcription preferences saved successfully.", "success");
}
setSaving(false);
setHasChanges(!!error);
};
const updateProviderChoice = (selection) => {
setSelectedProvider(selection);
setHasChanges(true);
};
useEffect(() => {
async function fetchKeys() {
const _settings = await System.keys();
setSettings(_settings);
setSelectedProvider(_settings?.WhisperProvider || "local");
setLoading(false);
}
fetchKeys();
}, []);
useEffect(() => {
const filtered = PROVIDERS.filter((provider) =>
provider.name.toLowerCase().includes(searchQuery.toLowerCase())
);
setFilteredProviders(filtered);
}, [searchQuery, selectedProvider]);
const PROVIDERS = [
{
name: "OpenAI",
value: "openai",
logo: OpenAiLogo,
options: <OpenAiWhisperOptions settings={settings} />,
description:
"Leverage the OpenAI Whisper-large model using your API key.",
},
{
name: "AnythingLLM Built-In",
value: "local",
logo: AnythingLLMIcon,
options: <NativeTranscriptionOptions settings={settings} />,
description: "Run a built-in whisper model on this instance privately.",
},
];
return (
<div className="w-screen h-screen overflow-hidden bg-sidebar flex">
<Sidebar />
{loading ? (
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
>
<div className="w-full h-full flex justify-center items-center">
<PreLoader />
</div>
</div>
) : (
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[16px] bg-main-gradient w-full h-full overflow-y-scroll"
>
<form onSubmit={handleSubmit} className="flex w-full">
<div className="flex flex-col w-full px-1 md:pl-6 md:pr-[86px] md:py-6 py-16">
<div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
<div className="flex gap-x-4 items-center">
<p className="text-lg leading-6 font-bold text-white">
Transcription Model Preference
</p>
{hasChanges && (
<button
type="submit"
disabled={saving}
className="flex items-center gap-x-2 px-4 py-2 rounded-lg bg-[#2C2F36] text-white text-sm hover:bg-[#3D4147] shadow-md border border-[#3D4147]"
>
{saving ? "Saving..." : "Save changes"}
</button>
)}
</div>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60">
These are the credentials and settings for your preferred
transcription model provider. It's important these keys are
current and correct or else media files and audio will not
transcribe.
</p>
</div>
<div className="text-sm font-medium text-white mt-6 mb-4">
Transcription Providers
</div>
<div className="w-full">
<div className="w-full relative border-slate-300/20 shadow border-4 rounded-xl text-white">
<div className="w-full p-4 absolute top-0 rounded-t-lg backdrop-blur-sm">
<div className="w-full flex items-center sticky top-0">
<MagnifyingGlass
size={16}
weight="bold"
className="absolute left-4 z-30 text-white"
/>
<input
type="text"
placeholder="Search audio transcription providers"
className="bg-zinc-600 z-20 pl-10 h-[38px] rounded-full w-full px-4 py-1 text-sm border-2 border-slate-300/40 outline-none focus:border-white text-white"
onChange={(e) => setSearchQuery(e.target.value)}
autoComplete="off"
onKeyDown={(e) => {
if (e.key === "Enter") e.preventDefault();
}}
/>
</div>
</div>
<div className="px-4 pt-[70px] flex flex-col gap-y-1 max-h-[390px] overflow-y-auto no-scroll pb-4">
{filteredProviders.map((provider) => {
return (
<LLMItem
key={provider.name}
name={provider.name}
value={provider.value}
image={provider.logo}
description={provider.description}
checked={selectedProvider === provider.value}
onClick={() => updateProviderChoice(provider.value)}
/>
);
})}
</div>
</div>
<div
onChange={() => setHasChanges(true)}
className="mt-4 flex flex-col gap-y-1"
>
{selectedProvider &&
PROVIDERS.find(
(provider) => provider.value === selectedProvider
)?.options}
</div>
</div>
</div>
</form>
</div>
)}
</div>
);
}
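
Inside the async submit handler above, the payload passed to System.updateSystem ends up looking roughly like the sketch below; the field names come from this page, the key value is a placeholder, and the API key is assumed to be handled by the existing OpenAiKey mapping in updateENV:

// Hedged sketch of the submitted settings payload (values are placeholders).
const { error } = await System.updateSystem({
  WhisperProvider: "openai", // validated server-side by supportedTranscriptionProvider
  OpenAiKey: "sk-xxxxxxxx", // collected by the OpenAiWhisperOptions input
});
if (error) showToast(`Failed to save preferences: ${error}`, "error");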

View file

@@ -92,6 +92,9 @@ export default {
llmPreference: () => {
return "/settings/llm-preference";
},
transcriptionPreference: () => {
return "/settings/transcription-preference";
},
embeddingPreference: () => {
return "/settings/embedding-preference";
},

View file

@@ -128,6 +128,16 @@ VECTOR_DB="lancedb"
# ZILLIZ_ENDPOINT="https://sample.api.gcp-us-west1.zillizcloud.com"
# ZILLIZ_API_TOKEN=api-token-here
###########################################
######## Audio Model Selection ############
###########################################
# (default) use built-in whisper-small model.
WHISPER_PROVIDER="local"
# use openai hosted whisper model.
# WHISPER_PROVIDER="openai"
# OPEN_AI_KEY=sk-xxxxxxxx
# CLOUD DEPLOYMENT VARIABLES ONLY
# AUTH_TOKEN="hunter2" # This is the password to your application if remote hosting.
# STORAGE_DIR= # absolute filesystem path with no trailing slash

View file

@@ -258,6 +258,7 @@ const SystemSettings = {
AzureOpenAiEmbeddingModelPref: process.env.EMBEDDING_MODEL_PREF,
}
: {}),
WhisperProvider: process.env.WHISPER_PROVIDER || "local",
};
},

View file

@@ -14,6 +14,9 @@ AnythingLLM allows you to upload various audio and video formats as source documen
Once transcribed you can embed these transcriptions into your workspace like you would any other file!
**Other external model/transcription providers are also live.**
- [OpenAI Whisper via API key.](https://openai.com/research/whisper)
## Text generation (LLM selection)
> [!IMPORTANT]
> Use of a locally running LLM model is **experimental** and may behave unexpectedly, crash, or not function at all.

View file

@@ -5,13 +5,20 @@
class CollectorApi {
constructor() {
this.endpoint = "http://0.0.0.0:8888";
this.endpoint = `http://0.0.0.0:${process.env.COLLECTOR_PORT || 8888}`;
}
log(text, ...args) {
console.log(`\x1b[36m[CollectorApi]\x1b[0m ${text}`, ...args);
}
#attachOptions() {
return {
whisperProvider: process.env.WHISPER_PROVIDER || "local",
openAiKey: process.env.OPEN_AI_KEY || null,
};
}
async online() {
return await fetch(this.endpoint)
.then((res) => res.ok)
@@ -38,7 +45,10 @@ class CollectorApi {
headers: {
"Content-Type": "application/json",
},
body: JSON.stringify({ filename }),
body: JSON.stringify({
filename,
options: this.#attachOptions(),
}),
})
.then((res) => {
if (!res.ok) throw new Error("Response could not be completed");

View file

@@ -269,6 +269,13 @@ const KEY_MAPPING = {
checks: [isNotEmpty],
},
// Whisper (transcription) providers
WhisperProvider: {
envKey: "WHISPER_PROVIDER",
checks: [isNotEmpty, supportedTranscriptionProvider],
postUpdate: [],
},
// System Settings
AuthToken: {
envKey: "AUTH_TOKEN",
@@ -351,6 +358,13 @@ function supportedLLM(input = "") {
return validSelection ? null : `${input} is not a valid LLM provider.`;
}
function supportedTranscriptionProvider(input = "") {
const validSelection = ["openai", "local"].includes(input);
return validSelection
? null
: `${input} is not a valid transcription model provider.`;
}
function validGeminiModel(input = "") {
const validModels = ["gemini-pro"];
return validModels.includes(input)