diff --git a/.github/workflows/dev-build.yaml b/.github/workflows/dev-build.yaml index a7632dfd0..e3bb1d556 100644 --- a/.github/workflows/dev-build.yaml +++ b/.github/workflows/dev-build.yaml @@ -6,7 +6,7 @@ concurrency: on: push: - branches: ['558-multi-modal-support'] # put your current branch to create a build. Core team only. + branches: ['pipertts-support'] # put your current branch to create a build. Core team only. paths-ignore: - '**.md' - 'cloud-deployments/*' diff --git a/README.md b/README.md index d7812265d..178fef08e 100644 --- a/README.md +++ b/README.md @@ -110,6 +110,7 @@ AnythingLLM divides your documents into objects called `workspaces`. A Workspace **TTS (text-to-speech) support:** - Native Browser Built-in (default) +- [PiperTTSLocal - runs in browser](https://github.com/rhasspy/piper) - [OpenAI TTS](https://platform.openai.com/docs/guides/text-to-speech/voice-options) - [ElevenLabs](https://elevenlabs.io/) diff --git a/frontend/package.json b/frontend/package.json index 3640e9ee5..8a60c1109 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -13,6 +13,7 @@ "dependencies": { "@metamask/jazzicon": "^2.0.0", "@microsoft/fetch-event-source": "^2.0.1", + "@mintplex-labs/piper-tts-web": "^1.0.4", "@phosphor-icons/react": "^2.1.7", "@tremor/react": "^3.15.1", "dompurify": "^3.0.8", @@ -26,6 +27,7 @@ "markdown-it": "^13.0.1", "markdown-it-katex": "^2.0.3", "moment": "^2.30.1", + "onnxruntime-web": "^1.18.0", "pluralize": "^8.0.0", "react": "^18.2.0", "react-device-detect": "^2.2.2", diff --git a/frontend/src/components/TextToSpeech/PiperTTSOptions/index.jsx b/frontend/src/components/TextToSpeech/PiperTTSOptions/index.jsx new file mode 100644 index 000000000..323bf3ad5 --- /dev/null +++ b/frontend/src/components/TextToSpeech/PiperTTSOptions/index.jsx @@ -0,0 +1,219 @@ +import { useState, useEffect, useRef } from "react"; +import PiperTTSClient from "@/utils/piperTTS"; +import { titleCase } from "text-case"; +import { humanFileSize } from "@/utils/numbers"; +import showToast from "@/utils/toast"; +import { CircleNotch, PauseCircle, PlayCircle } from "@phosphor-icons/react"; + +export default function PiperTTSOptions({ settings }) { + return ( + <> + <p className="text-sm font-base text-white text-opacity-60 mb-4"> + All PiperTTS models will run in your browser locally. This can be + resource intensive on lower-end devices. + </p> + <div className="flex gap-x-4 items-center"> + <PiperTTSModelSelection settings={settings} /> + </div> + </> + ); +} + +function voicesByLanguage(voices = []) { + const voicesByLanguage = voices.reduce((acc, voice) => { + const langName = voice?.language?.name_english ?? "Unlisted"; + acc[langName] = acc[langName] || []; + acc[langName].push(voice); + return acc; + }, {}); + return Object.entries(voicesByLanguage); +} + +function voiceDisplayName(voice) { + const { is_stored, name, quality, files } = voice; + const onnxFileKey = Object.keys(files).find((key) => key.endsWith(".onnx")); + const fileSize = files?.[onnxFileKey]?.size_bytes || 0; + return `${is_stored ? "✔ " : ""}${titleCase(name)}-${quality === "low" ? "Low" : "HQ"} (${humanFileSize(fileSize)})`; +} + +function PiperTTSModelSelection({ settings }) { + const [loading, setLoading] = useState(true); + const [voices, setVoices] = useState([]); + const [selectedVoice, setSelectedVoice] = useState( + settings?.TTSPiperTTSVoiceModel + ); + + function flushVoices() { + PiperTTSClient.flush() + .then(() => + showToast("All voices flushed from browser storage", "info", { + clear: true, + }) + ) + .catch((e) => console.error(e)); + } + + useEffect(() => { + PiperTTSClient.voices() + .then((voices) => { + if (voices?.length !== 0) return setVoices(voices); + throw new Error("Could not fetch voices from web worker."); + }) + .catch((e) => { + console.error(e); + }) + .finally(() => setLoading(false)); + }, []); + + if (loading) { + return ( + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-3"> + Voice Model Selection + </label> + <select + name="TTSPiperTTSVoiceModel" + value="" + disabled={true} + className="border-none bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + <option value="" disabled={true}> + -- loading available models -- + </option> + </select> + </div> + ); + } + + return ( + <div className="flex flex-col w-fit"> + <div className="flex flex-col w-60"> + <label className="text-white text-sm font-semibold block mb-3"> + Voice Model Selection + </label> + <div className="flex items-center w-fit gap-x-4 mb-2"> + <select + name="TTSPiperTTSVoiceModel" + required={true} + onChange={(e) => setSelectedVoice(e.target.value)} + value={selectedVoice} + className="border-none flex-shrink-0 bg-zinc-900 border-gray-500 text-white text-sm rounded-lg block w-full p-2.5" + > + {voicesByLanguage(voices).map(([lang, voices]) => { + return ( + <optgroup key={lang} label={lang}> + {voices.map((voice) => ( + <option key={voice.key} value={voice.key}> + {voiceDisplayName(voice)} + </option> + ))} + </optgroup> + ); + })} + </select> + <DemoVoiceSample voiceId={selectedVoice} /> + </div> + <p className="text-xs text-white/40"> + The "✔" indicates this model is already stored locally and does not + need to be downloaded when run. + </p> + </div> + {!!voices.find((voice) => voice.is_stored) && ( + <button + type="button" + onClick={flushVoices} + className="w-fit border-none hover:text-white hover:underline text-white/40 text-sm my-4" + > + Flush voice cache + </button> + )} + </div> + ); +} + +function DemoVoiceSample({ voiceId }) { + const playerRef = useRef(null); + const [speaking, setSpeaking] = useState(false); + const [loading, setLoading] = useState(false); + const [audioSrc, setAudioSrc] = useState(null); + + async function speakMessage(e) { + e.preventDefault(); + if (speaking) { + playerRef?.current?.pause(); + return; + } + + try { + if (!audioSrc) { + setLoading(true); + const client = new PiperTTSClient({ voiceId }); + const blobUrl = await client.getAudioBlobForText( + "Hello, welcome to AnythingLLM!" + ); + setAudioSrc(blobUrl); + setLoading(false); + client.worker?.terminate(); + PiperTTSClient._instance = null; + } else { + playerRef.current.play(); + } + } catch (e) { + console.error(e); + setLoading(false); + setSpeaking(false); + } + } + + useEffect(() => { + function setupPlayer() { + if (!playerRef?.current) return; + playerRef.current.addEventListener("play", () => { + setSpeaking(true); + }); + + playerRef.current.addEventListener("pause", () => { + playerRef.current.currentTime = 0; + setSpeaking(false); + setAudioSrc(null); + }); + } + setupPlayer(); + }, []); + + return ( + <button + type="button" + onClick={speakMessage} + className="border-none text-zinc-300 flex items-center gap-x-1" + > + {speaking ? ( + <> + <PauseCircle size={20} className="flex-shrink-0" /> + <p className="text-sm flex-shrink-0">Stop demo</p> + </> + ) : ( + <> + {loading ? ( + <> + <CircleNotch size={20} className="animate-spin flex-shrink-0" /> + <p className="text-sm flex-shrink-0">Loading voice</p> + </> + ) : ( + <> + <PlayCircle size={20} className="flex-shrink-0" /> + <p className="text-sm flex-shrink-0">Play sample</p> + </> + )} + </> + )} + <audio + ref={playerRef} + hidden={true} + src={audioSrc} + autoPlay={true} + controls={false} + /> + </button> + ); +} diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx index 56d32e847..88d063387 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/index.jsx @@ -1,9 +1,11 @@ import { useEffect, useState } from "react"; import NativeTTSMessage from "./native"; import AsyncTTSMessage from "./asyncTts"; +import PiperTTSMessage from "./piperTTS"; import System from "@/models/system"; export default function TTSMessage({ slug, chatId, message }) { + const [settings, setSettings] = useState({}); const [provider, setProvider] = useState("native"); const [loading, setLoading] = useState(true); @@ -11,13 +13,26 @@ export default function TTSMessage({ slug, chatId, message }) { async function getSettings() { const _settings = await System.keys(); setProvider(_settings?.TextToSpeechProvider ?? "native"); + setSettings(_settings); setLoading(false); } getSettings(); }, []); if (!chatId || loading) return null; - if (provider !== "native") - return <AsyncTTSMessage slug={slug} chatId={chatId} />; - return <NativeTTSMessage message={message} />; + + switch (provider) { + case "openai": + case "elevenlabs": + return <AsyncTTSMessage slug={slug} chatId={chatId} />; + case "piper_local": + return ( + <PiperTTSMessage + voiceId={settings?.TTSPiperTTSVoiceModel} + message={message} + /> + ); + default: + return <NativeTTSMessage message={message} />; + } } diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/piperTTS.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/piperTTS.jsx new file mode 100644 index 000000000..d384faf1e --- /dev/null +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/HistoricalMessage/Actions/TTSButton/piperTTS.jsx @@ -0,0 +1,90 @@ +import { useEffect, useState, useRef } from "react"; +import { SpeakerHigh, PauseCircle, CircleNotch } from "@phosphor-icons/react"; +import { Tooltip } from "react-tooltip"; +import PiperTTSClient from "@/utils/piperTTS"; + +export default function PiperTTS({ voiceId = null, message }) { + const playerRef = useRef(null); + const [speaking, setSpeaking] = useState(false); + const [loading, setLoading] = useState(false); + const [audioSrc, setAudioSrc] = useState(null); + + async function speakMessage(e) { + e.preventDefault(); + if (speaking) { + playerRef?.current?.pause(); + return; + } + + try { + if (!audioSrc) { + setLoading(true); + const client = new PiperTTSClient({ voiceId }); + const blobUrl = await client.getAudioBlobForText(message); + setAudioSrc(blobUrl); + setLoading(false); + } else { + playerRef.current.play(); + } + } catch (e) { + console.error(e); + setLoading(false); + setSpeaking(false); + } + } + + useEffect(() => { + function setupPlayer() { + if (!playerRef?.current) return; + playerRef.current.addEventListener("play", () => { + setSpeaking(true); + }); + + playerRef.current.addEventListener("pause", () => { + playerRef.current.currentTime = 0; + setSpeaking(false); + }); + } + setupPlayer(); + }, []); + + return ( + <div className="mt-3 relative"> + <button + type="button" + onClick={speakMessage} + data-tooltip-id="message-to-speech" + data-tooltip-content={ + speaking ? "Pause TTS speech of message" : "TTS Speak message" + } + className="border-none text-zinc-300" + aria-label={speaking ? "Pause speech" : "Speak message"} + > + {speaking ? ( + <PauseCircle size={18} className="mb-1" /> + ) : ( + <> + {loading ? ( + <CircleNotch size={18} className="mb-1 animate-spin" /> + ) : ( + <SpeakerHigh size={18} className="mb-1" /> + )} + </> + )} + <audio + ref={playerRef} + hidden={true} + src={audioSrc} + autoPlay={true} + controls={false} + /> + </button> + <Tooltip + id="message-to-speech" + place="bottom" + delayShow={300} + className="tooltip !text-xs" + /> + </div> + ); +} diff --git a/frontend/src/media/ttsproviders/piper.png b/frontend/src/media/ttsproviders/piper.png new file mode 100644 index 000000000..32d3ec5a7 Binary files /dev/null and b/frontend/src/media/ttsproviders/piper.png differ diff --git a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx index dee8a8444..0ebab72de 100644 --- a/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx +++ b/frontend/src/pages/GeneralSettings/AudioPreference/tts.jsx @@ -7,9 +7,11 @@ import CTAButton from "@/components/lib/CTAButton"; import OpenAiLogo from "@/media/llmprovider/openai.png"; import AnythingLLMIcon from "@/media/logo/anything-llm-icon.png"; import ElevenLabsIcon from "@/media/ttsproviders/elevenlabs.png"; +import PiperTTSIcon from "@/media/ttsproviders/piper.png"; import BrowserNative from "@/components/TextToSpeech/BrowserNative"; import OpenAiTTSOptions from "@/components/TextToSpeech/OpenAiOptions"; import ElevenLabsTTSOptions from "@/components/TextToSpeech/ElevenLabsOptions"; +import PiperTTSOptions from "@/components/TextToSpeech/PiperTTSOptions"; const PROVIDERS = [ { @@ -33,6 +35,13 @@ const PROVIDERS = [ options: (settings) => <ElevenLabsTTSOptions settings={settings} />, description: "Use ElevenLabs's text to speech voices and technology.", }, + { + name: "PiperTTS", + value: "piper_local", + logo: PiperTTSIcon, + options: (settings) => <PiperTTSOptions settings={settings} />, + description: "Run TTS models locally in your browser privately.", + }, ]; export default function TextToSpeechProvider({ settings }) { diff --git a/frontend/src/utils/piperTTS/index.js b/frontend/src/utils/piperTTS/index.js new file mode 100644 index 000000000..5016af79e --- /dev/null +++ b/frontend/src/utils/piperTTS/index.js @@ -0,0 +1,138 @@ +import showToast from "../toast"; + +export default class PiperTTSClient { + static _instance; + voiceId = "en_US-hfc_female-medium"; + worker = null; + + constructor({ voiceId } = { voiceId: null }) { + if (PiperTTSClient._instance) { + this.voiceId = voiceId !== null ? voiceId : this.voiceId; + return PiperTTSClient._instance; + } + + this.voiceId = voiceId !== null ? voiceId : this.voiceId; + PiperTTSClient._instance = this; + return this; + } + + #getWorker() { + if (!this.worker) + this.worker = new Worker(new URL("./worker.js", import.meta.url), { + type: "module", + }); + return this.worker; + } + + /** + * Get all available voices for a client + * @returns {Promise<import("@mintplex-labs/piper-tts-web/dist/types").Voice[]}>} + */ + static async voices() { + const tmpWorker = new Worker(new URL("./worker.js", import.meta.url), { + type: "module", + }); + tmpWorker.postMessage({ type: "voices" }); + return new Promise((resolve, reject) => { + let timeout = null; + const handleMessage = (event) => { + if (event.data.type !== "voices") { + console.log("PiperTTSWorker debug event:", event.data); + return; + } + resolve(event.data.voices); + tmpWorker.removeEventListener("message", handleMessage); + timeout && clearTimeout(timeout); + tmpWorker.terminate(); + }; + + timeout = setTimeout(() => { + reject("TTS Worker timed out."); + }, 30_000); + tmpWorker.addEventListener("message", handleMessage); + }); + } + + static async flush() { + const tmpWorker = new Worker(new URL("./worker.js", import.meta.url), { + type: "module", + }); + tmpWorker.postMessage({ type: "flush" }); + return new Promise((resolve, reject) => { + let timeout = null; + const handleMessage = (event) => { + if (event.data.type !== "flush") { + console.log("PiperTTSWorker debug event:", event.data); + return; + } + resolve(event.data.flushed); + tmpWorker.removeEventListener("message", handleMessage); + timeout && clearTimeout(timeout); + tmpWorker.terminate(); + }; + + timeout = setTimeout(() => { + reject("TTS Worker timed out."); + }, 30_000); + tmpWorker.addEventListener("message", handleMessage); + }); + } + + /** + * Runs prediction via webworker so we can get an audio blob back. + * @returns {Promise<{blobURL: string|null, error: string|null}>} objectURL blob: type. + */ + async waitForBlobResponse() { + return new Promise((resolve) => { + let timeout = null; + const handleMessage = (event) => { + if (event.data.type === "error") { + this.worker.removeEventListener("message", handleMessage); + timeout && clearTimeout(timeout); + return resolve({ blobURL: null, error: event.data.message }); + } + + if (event.data.type !== "result") { + console.log("PiperTTSWorker debug event:", event.data); + return; + } + resolve({ + blobURL: URL.createObjectURL(event.data.audio), + error: null, + }); + this.worker.removeEventListener("message", handleMessage); + timeout && clearTimeout(timeout); + }; + + timeout = setTimeout(() => { + resolve({ blobURL: null, error: "PiperTTSWorker Worker timed out." }); + }, 30_000); + this.worker.addEventListener("message", handleMessage); + }); + } + + async getAudioBlobForText(textToSpeak, voiceId = null) { + const primaryWorker = this.#getWorker(); + primaryWorker.postMessage({ + type: "init", + text: String(textToSpeak), + voiceId: voiceId ?? this.voiceId, + // Don't reference WASM because in the docker image + // the user will be connected to internet (mostly) + // and it bloats the app size on the frontend or app significantly + // and running the docker image fully offline is not an intended use-case unlike the app. + }); + + const { blobURL, error } = await this.waitForBlobResponse(); + if (!!error) { + showToast( + `Could not generate voice prediction. Error: ${error}`, + "error", + { clear: true } + ); + return; + } + + return blobURL; + } +} diff --git a/frontend/src/utils/piperTTS/worker.js b/frontend/src/utils/piperTTS/worker.js new file mode 100644 index 000000000..e0fa8aabb --- /dev/null +++ b/frontend/src/utils/piperTTS/worker.js @@ -0,0 +1,94 @@ +import * as TTS from "@mintplex-labs/piper-tts-web"; + +/** @type {import("@mintplexlabs/piper-web-tts").TtsSession | null} */ +let PIPER_SESSION = null; + +/** + * @typedef PredictionRequest + * @property {('init')} type + * @property {string} text - the text to inference on + * @property {import('@mintplexlabs/piper-web-tts').VoiceId} voiceId - the voiceID key to use. + * @property {string|null} baseUrl - the base URL to fetch WASMs from. + */ +/** + * @typedef PredictionRequestResponse + * @property {('result')} type + * @property {Blob} audio - the text to inference on + */ + +/** + * @typedef VoicesRequest + * @property {('voices')} type + * @property {string|null} baseUrl - the base URL to fetch WASMs from. + */ +/** + * @typedef VoicesRequestResponse + * @property {('voices')} type + * @property {[import("@mintplex-labs/piper-tts-web/dist/types")['Voice']]} voices - available voices in array + */ + +/** + * @typedef FlushRequest + * @property {('flush')} type + */ +/** + * @typedef FlushRequestResponse + * @property {('flush')} type + * @property {true} flushed + */ + +/** + * Web worker for generating client-side PiperTTS predictions + * @param {MessageEvent<PredictionRequest | VoicesRequest | FlushRequest>} event - The event object containing the prediction request + * @returns {Promise<PredictionRequestResponse|VoicesRequestResponse|FlushRequestResponse>} + */ +async function main(event) { + if (event.data.type === "voices") { + const stored = await TTS.stored(); + const voices = await TTS.voices(); + voices.forEach((voice) => (voice.is_stored = stored.includes(voice.key))); + + self.postMessage({ type: "voices", voices }); + return; + } + + if (event.data.type === "flush") { + await TTS.flush(); + self.postMessage({ type: "flush", flushed: true }); + return; + } + + if (event.data?.type !== "init") return; + if (!PIPER_SESSION) { + PIPER_SESSION = new TTS.TtsSession({ + voiceId: event.data.voiceId, + progress: (e) => self.postMessage(JSON.stringify(e)), + logger: (msg) => self.postMessage(msg), + ...(!!event.data.baseUrl + ? { + wasmPaths: { + onnxWasm: `${event.data.baseUrl}/piper/ort/`, + piperData: `${event.data.baseUrl}/piper/piper_phonemize.data`, + piperWasm: `${event.data.baseUrl}/piper/piper_phonemize.wasm`, + }, + } + : {}), + }); + } + + if (event.data.voiceId && PIPER_SESSION.voiceId !== event.data.voiceId) + PIPER_SESSION.voiceId = event.data.voiceId; + + PIPER_SESSION.predict(event.data.text) + .then((res) => { + if (res instanceof Blob) { + self.postMessage({ type: "result", audio: res }); + return; + } + }) + .catch((error) => { + self.postMessage({ type: "error", message: error.message, error }); // Will be an error. + }); +} + +self.addEventListener("message", main); diff --git a/frontend/vite.config.js b/frontend/vite.config.js index b67e9ef7c..73b295be2 100644 --- a/frontend/vite.config.js +++ b/frontend/vite.config.js @@ -9,6 +9,14 @@ dns.setDefaultResultOrder("verbatim") // https://vitejs.dev/config/ export default defineConfig({ + assetsInclude: [ + './public/piper/ort-wasm-simd-threaded.wasm', + './public/piper/piper_phonemize.wasm', + './public/piper/piper_phonemize.data', + ], + worker: { + format: 'es' + }, server: { port: 3000, host: "localhost" @@ -60,7 +68,7 @@ export default defineConfig({ }, external: [ // Reduces transformation time by 50% and we don't even use this variant, so we can ignore. - /@phosphor-icons\/react\/dist\/ssr/ + /@phosphor-icons\/react\/dist\/ssr/, ] }, commonjsOptions: { @@ -68,6 +76,7 @@ export default defineConfig({ } }, optimizeDeps: { + include: ["@mintplex-labs/piper-tts-web"], esbuildOptions: { define: { global: "globalThis" diff --git a/frontend/yarn.lock b/frontend/yarn.lock index 0f62957b1..4a56e4f92 100644 --- a/frontend/yarn.lock +++ b/frontend/yarn.lock @@ -496,6 +496,11 @@ resolved "https://registry.yarnpkg.com/@microsoft/fetch-event-source/-/fetch-event-source-2.0.1.tgz#9ceecc94b49fbaa15666e38ae8587f64acce007d" integrity sha512-W6CLUJ2eBMw3Rec70qrsEW0jOm/3twwJv21mrmj2yORiaVmVYGS4sSS5yUwvQc1ZlDLYGPnClVWmUUMagKNsfA== +"@mintplex-labs/piper-tts-web@^1.0.4": + version "1.0.4" + resolved "https://registry.yarnpkg.com/@mintplex-labs/piper-tts-web/-/piper-tts-web-1.0.4.tgz#016b196fa86dc8b616691dd381f3ca1939196444" + integrity sha512-Y24X+CJaGXoY5HFPSstHvJI6408OAtw3Pmq2OIYwpRpcwLLbgadWg8l1ODHNkgpB0Ps5fS9PAAQB60fHA3Bdag== + "@nodelib/fs.scandir@2.1.5": version "2.1.5" resolved "https://registry.yarnpkg.com/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz#7619c2eb21b25483f6d167548b4cfd5a7488c3d5" @@ -532,6 +537,59 @@ resolved "https://registry.yarnpkg.com/@pkgr/core/-/core-0.1.1.tgz#1ec17e2edbec25c8306d424ecfbf13c7de1aaa31" integrity sha512-cq8o4cWH0ibXh9VGi5P20Tu9XF/0fFXl9EUinr9QfTM7a7p0oTA4iJRCQWppXR1Pg8dSM0UCItCkPwsk9qWWYA== +"@protobufjs/aspromise@^1.1.1", "@protobufjs/aspromise@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/aspromise/-/aspromise-1.1.2.tgz#9b8b0cc663d669a7d8f6f5d0893a14d348f30fbf" + integrity sha512-j+gKExEuLmKwvz3OgROXtrJ2UG2x8Ch2YZUxahh+s1F2HZ+wAceUNLkvy6zKCPVRkU++ZWQrdxsUeQXmcg4uoQ== + +"@protobufjs/base64@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/base64/-/base64-1.1.2.tgz#4c85730e59b9a1f1f349047dbf24296034bb2735" + integrity sha512-AZkcAA5vnN/v4PDqKyMR5lx7hZttPDgClv83E//FMNhR2TMcLUhfRUBHCmSl0oi9zMgDDqRUJkSxO3wm85+XLg== + +"@protobufjs/codegen@^2.0.4": + version "2.0.4" + resolved "https://registry.yarnpkg.com/@protobufjs/codegen/-/codegen-2.0.4.tgz#7ef37f0d010fb028ad1ad59722e506d9262815cb" + integrity sha512-YyFaikqM5sH0ziFZCN3xDC7zeGaB/d0IUb9CATugHWbd1FRFwWwt4ld4OYMPWu5a3Xe01mGAULCdqhMlPl29Jg== + +"@protobufjs/eventemitter@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/eventemitter/-/eventemitter-1.1.0.tgz#355cbc98bafad5978f9ed095f397621f1d066b70" + integrity sha512-j9ednRT81vYJ9OfVuXG6ERSTdEL1xVsNgqpkxMsbIabzSo3goCjDIveeGv5d03om39ML71RdmrGNjG5SReBP/Q== + +"@protobufjs/fetch@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/fetch/-/fetch-1.1.0.tgz#ba99fb598614af65700c1619ff06d454b0d84c45" + integrity sha512-lljVXpqXebpsijW71PZaCYeIcE5on1w5DlQy5WH6GLbFryLUrBD4932W/E2BSpfRJWseIL4v/KPgBFxDOIdKpQ== + dependencies: + "@protobufjs/aspromise" "^1.1.1" + "@protobufjs/inquire" "^1.1.0" + +"@protobufjs/float@^1.0.2": + version "1.0.2" + resolved "https://registry.yarnpkg.com/@protobufjs/float/-/float-1.0.2.tgz#5e9e1abdcb73fc0a7cb8b291df78c8cbd97b87d1" + integrity sha512-Ddb+kVXlXst9d+R9PfTIxh1EdNkgoRe5tOX6t01f1lYWOvJnSPDBlG241QLzcyPdoNTsblLUdujGSE4RzrTZGQ== + +"@protobufjs/inquire@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/inquire/-/inquire-1.1.0.tgz#ff200e3e7cf2429e2dcafc1140828e8cc638f089" + integrity sha512-kdSefcPdruJiFMVSbn801t4vFK7KB/5gd2fYvrxhuJYg8ILrmn9SKSX2tZdV6V+ksulWqS7aXjBcRXl3wHoD9Q== + +"@protobufjs/path@^1.1.2": + version "1.1.2" + resolved "https://registry.yarnpkg.com/@protobufjs/path/-/path-1.1.2.tgz#6cc2b20c5c9ad6ad0dccfd21ca7673d8d7fbf68d" + integrity sha512-6JOcJ5Tm08dOHAbdR3GrvP+yUUfkjG5ePsHYczMFLq3ZmMkAD98cDgcT2iA1lJ9NVwFd4tH/iSSoe44YWkltEA== + +"@protobufjs/pool@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/pool/-/pool-1.1.0.tgz#09fd15f2d6d3abfa9b65bc366506d6ad7846ff54" + integrity sha512-0kELaGSIDBKvcgS4zkjz1PeddatrjYcmMWOlAuAPwAeccUrPHdUqo/J6LiymHHEiJT5NrF1UVwxY14f+fy4WQw== + +"@protobufjs/utf8@^1.1.0": + version "1.1.0" + resolved "https://registry.yarnpkg.com/@protobufjs/utf8/-/utf8-1.1.0.tgz#a777360b5b39a1a2e5106f8e858f2fd2d060c570" + integrity sha512-Vvn3zZrhQZkkBE8LSuW3em98c0FwgO4nxzv6OdSxPKJIEKY2bGbHn+mhGIPerzI4twdxaP8/0+06HBpwf345Lw== + "@remix-run/router@1.18.0": version "1.18.0" resolved "https://registry.yarnpkg.com/@remix-run/router/-/router-1.18.0.tgz#20b033d1f542a100c1d57cfd18ecf442d1784732" @@ -652,6 +710,13 @@ resolved "https://registry.yarnpkg.com/@types/history/-/history-4.7.11.tgz#56588b17ae8f50c53983a524fc3cc47437969d64" integrity sha512-qjDJRrmvBMiTx+jyLxvLfJU7UznFuokDv4f3WRuriHKERccVpFU+8XMQUAbDzoiJCsmexxRExQeMwwCdamSKDA== +"@types/node@>=13.7.0": + version "22.1.0" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.1.0.tgz#6d6adc648b5e03f0e83c78dc788c2b037d0ad94b" + integrity sha512-AOmuRF0R2/5j1knA3c6G3HOk523Ga+l+ZXltX8SF1+5oqcXijjfTd8fY3XRZqSihEu9XhtQnKYLmkFaoxgsJHw== + dependencies: + undici-types "~6.13.0" + "@types/prop-types@*": version "15.7.12" resolved "https://registry.yarnpkg.com/@types/prop-types/-/prop-types-15.7.12.tgz#12bb1e2be27293c1406acb6af1c3f3a1481d98c6" @@ -1729,6 +1794,11 @@ flat-cache@^3.0.4: keyv "^4.5.3" rimraf "^3.0.2" +flatbuffers@^1.12.0: + version "1.12.0" + resolved "https://registry.yarnpkg.com/flatbuffers/-/flatbuffers-1.12.0.tgz#72e87d1726cb1b216e839ef02658aa87dcef68aa" + integrity sha512-c7CZADjRcl6j0PlvFy0ZqXQ67qSEZfrVPynmnL+2zPc+NtMvrF8Y0QceMo7QqnSPc7+uWjUIAbvCQ5WIKlMVdQ== + flatted@^3.2.9: version "3.3.1" resolved "https://registry.yarnpkg.com/flatted/-/flatted-3.3.1.tgz#21db470729a6734d4997002f439cb308987f567a" @@ -1898,6 +1968,11 @@ graphemer@^1.4.0: resolved "https://registry.yarnpkg.com/graphemer/-/graphemer-1.4.0.tgz#fb2f1d55e0e3a1849aeffc90c4fa0dd53a0e66c6" integrity sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag== +guid-typescript@^1.0.9: + version "1.0.9" + resolved "https://registry.yarnpkg.com/guid-typescript/-/guid-typescript-1.0.9.tgz#e35f77003535b0297ea08548f5ace6adb1480ddc" + integrity sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ== + has-bigints@^1.0.1, has-bigints@^1.0.2: version "1.0.2" resolved "https://registry.yarnpkg.com/has-bigints/-/has-bigints-1.0.2.tgz#0871bd3e3d51626f6ca0966668ba35d5602d6eaa" @@ -2413,6 +2488,11 @@ lodash@^4.17.21: resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.21.tgz#679591c564c3bffaae8454cf0b3df370c3d6911c" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== +long@^5.0.0, long@^5.2.3: + version "5.2.3" + resolved "https://registry.yarnpkg.com/long/-/long-5.2.3.tgz#a3ba97f3877cf1d778eccbcb048525ebb77499e1" + integrity sha512-lcHwpNoggQTObv5apGNCTdJrO69eHOZMi4BNC+rTLER8iHAqGrUVeLh/irVIM7zTw2bOXA8T6uNPeujwOLg/2Q== + loose-envify@^1.1.0, loose-envify@^1.4.0: version "1.4.0" resolved "https://registry.yarnpkg.com/loose-envify/-/loose-envify-1.4.0.tgz#71ee51fa7be4caec1a63839f7e682d8132d30caf" @@ -2611,6 +2691,23 @@ once@^1.3.0: dependencies: wrappy "1" +onnxruntime-common@1.18.0: + version "1.18.0" + resolved "https://registry.yarnpkg.com/onnxruntime-common/-/onnxruntime-common-1.18.0.tgz#b904dc6ff134e7f21a3eab702fac17538f59e116" + integrity sha512-lufrSzX6QdKrktAELG5x5VkBpapbCeS3dQwrXbN0eD9rHvU0yAWl7Ztju9FvgAKWvwd/teEKJNj3OwM6eTZh3Q== + +onnxruntime-web@^1.18.0: + version "1.18.0" + resolved "https://registry.yarnpkg.com/onnxruntime-web/-/onnxruntime-web-1.18.0.tgz#cd46268d9472f89697da0a3282f13129f0acbfa0" + integrity sha512-o1UKj4ABIj1gmG7ae0RKJ3/GT+3yoF0RRpfDfeoe0huzRW4FDRLfbkDETmdFAvnJEXuYDE0YT+hhkia0352StQ== + dependencies: + flatbuffers "^1.12.0" + guid-typescript "^1.0.9" + long "^5.2.3" + onnxruntime-common "1.18.0" + platform "^1.3.6" + protobufjs "^7.2.4" + open@^8.4.0: version "8.4.2" resolved "https://registry.yarnpkg.com/open/-/open-8.4.2.tgz#5b5ffe2a8f793dcd2aad73e550cb87b59cb084f9" @@ -2713,6 +2810,11 @@ pirates@^4.0.1: resolved "https://registry.yarnpkg.com/pirates/-/pirates-4.0.6.tgz#3018ae32ecfcff6c29ba2267cbf21166ac1f36b9" integrity sha512-saLsH7WeYYPiD25LDuLRRY/i+6HaPYr6G1OUlN39otzkSTxKnubR9RTxS3/Kk50s1g2JTgFwWQDQyplC5/SHZg== +platform@^1.3.6: + version "1.3.6" + resolved "https://registry.yarnpkg.com/platform/-/platform-1.3.6.tgz#48b4ce983164b209c2d45a107adb31f473a6e7a7" + integrity sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg== + pluralize@^8.0.0: version "8.0.0" resolved "https://registry.yarnpkg.com/pluralize/-/pluralize-8.0.0.tgz#1a6fa16a38d12a1901e0320fa017051c539ce3b1" @@ -2802,6 +2904,24 @@ prop-types@^15.6.2, prop-types@^15.8.1: object-assign "^4.1.1" react-is "^16.13.1" +protobufjs@^7.2.4: + version "7.3.2" + resolved "https://registry.yarnpkg.com/protobufjs/-/protobufjs-7.3.2.tgz#60f3b7624968868f6f739430cfbc8c9370e26df4" + integrity sha512-RXyHaACeqXeqAKGLDl68rQKbmObRsTIn4TYVUUug1KfS47YWCo5MacGITEryugIgZqORCvJWEk4l449POg5Txg== + dependencies: + "@protobufjs/aspromise" "^1.1.2" + "@protobufjs/base64" "^1.1.2" + "@protobufjs/codegen" "^2.0.4" + "@protobufjs/eventemitter" "^1.1.0" + "@protobufjs/fetch" "^1.1.0" + "@protobufjs/float" "^1.0.2" + "@protobufjs/inquire" "^1.1.0" + "@protobufjs/path" "^1.1.2" + "@protobufjs/pool" "^1.1.0" + "@protobufjs/utf8" "^1.1.0" + "@types/node" ">=13.7.0" + long "^5.0.0" + punycode@^2.1.0: version "2.3.1" resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.1.tgz#027422e2faec0b25e1549c3e1bd8309b9133b6e5" @@ -3612,6 +3732,11 @@ unbox-primitive@^1.0.2: has-symbols "^1.0.3" which-boxed-primitive "^1.0.2" +undici-types@~6.13.0: + version "6.13.0" + resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.13.0.tgz#e3e79220ab8c81ed1496b5812471afd7cf075ea5" + integrity sha512-xtFJHudx8S2DSoujjMd1WeWvn7KKWFRESZTMeL1RptAYERu29D6jphMjjY+vn96jvN3kVPDNxU/E13VTaXj6jg== + update-browserslist-db@^1.1.0: version "1.1.0" resolved "https://registry.yarnpkg.com/update-browserslist-db/-/update-browserslist-db-1.1.0.tgz#7ca61c0d8650766090728046e416a8cde682859e" diff --git a/server/endpoints/api/openai/index.js b/server/endpoints/api/openai/index.js index 309575115..cd732f424 100644 --- a/server/endpoints/api/openai/index.js +++ b/server/endpoints/api/openai/index.js @@ -154,6 +154,7 @@ function apiOpenAICompatibleEndpoints(app) { workspace.chatProvider ?? process.env.LLM_PROVIDER ?? "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, @@ -180,6 +181,7 @@ function apiOpenAICompatibleEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, diff --git a/server/endpoints/api/workspace/index.js b/server/endpoints/api/workspace/index.js index 719b73baf..3d4e90fb4 100644 --- a/server/endpoints/api/workspace/index.js +++ b/server/endpoints/api/workspace/index.js @@ -73,6 +73,7 @@ function apiWorkspaceEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_workspace_created", { workspaceName: workspace?.name || "Unknown Workspace", @@ -622,6 +623,7 @@ function apiWorkspaceEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, @@ -745,6 +747,7 @@ function apiWorkspaceEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, diff --git a/server/endpoints/api/workspaceThread/index.js b/server/endpoints/api/workspaceThread/index.js index a636a85d2..e2c6af1c7 100644 --- a/server/endpoints/api/workspaceThread/index.js +++ b/server/endpoints/api/workspaceThread/index.js @@ -90,6 +90,7 @@ function apiWorkspaceThreadEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_workspace_thread_created", { workspaceName: workspace?.name || "Unknown Workspace", @@ -416,6 +417,7 @@ function apiWorkspaceThreadEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, @@ -567,6 +569,7 @@ function apiWorkspaceThreadEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent("api_sent_chat", { workspaceName: workspace?.name, diff --git a/server/endpoints/chat.js b/server/endpoints/chat.js index 787aba574..64beefeb6 100644 --- a/server/endpoints/chat.js +++ b/server/endpoints/chat.js @@ -98,6 +98,7 @@ function chatEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", multiModal: Array.isArray(attachments) && attachments?.length !== 0, + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent( @@ -226,6 +227,7 @@ function chatEndpoints(app) { Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", multiModal: Array.isArray(attachments) && attachments?.length !== 0, + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent( diff --git a/server/endpoints/workspaceThreads.js b/server/endpoints/workspaceThreads.js index 42e502278..4e071992b 100644 --- a/server/endpoints/workspaceThreads.js +++ b/server/endpoints/workspaceThreads.js @@ -40,6 +40,7 @@ function workspaceThreadEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }, user?.id ); diff --git a/server/endpoints/workspaces.js b/server/endpoints/workspaces.js index 4f523aaaf..43b093679 100644 --- a/server/endpoints/workspaces.js +++ b/server/endpoints/workspaces.js @@ -55,6 +55,7 @@ function workspaceEndpoints(app) { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }, user?.id ); diff --git a/server/models/documents.js b/server/models/documents.js index 80d4fd850..43ec5f9f4 100644 --- a/server/models/documents.js +++ b/server/models/documents.js @@ -142,6 +142,7 @@ const Document = { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent( "workspace_documents_added", @@ -185,6 +186,7 @@ const Document = { LLMSelection: process.env.LLM_PROVIDER || "openai", Embedder: process.env.EMBEDDING_ENGINE || "inherit", VectorDbSelection: process.env.VECTOR_DB || "lancedb", + TTSSelection: process.env.TTS_PROVIDER || "native", }); await EventLogs.logEvent( "workspace_documents_removed", diff --git a/server/models/systemSettings.js b/server/models/systemSettings.js index 216f63ad5..b85f3cb8c 100644 --- a/server/models/systemSettings.js +++ b/server/models/systemSettings.js @@ -209,6 +209,9 @@ const SystemSettings = { // Eleven Labs TTS TTSElevenLabsKey: !!process.env.TTS_ELEVEN_LABS_KEY, TTSElevenLabsVoiceModel: process.env.TTS_ELEVEN_LABS_VOICE_MODEL, + // Piper TTS + TTSPiperTTSVoiceModel: + process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium", // -------------------------------------------------------- // Agent Settings & Configs diff --git a/server/utils/helpers/updateENV.js b/server/utils/helpers/updateENV.js index 85981994d..c579da188 100644 --- a/server/utils/helpers/updateENV.js +++ b/server/utils/helpers/updateENV.js @@ -477,6 +477,12 @@ const KEY_MAPPING = { envKey: "TTS_ELEVEN_LABS_VOICE_MODEL", checks: [], }, + + // PiperTTS Local + TTSPiperTTSVoiceModel: { + envKey: "TTS_PIPER_VOICE_MODEL", + checks: [], + }, }; function isNotEmpty(input = "") { @@ -536,7 +542,12 @@ function validOllamaLLMBasePath(input = "") { } function supportedTTSProvider(input = "") { - const validSelection = ["native", "openai", "elevenlabs"].includes(input); + const validSelection = [ + "native", + "openai", + "elevenlabs", + "piper_local", + ].includes(input); return validSelection ? null : `${input} is not a valid TTS provider.`; }