Allow use of any embedder for any LLM / update data handling modal

* allow use of any embedder for any llm/update data handling modal

* Apply embedder override and fallback to OpenAI and Azure models

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2023-11-16 15:19:49 -08:00 committed by GitHub
parent 2c2543b4d7
commit 5ad8a5f2d0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 256 additions and 180 deletions
frontend/src
components/Sidebar
pages
GeneralSettings/EmbeddingPreference
OnboardingFlow/OnboardingModal
Steps
DataHandling
EmbeddingSelection
LLMSelection
index.jsx
server/utils
AiProviders
azureOpenAi
openAi
helpers

View file

@ -278,7 +278,7 @@ function SettingsButton() {
return (
<a
href={
!!user?.role ? paths.settings.system() : paths.settings.appearance()
!!user?.role ? paths.settings.system() : paths.settings.appearance()
}
className="transition-all duration-300 p-2 rounded-full text-white bg-sidebar-button hover:bg-menu-item-selected-gradient hover:border-slate-100 hover:border-opacity-50 border-transparent border"
>

View file

@ -133,161 +133,161 @@ export default function GeneralEmbeddingPreference() {
</p>
</div>
{["openai", "azure"].includes(settings.LLMProvider) ? (
<div className="w-full h-20 items-center justify-center flex">
<p className="text-gray-800 dark:text-slate-400 text-center">
Your current LLM preference does not require you to set up
this part of AnythingLLM.
<br />
Embedding is being automatically managed by AnythingLLM.
</p>
<>
<div className="text-white text-sm font-medium py-4">
Embedding Providers
</div>
) : (
<>
<div className="text-white text-sm font-medium py-4">
Embedding Providers
</div>
<div className="w-full flex md:flex-wrap overflow-x-scroll gap-4 max-w-[900px]">
<input
hidden={true}
name="EmbeddingEngine"
value={embeddingChoice}
/>
<LLMProviderOption
name="OpenAI"
value="openai"
link="openai.com"
description="Use OpenAI's text-embedding-ada-002 embedding model."
checked={embeddingChoice === "openai"}
image={OpenAiLogo}
onClick={updateChoice}
/>
<LLMProviderOption
name="Azure OpenAI"
value="azure"
link="azure.microsoft.com"
description="The enterprise option of OpenAI hosted on Azure services."
checked={embeddingChoice === "azure"}
image={AzureOpenAiLogo}
onClick={updateChoice}
/>
<LLMProviderOption
name="LocalAI"
value="localai"
link="localai.io"
description="Self hosted LocalAI embedding engine."
checked={embeddingChoice === "localai"}
image={LocalAiLogo}
onClick={updateChoice}
/>
</div>
<div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
{embeddingChoice === "openai" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="text"
name="OpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="OpenAI API Key"
defaultValue={
settings?.OpenAiKey ? "*".repeat(20) : ""
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</>
)}
{embeddingChoice === "azure" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Azure Service Endpoint
</label>
<input
type="url"
name="AzureOpenAiEndpoint"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://my-azure.openai.azure.com"
defaultValue={settings?.AzureOpenAiEndpoint}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="AzureOpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI API Key"
defaultValue={
settings?.AzureOpenAiKey ? "*".repeat(20) : ""
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Embedding Deployment Name
</label>
<input
type="text"
name="AzureOpenAiEmbeddingModelPref"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI embedding model deployment name"
defaultValue={
settings?.AzureOpenAiEmbeddingModelPref
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</>
)}
{embeddingChoice === "localai" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
LocalAI Base URL
</label>
<input
type="url"
name="EmbeddingBasePath"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="http://localhost:8080/v1"
defaultValue={settings?.EmbeddingBasePath}
onChange={(e) => setBasePathValue(e.target.value)}
onBlur={updateBasePath}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<LocalAIModelSelection
settings={settings}
basePath={basePath}
<div className="w-full flex md:flex-wrap overflow-x-scroll gap-4 max-w-[900px]">
<input
hidden={true}
name="EmbeddingEngine"
value={embeddingChoice}
/>
<LLMProviderOption
name="OpenAI"
value="openai"
link="openai.com"
description="Use OpenAI's text-embedding-ada-002 embedding model."
checked={embeddingChoice === "openai"}
image={OpenAiLogo}
onClick={updateChoice}
/>
<LLMProviderOption
name="Azure OpenAI"
value="azure"
link="azure.microsoft.com"
description="The enterprise option of OpenAI hosted on Azure services."
checked={embeddingChoice === "azure"}
image={AzureOpenAiLogo}
onClick={updateChoice}
/>
<LLMProviderOption
name="LocalAI"
value="localai"
link="localai.io"
description="Self hosted LocalAI embedding engine."
checked={embeddingChoice === "localai"}
image={LocalAiLogo}
onClick={updateChoice}
/>
</div>
<div className="mt-10 flex flex-wrap gap-4 max-w-[800px]">
{embeddingChoice === "openai" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="text"
name="OpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="OpenAI API Key"
defaultValue={
settings?.OpenAiKey ? "*".repeat(20) : ""
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</>
)}
</div>
</>
)}
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
disabled={true}
className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
text-embedding-ada-002
</option>
</select>
</div>
</>
)}
{embeddingChoice === "azure" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Azure Service Endpoint
</label>
<input
type="url"
name="AzureOpenAiEndpoint"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://my-azure.openai.azure.com"
defaultValue={settings?.AzureOpenAiEndpoint}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
API Key
</label>
<input
type="password"
name="AzureOpenAiKey"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI API Key"
defaultValue={
settings?.AzureOpenAiKey ? "*".repeat(20) : ""
}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Embedding Deployment Name
</label>
<input
type="text"
name="AzureOpenAiEmbeddingModelPref"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="Azure OpenAI embedding model deployment name"
defaultValue={settings?.AzureOpenAiEmbeddingModelPref}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
</>
)}
{embeddingChoice === "localai" && (
<>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
LocalAI Base URL
</label>
<input
type="url"
name="EmbeddingBasePath"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="http://localhost:8080/v1"
defaultValue={settings?.EmbeddingBasePath}
onChange={(e) => setBasePathValue(e.target.value)}
onBlur={updateBasePath}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<LocalAIModelSelection
settings={settings}
basePath={basePath}
/>
</>
)}
</div>
</>
</div>
</form>
</div>

View file

@ -97,16 +97,44 @@ const VECTOR_DB_PRIVACY = {
},
};
// Privacy disclosures for each supported embedding engine, keyed by the
// EmbeddingEngine setting value ("openai", "azure", "localai"). Each entry
// supplies the display name, the bullet-point statements rendered in the
// data-handling step, and the provider logo.
const EMBEDDING_ENGINE_PRIVACY = {
  openai: {
    name: "OpenAI",
    description: [
      "Your documents are visible to OpenAI",
      "Your documents are not used for training",
    ],
    logo: OpenAiLogo,
  },
  azure: {
    name: "Azure OpenAI",
    description: [
      "Your documents are not visible to OpenAI or Microsoft",
      // Wording kept parallel with the OpenAI entry (was missing "are").
      "Your documents are not used for training",
    ],
    logo: AzureOpenAiLogo,
  },
  localai: {
    name: "LocalAI",
    description: [
      "Your documents are only accessible on the server running LocalAI",
    ],
    logo: LocalAiLogo,
  },
};
function DataHandling({ nextStep, prevStep, currentStep }) {
const [llmChoice, setLLMChoice] = useState("openai");
const [loading, setLoading] = useState(true);
const [vectorDb, setVectorDb] = useState("pinecone");
const [embeddingEngine, setEmbeddingEngine] = useState("openai");
useEffect(() => {
async function fetchKeys() {
const _settings = await System.keys();
setLLMChoice(_settings?.LLMProvider);
setVectorDb(_settings?.VectorDB);
setEmbeddingEngine(_settings?.EmbeddingEngine);
setLoading(false);
}
@ -124,8 +152,8 @@ function DataHandling({ nextStep, prevStep, currentStep }) {
return (
<div className="max-w-[750px]">
<div className="p-8 flex gap-x-16">
<div className="w-1/2 flex flex-col gap-y-3.5">
<div className="p-8 flex flex-col gap-8">
<div className="flex flex-col gap-y-3.5 border-b border-zinc-500/50 pb-8">
<div className="text-white text-base font-bold">LLM Selection</div>
<div className="flex items-center gap-2.5">
<img
@ -146,7 +174,28 @@ function DataHandling({ nextStep, prevStep, currentStep }) {
</div>
</div>
<div className="w-1/2 flex flex-col gap-y-3.5">
<div className="flex flex-col gap-y-3.5 border-b border-zinc-500/50 pb-8">
<div className="text-white text-base font-bold">Embedding Engine</div>
<div className="flex items-center gap-2.5">
<img
src={EMBEDDING_ENGINE_PRIVACY[embeddingEngine].logo}
alt="Vector DB Logo"
className="w-8 h-8 rounded"
/>
<p className="text-white text-sm font-bold">
{EMBEDDING_ENGINE_PRIVACY[embeddingEngine].name}
</p>
</div>
<ul className="flex flex-col list-disc">
{EMBEDDING_ENGINE_PRIVACY[embeddingEngine].description.map(
(desc) => (
<li className="text-white/90 text-sm">{desc}</li>
)
)}
</ul>
</div>
<div className="flex flex-col gap-y-3.5 ">
<div className="text-white text-base font-bold">Vector Database</div>
<div className="flex items-center gap-2.5">
<img

View file

@ -113,6 +113,19 @@ function EmbeddingSelection({ nextStep, prevStep, currentStep }) {
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Model Preference
</label>
<select
disabled={true}
className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
text-embedding-ada-002
</option>
</select>
</div>
</>
)}

View file

@ -46,15 +46,7 @@ function LLMSelection({ nextStep, prevStep, currentStep }) {
alert(`Failed to save LLM settings: ${error}`, "error");
return;
}
switch (data.LLMProvider) {
case "anthropic":
case "lmstudio":
case "localai":
return nextStep("embedding_preferences");
default:
return nextStep("vector_database");
}
nextStep("embedding_preferences");
};
if (loading)

View file

@ -65,8 +65,7 @@ const STEPS = {
},
embedding_preferences: {
title: "Embedding Preference",
description:
"Due to your LLM selection you need to set up a provider for embedding files and text.",
description: "Choose a provider for embedding files and text.",
component: EmbeddingSelection,
},
};

View file

@ -1,9 +1,8 @@
const { AzureOpenAiEmbedder } = require("../../EmbeddingEngines/azureOpenAi");
const { chatPrompt } = require("../../chats");
class AzureOpenAiLLM extends AzureOpenAiEmbedder {
constructor() {
super();
class AzureOpenAiLLM {
constructor(embedder = null) {
const { OpenAIClient, AzureKeyCredential } = require("@azure/openai");
if (!process.env.AZURE_OPENAI_ENDPOINT)
throw new Error("No Azure API endpoint was set.");
@ -20,6 +19,12 @@ class AzureOpenAiLLM extends AzureOpenAiEmbedder {
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for AzureOpenAiLLM - falling back to AzureOpenAiEmbedder for embedding!"
);
this.embedder = !embedder ? new AzureOpenAiEmbedder() : embedder;
}
streamingEnabled() {
@ -114,6 +119,14 @@ Context:
return data.choices[0].message.content;
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);

View file

@ -1,9 +1,8 @@
const { OpenAiEmbedder } = require("../../EmbeddingEngines/openAi");
const { chatPrompt } = require("../../chats");
class OpenAiLLM extends OpenAiEmbedder {
constructor() {
super();
class OpenAiLLM {
constructor(embedder = null) {
const { Configuration, OpenAIApi } = require("openai");
if (!process.env.OPEN_AI_KEY) throw new Error("No OpenAI API key was set.");
@ -17,6 +16,12 @@ class OpenAiLLM extends OpenAiEmbedder {
system: this.promptWindowLimit() * 0.15,
user: this.promptWindowLimit() * 0.7,
};
if (!embedder)
console.warn(
"No embedding provider defined for OpenAiLLM - falling back to OpenAiEmbedder for embedding!"
);
this.embedder = !embedder ? new OpenAiEmbedder() : embedder;
}
streamingEnabled() {
@ -203,6 +208,14 @@ Context:
return streamRequest;
}
// Simple wrapper for dynamic embedder & normalize interface for all LLM implementations
async embedTextInput(textInput) {
return await this.embedder.embedTextInput(textInput);
}
async embedChunks(textChunks = []) {
return await this.embedder.embedChunks(textChunks);
}
async compressMessages(promptArgs = {}, rawHistory = []) {
const { messageArrayCompressor } = require("../../helpers/chat");
const messageArray = this.constructPrompt(promptArgs);

View file

@ -23,25 +23,22 @@ function getVectorDbClass() {
function getLLMProvider() {
const vectorSelection = process.env.LLM_PROVIDER || "openai";
let embedder = null;
const embedder = getEmbeddingEngineSelection();
switch (vectorSelection) {
case "openai":
const { OpenAiLLM } = require("../AiProviders/openAi");
return new OpenAiLLM();
return new OpenAiLLM(embedder);
case "azure":
const { AzureOpenAiLLM } = require("../AiProviders/azureOpenAi");
return new AzureOpenAiLLM();
return new AzureOpenAiLLM(embedder);
case "anthropic":
const { AnthropicLLM } = require("../AiProviders/anthropic");
embedder = getEmbeddingEngineSelection();
return new AnthropicLLM(embedder);
case "lmstudio":
const { LMStudioLLM } = require("../AiProviders/lmStudio");
embedder = getEmbeddingEngineSelection();
return new LMStudioLLM(embedder);
case "localai":
const { LocalAiLLM } = require("../AiProviders/localAi");
embedder = getEmbeddingEngineSelection();
return new LocalAiLLM(embedder);
default:
throw new Error("ENV: No LLM_PROVIDER value found in environment!");