mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
[FEAT] add support for new openai embedding models (#653)
* Add support for new OpenAI embedding models * QOL changes/improve logic for adding new OpenAI embedding models * Add example file inputs for OpenAI embedding ENV selection * Fix if-statement conditional --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
5614e2ed30
commit
9d41ff58e2
5 changed files with 37 additions and 12 deletions
docker
frontend/src/components
server
|
@ -54,6 +54,7 @@ GID='1000'
|
|||
# Only used if you are using an LLM that does not natively support embedding (OpenAI or Azure)
|
||||
# EMBEDDING_ENGINE='openai'
|
||||
# OPEN_AI_KEY=sk-xxxx
|
||||
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
|
||||
|
||||
# EMBEDDING_ENGINE='azure'
|
||||
# AZURE_OPENAI_ENDPOINT=
|
||||
|
|
|
@ -22,12 +22,27 @@ export default function OpenAiOptions({ settings }) {
|
|||
Model Preference
|
||||
</label>
|
||||
<select
|
||||
disabled={true}
|
||||
className="cursor-not-allowed bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
name="EmbeddingModelPref"
|
||||
required={true}
|
||||
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
|
||||
>
|
||||
<option disabled={true} selected={true}>
|
||||
text-embedding-ada-002
|
||||
</option>
|
||||
<optgroup label="Available embedding models">
|
||||
{[
|
||||
"text-embedding-ada-002",
|
||||
"text-embedding-3-small",
|
||||
"text-embedding-3-large",
|
||||
].map((model) => {
|
||||
return (
|
||||
<option
|
||||
key={model}
|
||||
value={model}
|
||||
selected={settings?.EmbeddingModelPref === model}
|
||||
>
|
||||
{model}
|
||||
</option>
|
||||
);
|
||||
})}
|
||||
</optgroup>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -6,9 +6,14 @@ import Directory from "./Directory";
|
|||
import showToast from "../../../../utils/toast";
|
||||
import WorkspaceDirectory from "./WorkspaceDirectory";
|
||||
|
||||
// OpenAI Cost per token for text-ada-embedding
|
||||
// OpenAI Cost per token
|
||||
// ref: https://openai.com/pricing#:~:text=%C2%A0/%201K%20tokens-,Embedding%20models,-Build%20advanced%20search
|
||||
const COST_PER_TOKEN = 0.0000001; // $0.0001 / 1K tokens
|
||||
|
||||
const MODEL_COSTS = {
|
||||
"text-embedding-ada-002": 0.0000001, // $0.0001 / 1K tokens
|
||||
"text-embedding-3-small": 0.00000002, // $0.00002 / 1K tokens
|
||||
"text-embedding-3-large": 0.00000013, // $0.00013 / 1K tokens
|
||||
};
|
||||
|
||||
export default function DocumentSettings({
|
||||
workspace,
|
||||
|
@ -142,10 +147,12 @@ export default function DocumentSettings({
|
|||
});
|
||||
|
||||
// Do not do cost estimation unless the embedding engine is OpenAi.
|
||||
if (
|
||||
!systemSettings?.EmbeddingEngine ||
|
||||
systemSettings.EmbeddingEngine === "openai"
|
||||
) {
|
||||
if (systemSettings?.EmbeddingEngine === "openai") {
|
||||
const COST_PER_TOKEN =
|
||||
MODEL_COSTS[
|
||||
systemSettings?.EmbeddingModelPref || "text-embedding-ada-002"
|
||||
];
|
||||
|
||||
const dollarAmount = (totalTokenCount / 1000) * COST_PER_TOKEN;
|
||||
setEmbeddingsCost(dollarAmount);
|
||||
}
|
||||
|
|
|
@ -51,6 +51,7 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
|
|||
# Only used if you are using an LLM that does not natively support embedding (OpenAI or Azure)
|
||||
# EMBEDDING_ENGINE='openai'
|
||||
# OPEN_AI_KEY=sk-xxxx
|
||||
# EMBEDDING_MODEL_PREF='text-embedding-ada-002'
|
||||
|
||||
# EMBEDDING_ENGINE='azure'
|
||||
# AZURE_OPENAI_ENDPOINT=
|
||||
|
|
|
@ -9,6 +9,7 @@ class OpenAiEmbedder {
|
|||
});
|
||||
const openai = new OpenAIApi(config);
|
||||
this.openai = openai;
|
||||
this.model = process.env.EMBEDDING_MODEL_PREF || "text-embedding-ada-002";
|
||||
|
||||
// Limit of how many strings we can process in a single pass to stay within resource or network limits
|
||||
this.maxConcurrentChunks = 500;
|
||||
|
@ -30,7 +31,7 @@ class OpenAiEmbedder {
|
|||
new Promise((resolve) => {
|
||||
this.openai
|
||||
.createEmbedding({
|
||||
model: "text-embedding-ada-002",
|
||||
model: this.model,
|
||||
input: chunk,
|
||||
})
|
||||
.then((res) => {
|
||||
|
|
Loading…
Add table
Reference in a new issue