anything-llm/server/utils/helpers/updateENV.js
Hakeem Abbas 5614e2ed30
feature: Integrate Astra as vectorDBProvider ()
* feature: Integrate Astra as vectorDBProvider

feature: Integrate Astra as vectorDBProvider

* Update .env.example

* Add env.example to docker example file
Update spellcheck fo Astra
Update Astra key for vector selection
Update order of AstraDB options
Resize Astra logo image to 330x330
Update methods of Astra to take in latest vectorDB params like TopN and more
Update Astra interface to support default methods and avoid crash errors from 404 collections
Update Astra interface to comply to max chunk insertion limitations
Update Astra interface to dynamically set dimensionality from chunk 0 size on creation

* reset workspaces

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
2024-01-26 13:07:53 -08:00

489 lines
12 KiB
JavaScript

const KEY_MAPPING = {
LLMProvider: {
envKey: "LLM_PROVIDER",
checks: [isNotEmpty, supportedLLM],
postUpdate: [wipeWorkspaceModelPreference],
},
// OpenAI Settings
OpenAiKey: {
envKey: "OPEN_AI_KEY",
checks: [isNotEmpty, validOpenAIKey],
},
OpenAiModelPref: {
envKey: "OPEN_MODEL_PREF",
checks: [isNotEmpty],
},
// Azure OpenAI Settings
AzureOpenAiEndpoint: {
envKey: "AZURE_OPENAI_ENDPOINT",
checks: [isNotEmpty, validAzureURL],
},
AzureOpenAiTokenLimit: {
envKey: "AZURE_OPENAI_TOKEN_LIMIT",
checks: [validOpenAiTokenLimit],
},
AzureOpenAiKey: {
envKey: "AZURE_OPENAI_KEY",
checks: [isNotEmpty],
},
AzureOpenAiModelPref: {
envKey: "OPEN_MODEL_PREF",
checks: [isNotEmpty],
},
AzureOpenAiEmbeddingModelPref: {
envKey: "EMBEDDING_MODEL_PREF",
checks: [isNotEmpty],
},
// Anthropic Settings
AnthropicApiKey: {
envKey: "ANTHROPIC_API_KEY",
checks: [isNotEmpty, validAnthropicApiKey],
},
AnthropicModelPref: {
envKey: "ANTHROPIC_MODEL_PREF",
checks: [isNotEmpty, validAnthropicModel],
},
GeminiLLMApiKey: {
envKey: "GEMINI_API_KEY",
checks: [isNotEmpty],
},
GeminiLLMModelPref: {
envKey: "GEMINI_LLM_MODEL_PREF",
checks: [isNotEmpty, validGeminiModel],
},
// LMStudio Settings
LMStudioBasePath: {
envKey: "LMSTUDIO_BASE_PATH",
checks: [isNotEmpty, validLLMExternalBasePath, validDockerizedUrl],
},
LMStudioTokenLimit: {
envKey: "LMSTUDIO_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
// LocalAI Settings
LocalAiBasePath: {
envKey: "LOCAL_AI_BASE_PATH",
checks: [isNotEmpty, validLLMExternalBasePath, validDockerizedUrl],
},
LocalAiModelPref: {
envKey: "LOCAL_AI_MODEL_PREF",
checks: [],
},
LocalAiTokenLimit: {
envKey: "LOCAL_AI_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
LocalAiApiKey: {
envKey: "LOCAL_AI_API_KEY",
checks: [],
},
OllamaLLMBasePath: {
envKey: "OLLAMA_BASE_PATH",
checks: [isNotEmpty, validOllamaLLMBasePath, validDockerizedUrl],
},
OllamaLLMModelPref: {
envKey: "OLLAMA_MODEL_PREF",
checks: [],
},
OllamaLLMTokenLimit: {
envKey: "OLLAMA_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
MistralApiKey: {
envKey: "MISTRAL_API_KEY",
checks: [isNotEmpty],
},
MistralModelPref: {
envKey: "MISTRAL_MODEL_PREF",
checks: [isNotEmpty],
},
// Native LLM Settings
NativeLLMModelPref: {
envKey: "NATIVE_LLM_MODEL_PREF",
checks: [isDownloadedModel],
},
NativeLLMTokenLimit: {
envKey: "NATIVE_LLM_MODEL_TOKEN_LIMIT",
checks: [nonZero],
},
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
},
EmbeddingBasePath: {
envKey: "EMBEDDING_BASE_PATH",
checks: [isNotEmpty, validLLMExternalBasePath, validDockerizedUrl],
},
EmbeddingModelPref: {
envKey: "EMBEDDING_MODEL_PREF",
checks: [isNotEmpty],
},
EmbeddingModelMaxChunkLength: {
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
checks: [nonZero],
},
// Vector Database Selection Settings
VectorDB: {
envKey: "VECTOR_DB",
checks: [isNotEmpty, supportedVectorDB],
},
// Chroma Options
ChromaEndpoint: {
envKey: "CHROMA_ENDPOINT",
checks: [isValidURL, validChromaURL, validDockerizedUrl],
},
ChromaApiHeader: {
envKey: "CHROMA_API_HEADER",
checks: [],
},
ChromaApiKey: {
envKey: "CHROMA_API_KEY",
checks: [],
},
// Weaviate Options
WeaviateEndpoint: {
envKey: "WEAVIATE_ENDPOINT",
checks: [isValidURL, validDockerizedUrl],
},
WeaviateApiKey: {
envKey: "WEAVIATE_API_KEY",
checks: [],
},
// QDrant Options
QdrantEndpoint: {
envKey: "QDRANT_ENDPOINT",
checks: [isValidURL, validDockerizedUrl],
},
QdrantApiKey: {
envKey: "QDRANT_API_KEY",
checks: [],
},
PineConeKey: {
envKey: "PINECONE_API_KEY",
checks: [],
},
PineConeIndex: {
envKey: "PINECONE_INDEX",
checks: [],
},
// Milvus Options
MilvusAddress: {
envKey: "MILVUS_ADDRESS",
checks: [isValidURL, validDockerizedUrl],
},
MilvusUsername: {
envKey: "MILVUS_USERNAME",
checks: [isNotEmpty],
},
MilvusPassword: {
envKey: "MILVUS_PASSWORD",
checks: [isNotEmpty],
},
// Zilliz Cloud Options
ZillizEndpoint: {
envKey: "ZILLIZ_ENDPOINT",
checks: [isValidURL],
},
ZillizApiToken: {
envKey: "ZILLIZ_API_TOKEN",
checks: [isNotEmpty],
},
// Astra DB Options
AstraDBApplicationToken: {
envKey: "ASTRA_DB_APPLICATION_TOKEN",
checks: [isNotEmpty],
},
AstraDBEndpoint: {
envKey: "ASTRA_DB_ENDPOINT",
checks: [isNotEmpty],
},
// Together Ai Options
TogetherAiApiKey: {
envKey: "TOGETHER_AI_API_KEY",
checks: [isNotEmpty],
},
TogetherAiModelPref: {
envKey: "TOGETHER_AI_MODEL_PREF",
checks: [isNotEmpty],
},
// System Settings
AuthToken: {
envKey: "AUTH_TOKEN",
checks: [requiresForceMode],
},
JWTSecret: {
envKey: "JWT_SECRET",
checks: [requiresForceMode],
},
};
function isNotEmpty(input = "") {
return !input || input.length === 0 ? "Value cannot be empty" : null;
}
function nonZero(input = "") {
if (isNaN(Number(input))) return "Value must be a number";
return Number(input) <= 0 ? "Value must be greater than zero" : null;
}
function isValidURL(input = "") {
try {
new URL(input);
return null;
} catch (e) {
return "URL is not a valid URL.";
}
}
function validOpenAIKey(input = "") {
return input.startsWith("sk-") ? null : "OpenAI Key must start with sk-";
}
function validAnthropicApiKey(input = "") {
return input.startsWith("sk-ant-")
? null
: "Anthropic Key must start with sk-ant-";
}
function validLLMExternalBasePath(input = "") {
try {
new URL(input);
if (!input.includes("v1")) return "URL must include /v1";
if (input.split("").slice(-1)?.[0] === "/")
return "URL cannot end with a slash";
return null;
} catch {
return "Not a valid URL";
}
}
function validOllamaLLMBasePath(input = "") {
try {
new URL(input);
if (input.split("").slice(-1)?.[0] === "/")
return "URL cannot end with a slash";
return null;
} catch {
return "Not a valid URL";
}
}
function supportedLLM(input = "") {
const validSelection = [
"openai",
"azure",
"anthropic",
"gemini",
"lmstudio",
"localai",
"ollama",
"native",
"togetherai",
"mistral",
].includes(input);
return validSelection ? null : `${input} is not a valid LLM provider.`;
}
function validGeminiModel(input = "") {
const validModels = ["gemini-pro"];
return validModels.includes(input)
? null
: `Invalid Model type. Must be one of ${validModels.join(", ")}.`;
}
function validAnthropicModel(input = "") {
const validModels = ["claude-2", "claude-instant-1"];
return validModels.includes(input)
? null
: `Invalid Model type. Must be one of ${validModels.join(", ")}.`;
}
function supportedEmbeddingModel(input = "") {
const supported = ["openai", "azure", "localai", "native"];
return supported.includes(input)
? null
: `Invalid Embedding model type. Must be one of ${supported.join(", ")}.`;
}
function supportedVectorDB(input = "") {
const supported = [
"chroma",
"pinecone",
"lancedb",
"weaviate",
"qdrant",
"milvus",
"zilliz",
"astra",
];
return supported.includes(input)
? null
: `Invalid VectorDB type. Must be one of ${supported.join(", ")}.`;
}
function validChromaURL(input = "") {
return input.slice(-1) === "/"
? `Chroma Instance URL should not end in a trailing slash.`
: null;
}
function validAzureURL(input = "") {
try {
new URL(input);
if (!input.includes("openai.azure.com"))
return "URL must include openai.azure.com";
return null;
} catch {
return "Not a valid URL";
}
}
function validOpenAiTokenLimit(input = "") {
const tokenLimit = Number(input);
if (isNaN(tokenLimit)) return "Token limit is not a number";
if (![4_096, 16_384, 8_192, 32_768, 128_000].includes(tokenLimit))
return "Invalid OpenAI token limit.";
return null;
}
function requiresForceMode(_, forceModeEnabled = false) {
return forceModeEnabled === true ? null : "Cannot set this setting.";
}
function isDownloadedModel(input = "") {
const fs = require("fs");
const path = require("path");
const storageDir = path.resolve(
process.env.STORAGE_DIR
? path.resolve(process.env.STORAGE_DIR, "models", "downloaded")
: path.resolve(__dirname, `../../storage/models/downloaded`)
);
if (!fs.existsSync(storageDir)) return false;
const files = fs
.readdirSync(storageDir)
.filter((file) => file.includes(".gguf"));
return files.includes(input);
}
function validDockerizedUrl(input = "") {
if (process.env.ANYTHING_LLM_RUNTIME !== "docker") return null;
try {
const { hostname } = new URL(input);
if (["localhost", "127.0.0.1", "0.0.0.0"].includes(hostname.toLowerCase()))
return "Localhost, 127.0.0.1, or 0.0.0.0 origins cannot be reached from inside the AnythingLLM container. Please use host.docker.internal, a real machine ip, or domain to connect to your service.";
return null;
} catch {}
return null;
}
// If the LLMProvider has changed we need to reset all workspace model preferences to
// null since the provider<>model name combination will be invalid for whatever the new
// provider is.
async function wipeWorkspaceModelPreference(key, prev, next) {
if (prev === next) return;
const { Workspace } = require("../../models/workspace");
await Workspace.resetWorkspaceChatModels();
}
// This will force update .env variables which for any which reason were not able to be parsed or
// read from an ENV file as this seems to be a complicating step for many so allowing people to write
// to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks
// and is simply for debugging when the .env not found issue many come across.
async function updateENV(newENVs = {}, force = false) {
let error = "";
const validKeys = Object.keys(KEY_MAPPING);
const ENV_KEYS = Object.keys(newENVs).filter(
(key) => validKeys.includes(key) && !newENVs[key].includes("******") // strip out answers where the value is all asterisks
);
const newValues = {};
for (const key of ENV_KEYS) {
const { envKey, checks, postUpdate = [] } = KEY_MAPPING[key];
const prevValue = process.env[envKey];
const nextValue = newENVs[key];
const errors = checks
.map((validityCheck) => validityCheck(nextValue, force))
.filter((err) => typeof err === "string");
if (errors.length > 0) {
error += errors.join("\n");
break;
}
newValues[key] = nextValue;
process.env[envKey] = nextValue;
for (const postUpdateFunc of postUpdate)
await postUpdateFunc(key, prevValue, nextValue);
}
return { newValues, error: error?.length > 0 ? error : false };
}
async function dumpENV() {
const fs = require("fs");
const path = require("path");
const frozenEnvs = {};
const protectedKeys = [
...Object.values(KEY_MAPPING).map((values) => values.envKey),
"STORAGE_DIR",
"SERVER_PORT",
// Password Schema Keys if present.
"PASSWORDMINCHAR",
"PASSWORDMAXCHAR",
"PASSWORDLOWERCASE",
"PASSWORDUPPERCASE",
"PASSWORDNUMERIC",
"PASSWORDSYMBOL",
"PASSWORDREQUIREMENTS",
// HTTPS SETUP KEYS
"ENABLE_HTTPS",
"HTTPS_CERT_PATH",
"HTTPS_KEY_PATH",
// DISABLED TELEMETRY
"DISABLE_TELEMETRY",
];
for (const key of protectedKeys) {
const envValue = process.env?.[key] || null;
if (!envValue) continue;
frozenEnvs[key] = process.env?.[key] || null;
}
var envResult = `# Auto-dump ENV from system call on ${new Date().toTimeString()}\n`;
envResult += Object.entries(frozenEnvs)
.map(([key, value]) => {
return `${key}='${value}'`;
})
.join("\n");
const envPath = path.join(__dirname, "../../.env");
fs.writeFileSync(envPath, envResult, { encoding: "utf8", flag: "w" });
return true;
}
module.exports = {
dumpENV,
updateENV,
};