Merge branch 'master' of github.com:Mintplex-Labs/anything-llm

This commit is contained in:
timothycarambat 2024-12-16 13:16:51 -08:00
commit 15abc3f803
13 changed files with 437 additions and 48 deletions
frontend/src
components/ChangeWarning
pages/GeneralSettings
EmbeddingPreference
VectorDatabase
server
endpoints/api/admin
models
swagger
utils
AiProviders/genericOpenAi
files
helpers
vectorStore

View file

@ -1,4 +1,4 @@
import { Warning } from "@phosphor-icons/react";
import { Warning, X } from "@phosphor-icons/react";
export default function ChangeWarningModal({
warningText = "",
@ -6,41 +6,55 @@ export default function ChangeWarningModal({
onConfirm,
}) {
return (
<div className="relative w-full max-w-2xl max-h-full">
<div className="relative bg-main-gradient rounded-lg shadow">
<div className="flex items-start justify-between p-4 border-b rounded-t border-gray-500/50">
<div className="flex items-center gap-2">
<Warning
className="text-yellow-300 text-lg w-6 h-6"
weight="fill"
/>
<h3 className="text-xl font-semibold text-yellow-300">Warning</h3>
</div>
<div className="w-full max-w-2xl bg-theme-bg-secondary rounded-lg shadow border-2 border-theme-modal-border overflow-hidden z-9999">
<div className="relative p-6 border-b rounded-t border-theme-modal-border">
<div className="w-full flex gap-x-2 items-center">
<Warning className="text-red-500 w-6 h-6" weight="fill" />
<h3 className="text-xl font-semibold text-red-500 overflow-hidden overflow-ellipsis whitespace-nowrap">
WARNING - This action is irreversible
</h3>
</div>
<div className="w-[550px] p-6 text-white">
<p>
{warningText}
<button
onClick={onClose}
type="button"
className="absolute top-4 right-4 transition-all duration-300 bg-transparent rounded-lg text-sm p-1 inline-flex items-center hover:bg-theme-modal-border hover:border-theme-modal-border hover:border-opacity-50 border-transparent border"
>
<X size={24} weight="bold" className="text-white" />
</button>
</div>
<div
className="h-full w-full overflow-y-auto"
style={{ maxHeight: "calc(100vh - 200px)" }}
>
<div className="py-7 px-9 space-y-2 flex-col">
<p className="text-white">
{warningText.split("\\n").map((line, index) => (
<span key={index}>
{line}
<br />
</span>
))}
<br />
<br />
Are you sure you want to proceed?
</p>
</div>
<div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
<button
onClick={onClose}
type="button"
className="px-4 py-2 rounded-lg text-white hover:bg-red-500 transition-all duration-300"
>
Cancel
</button>
<button
onClick={onConfirm}
className="transition-all duration-300 border border-slate-200 px-4 py-2 rounded-lg text-white text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 focus:ring-gray-800"
>
Confirm
</button>
</div>
</div>
<div className="flex w-full justify-end items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
<button
onClick={onClose}
type="button"
className="transition-all duration-300 bg-transparent text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Cancel
</button>
<button
onClick={onConfirm}
type="submit"
className="transition-all duration-300 bg-red-500 light:text-white text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
>
Confirm
</button>
</div>
</div>
);

View file

@ -361,7 +361,7 @@ export default function GeneralEmbeddingPreference() {
)}
<ModalWrapper isOpen={isOpen}>
<ChangeWarningModal
warningText="Switching the embedding model will break previously embedded documents from working during chat. They will need to un-embed from every workspace and fully removed and re-uploaded so they can be embed by the new embedding model."
warningText="Switching the embedding model will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
onClose={closeModal}
onConfirm={handleSaveSettings}
/>

View file

@ -308,7 +308,7 @@ export default function GeneralVectorDatabase() {
)}
<ModalWrapper isOpen={isOpen}>
<ChangeWarningModal
warningText="Switching the vector database will ignore previously embedded documents and future similarity search results. They will need to be re-added to each workspace."
warningText="Switching the vector database will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
onClose={closeModal}
onConfirm={handleSaveSettings}
/>

View file

@ -4,6 +4,7 @@ const { SystemSettings } = require("../../../models/systemSettings");
const { User } = require("../../../models/user");
const { Workspace } = require("../../../models/workspace");
const { WorkspaceChats } = require("../../../models/workspaceChats");
const { WorkspaceUser } = require("../../../models/workspaceUsers");
const { canModifyAdmin } = require("../../../utils/helpers/admin");
const { multiUserMode, reqBody } = require("../../../utils/http");
const { validApiKey } = require("../../../utils/middleware/validApiKey");
@ -420,6 +421,7 @@ function apiAdminEndpoints(app) {
}
}
);
app.get(
"/v1/admin/workspaces/:workspaceId/users",
[validApiKey],
@ -474,12 +476,14 @@ function apiAdminEndpoints(app) {
}
}
);
app.post(
"/v1/admin/workspaces/:workspaceId/update-users",
[validApiKey],
async (request, response) => {
/*
#swagger.tags = ['Admin']
#swagger.deprecated = true
#swagger.parameters['workspaceId'] = {
in: 'path',
description: 'id of the workspace in the database.',
@ -539,6 +543,130 @@ function apiAdminEndpoints(app) {
}
}
);
// Grants users access to a workspace addressed by its slug. With `reset` the
// workspace's user list is replaced by the given users; otherwise users that
// are not yet members are added and existing members are left untouched.
app.post(
  "/v1/admin/workspaces/:workspaceSlug/manage-users",
  [validApiKey],
  async (request, response) => {
    /*
    #swagger.tags = ['Admin']
    #swagger.parameters['workspaceSlug'] = {
      in: 'path',
      description: 'slug of the workspace in the database',
      required: true,
      type: 'string'
    }
    #swagger.description = 'Set workspace permissions to be accessible by the given user ids and admins. Methods are disabled until multi user mode is enabled via the UI.'
    #swagger.requestBody = {
      description: 'Array of user ids who will be given access to the target workspace. <code>reset</code> will remove all existing users from the workspace and only add the new users - default <code>false</code>.',
      required: true,
      content: {
        "application/json": {
          example: {
            userIds: [1,2,4,12],
            reset: false
          }
        }
      }
    }
    #swagger.responses[200] = {
      content: {
        "application/json": {
          schema: {
            type: 'object',
            example: {
              success: true,
              error: null,
              users: [
                {"userId": 1, "username": "main-admin", "role": "admin"},
                {"userId": 2, "username": "sample-sam", "role": "default"}
              ]
            }
          }
        }
      }
    }
    #swagger.responses[403] = {
      schema: {
        "$ref": "#/definitions/InvalidAPIKey"
      }
    }
    #swagger.responses[401] = {
      description: "Instance is not in Multi-User mode. Method denied",
    }
    */
    try {
      if (!multiUserMode(response)) {
        response.sendStatus(401).end();
        return;
      }

      const { workspaceSlug } = request.params;
      const { userIds: _uids, reset = false } = reqBody(request);

      // Resolve the workspace before touching `workspace.id`. Reading the id of
      // a missing workspace throws, which previously turned the intended 404
      // into a 500 from the catch block below.
      const workspace = await Workspace.get({ slug: String(workspaceSlug) });
      if (!workspace) {
        response.status(404).json({
          success: false,
          error: `Workspace ${workspaceSlug} not found`,
          users: [],
        });
        return;
      }

      const workspaceUsers = await Workspace.workspaceUsers(workspace.id);

      // A missing or non-array `userIds` payload, or entries that are not
      // integers, are treated as "no valid ids" instead of throwing.
      const requestedIds = Array.isArray(_uids)
        ? _uids.map(Number).filter(Number.isInteger)
        : [];

      // Only ids that belong to existing users are kept.
      const userIds =
        requestedIds.length === 0
          ? []
          : (await User.where({ id: { in: requestedIds } })).map(
              (user) => user.id
            );

      if (userIds.length === 0) {
        response.status(404).json({
          success: false,
          error: `No valid user IDs provided.`,
          users: workspaceUsers,
        });
        return;
      }

      // Reset all users in the workspace and add the new users as the only users in the workspace
      if (reset) {
        const { success, error } = await Workspace.updateUsers(
          workspace.id,
          userIds
        );
        return response.status(200).json({
          success,
          error,
          users: await Workspace.workspaceUsers(workspace.id),
        });
      }

      // Add new users to the workspace if they are not already in the workspace
      const existingUserIds = new Set(workspaceUsers.map((user) => user.userId));
      const usersToAdd = userIds.filter(
        (userId) => !existingUserIds.has(userId)
      );
      if (usersToAdd.length > 0)
        await WorkspaceUser.createManyUsers(usersToAdd, workspace.id);

      response.status(200).json({
        success: true,
        error: null,
        users: await Workspace.workspaceUsers(workspace.id),
      });
    } catch (e) {
      console.error(e);
      response.sendStatus(500).end();
    }
  }
);
app.post(
"/v1/admin/workspace-chats",
[validApiKey],

View file

@ -25,6 +25,19 @@ const DocumentVectors = {
}
},
where: async function (clause = {}, limit) {
try {
const results = await prisma.document_vectors.findMany({
where: clause,
take: limit || undefined,
});
return results;
} catch (error) {
console.error("Where query failed", error);
return [];
}
},
deleteForWorkspace: async function (workspaceId) {
const documents = await Document.forWorkspace(workspaceId);
const docIds = [...new Set(documents.map((doc) => doc.docId))];
@ -40,19 +53,6 @@ const DocumentVectors = {
}
},
/**
 * Find document vector records matching a clause.
 * @param {object} clause - Passed as the `where` filter to `findMany`.
 * @param {number} [limit] - Passed as `take`; a falsy value applies no limit.
 * @returns {Promise<object[]>} The matching records, or an empty array when the query fails.
 */
where: async function (clause = {}, limit) {
try {
const results = await prisma.document_vectors.findMany({
where: clause,
take: limit || undefined,
});
return results;
} catch (error) {
// Failures are logged and reported to the caller as "no results".
console.error("Where query failed", error);
return [];
}
},
deleteIds: async function (ids = []) {
try {
await prisma.document_vectors.deleteMany({
@ -64,6 +64,16 @@ const DocumentVectors = {
return false;
}
},
delete: async function (clause = {}) {
try {
await prisma.document_vectors.deleteMany({ where: clause });
return true;
} catch (error) {
console.error("Delete failed", error);
return false;
}
},
};
module.exports = { DocumentVectors };

View file

@ -243,6 +243,11 @@ const Workspace = {
}
},
/**
* Get all users for a workspace.
* @param {number} workspaceId - The ID of the workspace to get users for.
* @returns {Promise<Array<{userId: number, username: string, role: string}>>} A promise that resolves to an array of user objects.
*/
workspaceUsers: async function (workspaceId) {
try {
const users = (
@ -270,6 +275,12 @@ const Workspace = {
}
},
/**
* Update the users for a workspace. Will remove all existing users and replace them with the new list.
* @param {number} workspaceId - The ID of the workspace to update.
* @param {number[]} userIds - An array of user IDs to add to the workspace.
* @returns {Promise<{success: boolean, error: string | null}>} A promise that resolves to an object containing the success status and an error message if applicable.
*/
updateUsers: async function (workspaceId, userIds = []) {
try {
await WorkspaceUser.delete({ workspace_id: Number(workspaceId) });

View file

@ -17,6 +17,12 @@ const WorkspaceUser = {
return;
},
/**
* Create many workspace users.
* @param {Array<number>} userIds - An array of user IDs to create workspace users for.
* @param {number} workspaceId - The ID of the workspace to create workspace users for.
* @returns {Promise<void>} A promise that resolves when the workspace users are created.
*/
createManyUsers: async function (userIds = [], workspaceId) {
if (userIds.length === 0) return;
try {

View file

@ -635,6 +635,95 @@
}
}
}
},
"deprecated": true
}
},
"/v1/admin/workspaces/{workspaceSlug}/manage-users": {
"post": {
"tags": [
"Admin"
],
"description": "Set workspace permissions to be accessible by the given user ids and admins. Methods are disabled until multi user mode is enabled via the UI.",
"parameters": [
{
"name": "workspaceSlug",
"in": "path",
"required": true,
"schema": {
"type": "string"
},
"description": "slug of the workspace in the database"
}
],
"responses": {
"200": {
"description": "OK",
"content": {
"application/json": {
"schema": {
"type": "object",
"example": {
"success": true,
"error": null,
"users": [
{
"userId": 1,
"username": "main-admin",
"role": "admin"
},
{
"userId": 2,
"username": "sample-sam",
"role": "default"
}
]
}
}
}
}
},
"401": {
"description": "Instance is not in Multi-User mode. Method denied"
},
"403": {
"description": "Forbidden",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/InvalidAPIKey"
}
},
"application/xml": {
"schema": {
"$ref": "#/components/schemas/InvalidAPIKey"
}
}
}
},
"404": {
"description": "Not Found"
},
"500": {
"description": "Internal Server Error"
}
},
"requestBody": {
"description": "Array of user ids who will be given access to the target workspace. <code>reset</code> will remove all existing users from the workspace and only add the new users - default <code>false</code>.",
"required": true,
"content": {
"application/json": {
"example": {
"userIds": [
1,
2,
4,
12
],
"reset": false
}
}
}
}
}
},

View file

@ -77,17 +77,65 @@ class GenericOpenAiLLM {
return true;
}
/**
* Generates appropriate content array for a message + attachments.
*
* ## Developer Note
* This function assumes the generic OpenAI provider is _actually_ OpenAI compatible.
* For example, Ollama is "OpenAI compatible" but does not support images as a content array.
* The contentString also is the base64 string WITH `data:image/xxx;base64,` prefix, which may not be the case for all providers.
* If your provider does not work exactly this way, then attachments will not function or potentially break vision requests.
* If you encounter this issue, you are welcome to open an issue asking for your specific provider to be supported.
*
* This function will **not** be updated for providers that **do not** support images as a content array like OpenAI does.
* Do not open issues to update this function due to your specific provider not being compatible. Open an issue to request support for your specific provider.
* @param {Object} props
* @param {string} props.userPrompt - the user prompt to be sent to the model
* @param {import("../../helpers").Attachment[]} props.attachments - the array of attachments to be sent to the model
* @returns {string|object[]}
*/
#generateContent({ userPrompt, attachments = [] }) {
if (!attachments.length) {
return userPrompt;
}
const content = [{ type: "text", text: userPrompt }];
for (let attachment of attachments) {
content.push({
type: "image_url",
image_url: {
url: attachment.contentString,
detail: "high",
},
});
}
return content.flat();
}
/**
* Construct the user prompt for this model.
* @param {{attachments: import("../../helpers").Attachment[]}} param0
* @returns
*/
constructPrompt({
systemPrompt = "",
contextTexts = [],
chatHistory = [],
userPrompt = "",
attachments = [],
}) {
// The system message is the system prompt followed by whatever
// #appendContext produces for the context texts.
const prompt = {
role: "system",
content: `${systemPrompt}${this.#appendContext(contextTexts)}`,
};
// NOTE(review): this return ignores `attachments` and makes the return below
// unreachable. It looks like the pre-change line from the diff view - confirm
// that only the attachment-aware return is meant to remain.
return [prompt, ...chatHistory, { role: "user", content: userPrompt }];
// Message order: system prompt, prior chat history, then the new user turn
// whose content is built by #generateContent from the prompt and attachments.
return [
prompt,
...chatHistory,
{
role: "user",
content: this.#generateContent({ userPrompt, attachments }),
},
];
}
async getChatCompletion(messages = null, { temperature = 0.7 }) {

View file

@ -281,6 +281,16 @@ async function getWatchedDocumentFilenames(filenames = []) {
}, {});
}
/**
 * Purges the entire vector-cache folder and recreates it empty.
 *
 * Removal is recursive and forced, so a folder that does not exist yet is not
 * an error. The folder is recreated recursively so that a missing parent
 * directory does not make the purge throw.
 * @returns {void}
 */
function purgeEntireVectorCache() {
  fs.rmSync(vectorCachePath, { recursive: true, force: true });
  fs.mkdirSync(vectorCachePath, { recursive: true });
}
module.exports = {
findDocumentInDocuments,
cachedVectorInformation,
@ -293,4 +303,5 @@ module.exports = {
isWithin,
documentsPath,
hasVectorCachedFiles,
purgeEntireVectorCache,
};

View file

@ -52,10 +52,11 @@
/**
* Gets the systems current vector database provider.
* @param {('pinecone' | 'chroma' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explicit provider.
* @returns { BaseVectorDatabaseProvider}
*/
function getVectorDbClass() {
const vectorSelection = process.env.VECTOR_DB || "lancedb";
function getVectorDbClass(getExactly = null) {
const vectorSelection = getExactly ?? process.env.VECTOR_DB ?? "lancedb";
switch (vectorSelection) {
case "pinecone":
const { Pinecone } = require("../vectorDbProviders/pinecone");

View file

@ -1,3 +1,5 @@
const { resetAllVectorStores } = require("../vectorStore/resetAllVectorStores");
const KEY_MAPPING = {
LLMProvider: {
envKey: "LLM_PROVIDER",
@ -248,6 +250,7 @@ const KEY_MAPPING = {
EmbeddingEngine: {
envKey: "EMBEDDING_ENGINE",
checks: [supportedEmbeddingModel],
postUpdate: [handleVectorStoreReset],
},
EmbeddingBasePath: {
envKey: "EMBEDDING_BASE_PATH",
@ -256,6 +259,7 @@ const KEY_MAPPING = {
EmbeddingModelPref: {
envKey: "EMBEDDING_MODEL_PREF",
checks: [isNotEmpty],
postUpdate: [handleVectorStoreReset],
},
EmbeddingModelMaxChunkLength: {
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
@ -276,6 +280,7 @@ const KEY_MAPPING = {
VectorDB: {
envKey: "VECTOR_DB",
checks: [isNotEmpty, supportedVectorDB],
postUpdate: [handleVectorStoreReset],
},
// Chroma Options
@ -878,6 +883,24 @@ function noRestrictedChars(input = "") {
: null;
}
/**
 * Post-update hook that resets the vector stores when a vector-related
 * setting actually changes.
 * @param {string} key - The settings key that was updated.
 * @param {string} prevValue - The value before the update.
 * @param {string} nextValue - The value after the update.
 * @returns {Promise<boolean|undefined>} Undefined when the value did not change,
 * false for keys this hook does not handle, otherwise the result of `resetAllVectorStores`.
 */
async function handleVectorStoreReset(key, prevValue, nextValue) {
  const unchanged = prevValue === nextValue;
  if (unchanged) return;

  switch (key) {
    case "VectorDB": {
      // The provider itself changed: reset the namespaces of the previous one.
      console.log(
        `Vector configuration changed from ${prevValue} to ${nextValue} - resetting ${prevValue} namespaces`
      );
      return await resetAllVectorStores({ vectorDbKey: prevValue });
    }
    case "EmbeddingEngine":
    case "EmbeddingModelPref": {
      // The embedder changed: reset the provider named in the environment.
      console.log(
        `${key} changed from ${prevValue} to ${nextValue} - resetting ${process.env.VECTOR_DB} namespaces`
      );
      return await resetAllVectorStores({ vectorDbKey: process.env.VECTOR_DB });
    }
    default:
      return false;
  }
}
// This will force update .env variables which for any which reason were not able to be parsed or
// read from an ENV file as this seems to be a complicating step for many so allowing people to write
// to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks

View file

@ -0,0 +1,48 @@
const { Workspace } = require("../../models/workspace");
const { Document } = require("../../models/documents");
const { DocumentVectors } = require("../../models/vectors");
const { EventLogs } = require("../../models/eventLogs");
const { purgeEntireVectorCache } = require("../files");
const { getVectorDbClass } = require("../helpers");
/**
 * Resets all vector data and associated content, in this order:
 * - Purges the entire vector-cache folder.
 * - Deletes all document vectors from the database.
 * - Deletes all documents from the database.
 * - Logs a `workspace_vectors_reset` event.
 * - Deletes the vector db namespace of every workspace (failures are logged per workspace and skipped).
 * @param {Object} param0
 * @param {string} param0.vectorDbKey - The vector database provider name whose namespaces will be reset.
 * @returns {Promise<boolean>} - True if successful, false otherwise.
 */
async function resetAllVectorStores({ vectorDbKey }) {
  try {
    // Read the workspaces up front; their slugs name the namespaces to drop.
    const allWorkspaces = await Workspace.where();

    purgeEntireVectorCache();
    await DocumentVectors.delete();
    await Document.delete();
    await EventLogs.logEvent("workspace_vectors_reset", {
      reason: "System vector configuration changed",
    });

    console.log(
      "Resetting anythingllm managed vector namespaces for",
      vectorDbKey
    );
    const provider = getVectorDbClass(vectorDbKey);

    // A namespace that fails to delete must not abort the remaining ones.
    const dropNamespace = async (workspace) => {
      try {
        await provider["delete-namespace"]({ namespace: workspace.slug });
      } catch (e) {
        console.error(e.message);
      }
    };

    for (const workspace of allWorkspaces) {
      await dropNamespace(workspace);
    }
    return true;
  } catch (error) {
    console.error("Failed to reset vector stores:", error);
    return false;
  }
}
module.exports = { resetAllVectorStores };