mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-23 13:08:11 +00:00
Purge cached docs and remove docs from all workspaces on vectorDB/embedder changes (#2819)
* wip remove all docs clear vector db on embedder/vector db change * purge all cached docs and remove docs from workspaces on vectordb/embedder change * lint * remove unneeded console log * remove reset vector stores endpoint and move to server side updateENV with postUpdate check * reset embed module * remove unused import * simplify deletion process rescoped document deletion to be more general for speed, everything needs to be reset anyway fixed issue where unembedded docs not in any workspaces, but cached, were not removed * add back missing readme file update warning text modals --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
d145602d5a
commit
ae510619f0
8 changed files with 154 additions and 47 deletions
frontend/src
components/ChangeWarning
pages/GeneralSettings
server
|
@ -1,4 +1,4 @@
|
||||||
import { Warning } from "@phosphor-icons/react";
|
import { Warning, X } from "@phosphor-icons/react";
|
||||||
|
|
||||||
export default function ChangeWarningModal({
|
export default function ChangeWarningModal({
|
||||||
warningText = "",
|
warningText = "",
|
||||||
|
@ -6,41 +6,55 @@ export default function ChangeWarningModal({
|
||||||
onConfirm,
|
onConfirm,
|
||||||
}) {
|
}) {
|
||||||
return (
|
return (
|
||||||
<div className="relative w-full max-w-2xl max-h-full">
|
<div className="w-full max-w-2xl bg-theme-bg-secondary rounded-lg shadow border-2 border-theme-modal-border overflow-hidden z-9999">
|
||||||
<div className="relative bg-main-gradient rounded-lg shadow">
|
<div className="relative p-6 border-b rounded-t border-theme-modal-border">
|
||||||
<div className="flex items-start justify-between p-4 border-b rounded-t border-gray-500/50">
|
<div className="w-full flex gap-x-2 items-center">
|
||||||
<div className="flex items-center gap-2">
|
<Warning className="text-red-500 w-6 h-6" weight="fill" />
|
||||||
<Warning
|
<h3 className="text-xl font-semibold text-red-500 overflow-hidden overflow-ellipsis whitespace-nowrap">
|
||||||
className="text-yellow-300 text-lg w-6 h-6"
|
WARNING - This action is irreversible
|
||||||
weight="fill"
|
</h3>
|
||||||
/>
|
|
||||||
<h3 className="text-xl font-semibold text-yellow-300">Warning</h3>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
<div className="w-[550px] p-6 text-white">
|
<button
|
||||||
<p>
|
onClick={onClose}
|
||||||
{warningText}
|
type="button"
|
||||||
|
className="absolute top-4 right-4 transition-all duration-300 bg-transparent rounded-lg text-sm p-1 inline-flex items-center hover:bg-theme-modal-border hover:border-theme-modal-border hover:border-opacity-50 border-transparent border"
|
||||||
|
>
|
||||||
|
<X size={24} weight="bold" className="text-white" />
|
||||||
|
</button>
|
||||||
|
</div>
|
||||||
|
<div
|
||||||
|
className="h-full w-full overflow-y-auto"
|
||||||
|
style={{ maxHeight: "calc(100vh - 200px)" }}
|
||||||
|
>
|
||||||
|
<div className="py-7 px-9 space-y-2 flex-col">
|
||||||
|
<p className="text-white">
|
||||||
|
{warningText.split("\\n").map((line, index) => (
|
||||||
|
<span key={index}>
|
||||||
|
{line}
|
||||||
|
<br />
|
||||||
|
</span>
|
||||||
|
))}
|
||||||
<br />
|
<br />
|
||||||
<br />
|
<br />
|
||||||
Are you sure you want to proceed?
|
Are you sure you want to proceed?
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
<div className="flex w-full justify-between items-center p-6 space-x-2 border-t rounded-b border-gray-500/50">
|
<div className="flex w-full justify-end items-center p-6 space-x-2 border-t border-theme-modal-border rounded-b">
|
||||||
<button
|
<button
|
||||||
onClick={onClose}
|
onClick={onClose}
|
||||||
type="button"
|
type="button"
|
||||||
className="px-4 py-2 rounded-lg text-white hover:bg-red-500 transition-all duration-300"
|
className="transition-all duration-300 bg-transparent text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
|
||||||
>
|
>
|
||||||
Cancel
|
Cancel
|
||||||
</button>
|
</button>
|
||||||
<button
|
<button
|
||||||
onClick={onConfirm}
|
onClick={onConfirm}
|
||||||
className="transition-all duration-300 border border-slate-200 px-4 py-2 rounded-lg text-white text-sm items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 focus:ring-gray-800"
|
type="submit"
|
||||||
>
|
className="transition-all duration-300 bg-red-500 light:text-white text-white hover:opacity-60 px-4 py-2 rounded-lg text-sm"
|
||||||
Confirm
|
>
|
||||||
</button>
|
Confirm
|
||||||
</div>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
);
|
);
|
||||||
|
|
|
@ -361,7 +361,7 @@ export default function GeneralEmbeddingPreference() {
|
||||||
)}
|
)}
|
||||||
<ModalWrapper isOpen={isOpen}>
|
<ModalWrapper isOpen={isOpen}>
|
||||||
<ChangeWarningModal
|
<ChangeWarningModal
|
||||||
warningText="Switching the embedding model will break previously embedded documents from working during chat. They will need to un-embed from every workspace and fully removed and re-uploaded so they can be embed by the new embedding model."
|
warningText="Switching the embedding model will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
|
||||||
onClose={closeModal}
|
onClose={closeModal}
|
||||||
onConfirm={handleSaveSettings}
|
onConfirm={handleSaveSettings}
|
||||||
/>
|
/>
|
||||||
|
|
|
@ -308,7 +308,7 @@ export default function GeneralVectorDatabase() {
|
||||||
)}
|
)}
|
||||||
<ModalWrapper isOpen={isOpen}>
|
<ModalWrapper isOpen={isOpen}>
|
||||||
<ChangeWarningModal
|
<ChangeWarningModal
|
||||||
warningText="Switching the vector database will ignore previously embedded documents and future similarity search results. They will need to be re-added to each workspace."
|
warningText="Switching the vector database will reset all previously embedded documents in all workspaces.\n\nConfirming will clear all embeddings from your vector database and remove all documents from your workspaces. Your uploaded documents will not be deleted, they will be available for re-embedding."
|
||||||
onClose={closeModal}
|
onClose={closeModal}
|
||||||
onConfirm={handleSaveSettings}
|
onConfirm={handleSaveSettings}
|
||||||
/>
|
/>
|
||||||
|
|
|
@ -25,6 +25,19 @@ const DocumentVectors = {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
where: async function (clause = {}, limit) {
|
||||||
|
try {
|
||||||
|
const results = await prisma.document_vectors.findMany({
|
||||||
|
where: clause,
|
||||||
|
take: limit || undefined,
|
||||||
|
});
|
||||||
|
return results;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Where query failed", error);
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
},
|
||||||
|
|
||||||
deleteForWorkspace: async function (workspaceId) {
|
deleteForWorkspace: async function (workspaceId) {
|
||||||
const documents = await Document.forWorkspace(workspaceId);
|
const documents = await Document.forWorkspace(workspaceId);
|
||||||
const docIds = [...new Set(documents.map((doc) => doc.docId))];
|
const docIds = [...new Set(documents.map((doc) => doc.docId))];
|
||||||
|
@ -40,19 +53,6 @@ const DocumentVectors = {
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
where: async function (clause = {}, limit) {
|
|
||||||
try {
|
|
||||||
const results = await prisma.document_vectors.findMany({
|
|
||||||
where: clause,
|
|
||||||
take: limit || undefined,
|
|
||||||
});
|
|
||||||
return results;
|
|
||||||
} catch (error) {
|
|
||||||
console.error("Where query failed", error);
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
deleteIds: async function (ids = []) {
|
deleteIds: async function (ids = []) {
|
||||||
try {
|
try {
|
||||||
await prisma.document_vectors.deleteMany({
|
await prisma.document_vectors.deleteMany({
|
||||||
|
@ -64,6 +64,16 @@ const DocumentVectors = {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
|
delete: async function (clause = {}) {
|
||||||
|
try {
|
||||||
|
await prisma.document_vectors.deleteMany({ where: clause });
|
||||||
|
return true;
|
||||||
|
} catch (error) {
|
||||||
|
console.error("Delete failed", error);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = { DocumentVectors };
|
module.exports = { DocumentVectors };
|
||||||
|
|
|
@ -281,6 +281,16 @@ async function getWatchedDocumentFilenames(filenames = []) {
|
||||||
}, {});
|
}, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Purges the entire vector-cache folder and recreates it empty.
 *
 * Removal uses `force: true`, so a folder that does not exist yet is not an error.
 * Re-creation uses `recursive: true` for the same reason: if the parent directory is
 * missing, a plain `mkdirSync` would throw ENOENT and leave the system with no cache
 * folder at all.
 * @returns {void}
 */
function purgeEntireVectorCache() {
  fs.rmSync(vectorCachePath, { recursive: true, force: true });
  fs.mkdirSync(vectorCachePath, { recursive: true });
}
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
findDocumentInDocuments,
|
findDocumentInDocuments,
|
||||||
cachedVectorInformation,
|
cachedVectorInformation,
|
||||||
|
@ -293,4 +303,5 @@ module.exports = {
|
||||||
isWithin,
|
isWithin,
|
||||||
documentsPath,
|
documentsPath,
|
||||||
hasVectorCachedFiles,
|
hasVectorCachedFiles,
|
||||||
|
purgeEntireVectorCache,
|
||||||
};
|
};
|
||||||
|
|
|
@ -52,10 +52,11 @@
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the systems current vector database provider.
|
* Gets the systems current vector database provider.
|
||||||
|
* @param {('pinecone' | 'chroma' | 'lancedb' | 'weaviate' | 'qdrant' | 'milvus' | 'zilliz' | 'astra') | null} getExactly - If provided, this will return an explicit provider.
|
||||||
* @returns { BaseVectorDatabaseProvider}
|
* @returns { BaseVectorDatabaseProvider}
|
||||||
*/
|
*/
|
||||||
function getVectorDbClass() {
|
function getVectorDbClass(getExactly = null) {
|
||||||
const vectorSelection = process.env.VECTOR_DB || "lancedb";
|
const vectorSelection = getExactly ?? process.env.VECTOR_DB ?? "lancedb";
|
||||||
switch (vectorSelection) {
|
switch (vectorSelection) {
|
||||||
case "pinecone":
|
case "pinecone":
|
||||||
const { Pinecone } = require("../vectorDbProviders/pinecone");
|
const { Pinecone } = require("../vectorDbProviders/pinecone");
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
const { resetAllVectorStores } = require("../vectorStore/resetAllVectorStores");
|
||||||
|
|
||||||
const KEY_MAPPING = {
|
const KEY_MAPPING = {
|
||||||
LLMProvider: {
|
LLMProvider: {
|
||||||
envKey: "LLM_PROVIDER",
|
envKey: "LLM_PROVIDER",
|
||||||
|
@ -248,6 +250,7 @@ const KEY_MAPPING = {
|
||||||
EmbeddingEngine: {
|
EmbeddingEngine: {
|
||||||
envKey: "EMBEDDING_ENGINE",
|
envKey: "EMBEDDING_ENGINE",
|
||||||
checks: [supportedEmbeddingModel],
|
checks: [supportedEmbeddingModel],
|
||||||
|
postUpdate: [handleVectorStoreReset],
|
||||||
},
|
},
|
||||||
EmbeddingBasePath: {
|
EmbeddingBasePath: {
|
||||||
envKey: "EMBEDDING_BASE_PATH",
|
envKey: "EMBEDDING_BASE_PATH",
|
||||||
|
@ -256,6 +259,7 @@ const KEY_MAPPING = {
|
||||||
EmbeddingModelPref: {
|
EmbeddingModelPref: {
|
||||||
envKey: "EMBEDDING_MODEL_PREF",
|
envKey: "EMBEDDING_MODEL_PREF",
|
||||||
checks: [isNotEmpty],
|
checks: [isNotEmpty],
|
||||||
|
postUpdate: [handleVectorStoreReset],
|
||||||
},
|
},
|
||||||
EmbeddingModelMaxChunkLength: {
|
EmbeddingModelMaxChunkLength: {
|
||||||
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
|
envKey: "EMBEDDING_MODEL_MAX_CHUNK_LENGTH",
|
||||||
|
@ -276,6 +280,7 @@ const KEY_MAPPING = {
|
||||||
VectorDB: {
|
VectorDB: {
|
||||||
envKey: "VECTOR_DB",
|
envKey: "VECTOR_DB",
|
||||||
checks: [isNotEmpty, supportedVectorDB],
|
checks: [isNotEmpty, supportedVectorDB],
|
||||||
|
postUpdate: [handleVectorStoreReset],
|
||||||
},
|
},
|
||||||
|
|
||||||
// Chroma Options
|
// Chroma Options
|
||||||
|
@ -878,6 +883,24 @@ function noRestrictedChars(input = "") {
|
||||||
: null;
|
: null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * postUpdate hook registered on the `EmbeddingEngine`, `EmbeddingModelPref` and
 * `VectorDB` entries of KEY_MAPPING. Resets all vector stores when one of those
 * settings actually changes.
 *
 * - `VectorDB`: the provider that was in use BEFORE the change is reset.
 * - `EmbeddingEngine` / `EmbeddingModelPref`: the currently configured provider is reset.
 *
 * @param {string} key - KEY_MAPPING key that was updated.
 * @param {string|undefined} prevValue - Value before the update.
 * @param {string} nextValue - Value after the update.
 * @returns {Promise<boolean|undefined>} `undefined` when nothing changed, `false` for keys
 *   this hook does not handle, otherwise the result of `resetAllVectorStores`.
 */
async function handleVectorStoreReset(key, prevValue, nextValue) {
  if (prevValue === nextValue) return;

  if (key === "VectorDB") {
    // An unset/empty previous value means the default provider ("lancedb") was in effect.
    // Forwarding `undefined` would make the provider lookup fall back to
    // process.env.VECTOR_DB, which presumably already holds the NEW provider by the time
    // postUpdate hooks run (TODO confirm against updateENV) and would reset the wrong store.
    const previousProvider = prevValue || "lancedb";
    console.log(
      `Vector configuration changed from ${previousProvider} to ${nextValue} - resetting ${previousProvider} namespaces`
    );
    return await resetAllVectorStores({ vectorDbKey: previousProvider });
  }

  if (key === "EmbeddingEngine" || key === "EmbeddingModelPref") {
    // Same default as the provider lookup, so the log line never prints "undefined".
    const activeProvider = process.env.VECTOR_DB || "lancedb";
    console.log(
      `${key} changed from ${prevValue} to ${nextValue} - resetting ${activeProvider} namespaces`
    );
    return await resetAllVectorStores({ vectorDbKey: activeProvider });
  }

  return false;
}
|
||||||
|
|
||||||
// This will force update .env variables which for any which reason were not able to be parsed or
|
// This will force update .env variables which for any which reason were not able to be parsed or
|
||||||
// read from an ENV file as this seems to be a complicating step for many so allowing people to write
|
// read from an ENV file as this seems to be a complicating step for many so allowing people to write
|
||||||
// to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks
|
// to the process will at least alleviate that issue. It does not perform comprehensive validity checks or sanity checks
|
||||||
|
|
48
server/utils/vectorStore/resetAllVectorStores.js
Normal file
48
server/utils/vectorStore/resetAllVectorStores.js
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
const { Workspace } = require("../../models/workspace");
|
||||||
|
const { Document } = require("../../models/documents");
|
||||||
|
const { DocumentVectors } = require("../../models/vectors");
|
||||||
|
const { EventLogs } = require("../../models/eventLogs");
|
||||||
|
const { purgeEntireVectorCache } = require("../files");
|
||||||
|
const { getVectorDbClass } = require("../helpers");
|
||||||
|
|
||||||
|
/**
 * Resets all vector database and associated content:
 * - Purges the entire vector-cache folder.
 * - Deletes all document vectors from the database.
 * - Deletes all documents from the database.
 * - Logs an event indicating the reset.
 * - Deletes the vector db namespace of each workspace (best effort, failures are logged per workspace).
 * @param {Object} [options]
 * @param {string} [options.vectorDbKey] - The _previous_ vector database provider name that we will be resetting.
 *   When omitted, `getVectorDbClass` decides which provider to use.
 * @returns {Promise<boolean>} - True if successful, false otherwise.
 */
async function resetAllVectorStores({ vectorDbKey } = {}) {
  try {
    // Collected first: the namespace loop below needs every workspace slug.
    const workspaces = await Workspace.where();
    purgeEntireVectorCache(); // Purges the entire vector-cache folder.

    // DocumentVectors.delete() logs and returns false instead of throwing, so its result
    // has to be checked explicitly or a failed deletion would be reported as success.
    const vectorRowsCleared = await DocumentVectors.delete(); // Deletes all document vectors from the database.
    await Document.delete(); // Deletes all documents from the database.
    await EventLogs.logEvent("workspace_vectors_reset", {
      reason: "System vector configuration changed",
    });

    console.log(
      "Resetting anythingllm managed vector namespaces for",
      vectorDbKey
    );
    const VectorDb = getVectorDbClass(vectorDbKey);
    for (const workspace of workspaces) {
      try {
        await VectorDb["delete-namespace"]({ namespace: workspace.slug });
      } catch (e) {
        // A namespace that cannot be removed must not stop the remaining ones.
        console.error(e.message);
      }
    }

    if (vectorRowsCleared === false) {
      console.error(
        "Failed to reset vector stores:",
        "document vectors could not be deleted from the database"
      );
      return false;
    }
    return true;
  } catch (error) {
    console.error("Failed to reset vector stores:", error);
    return false;
  }
}
|
||||||
|
|
||||||
|
module.exports = { resetAllVectorStores };
|
Loading…
Add table
Reference in a new issue