mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
315 show citations based on relevancy score (#316)
* settings for similarity score threshold and prisma schema updated
* prisma schema migration for adding similarityScore setting
* WIP
* Min score default change
* added similarityThreshold checking for all vectordb providers
* linting

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
This commit is contained in:
parent
d34ec68702
commit
88d4808c52
10 changed files with 107 additions and 21 deletions
frontend/src/components/Modals/MangeWorkspace/Settings
server
models
prisma
utils
|
@ -17,6 +17,9 @@ function castToType(key, value) {
|
|||
openAiHistory: {
|
||||
cast: (value) => Number(value),
|
||||
},
|
||||
similarityThreshold: {
|
||||
cast: (value) => parseFloat(value),
|
||||
},
|
||||
};
|
||||
|
||||
if (!definitions.hasOwnProperty(key)) return value;
|
||||
|
@ -233,6 +236,39 @@ export default function WorkspaceSettings({ workspace }) {
|
|||
autoComplete="off"
|
||||
onChange={() => setHasChanges(true)}
|
||||
/>
|
||||
<div className="mt-4">
|
||||
<div className="flex flex-col">
|
||||
<label
|
||||
htmlFor="name"
|
||||
className="block text-sm font-medium text-white"
|
||||
>
|
||||
Document similarity threshold
|
||||
</label>
|
||||
<p className="text-white text-opacity-60 text-xs font-medium py-1.5">
|
||||
The minimum similarity score required for a source to be
|
||||
considered related to the chat. The higher the number, the
|
||||
more similar the source must be to the chat.
|
||||
</p>
|
||||
</div>
|
||||
<select
|
||||
name="similarityThreshold"
|
||||
defaultValue={workspace?.similarityThreshold ?? 0.25}
|
||||
className="bg-zinc-900 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
|
||||
onChange={() => setHasChanges(true)}
|
||||
required={true}
|
||||
>
|
||||
<option value={0.0}>No restriction</option>
|
||||
<option value={0.25}>
|
||||
Low (similarity score ≥ .25)
|
||||
</option>
|
||||
<option value={0.5}>
|
||||
Medium (similarity score ≥ .50)
|
||||
</option>
|
||||
<option value={0.75}>
|
||||
High (similarity score ≥ .75)
|
||||
</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -13,6 +13,7 @@ const Workspace = {
|
|||
"openAiHistory",
|
||||
"lastUpdatedAt",
|
||||
"openAiPrompt",
|
||||
"similarityThreshold",
|
||||
],
|
||||
|
||||
new: async function (name = null, creatorId = null) {
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
-- AlterTable
|
||||
ALTER TABLE "workspaces" ADD COLUMN "similarityThreshold" REAL DEFAULT 0.25;
|
|
@ -82,17 +82,18 @@ model welcome_messages {
|
|||
}
|
||||
|
||||
model workspaces {
|
||||
id Int @id @default(autoincrement())
|
||||
name String
|
||||
slug String @unique
|
||||
vectorTag String?
|
||||
createdAt DateTime @default(now())
|
||||
openAiTemp Float?
|
||||
openAiHistory Int @default(20)
|
||||
lastUpdatedAt DateTime @default(now())
|
||||
openAiPrompt String?
|
||||
workspace_users workspace_users[]
|
||||
documents workspace_documents[]
|
||||
id Int @id @default(autoincrement())
|
||||
name String
|
||||
slug String @unique
|
||||
vectorTag String?
|
||||
createdAt DateTime @default(now())
|
||||
openAiTemp Float?
|
||||
openAiHistory Int @default(20)
|
||||
lastUpdatedAt DateTime @default(now())
|
||||
openAiPrompt String?
|
||||
similarityThreshold Float? @default(0.25)
|
||||
workspace_users workspace_users[]
|
||||
documents workspace_documents[]
|
||||
}
|
||||
|
||||
model workspace_chats {
|
||||
|
|
|
@ -116,6 +116,7 @@ async function chatWithWorkspace(
|
|||
namespace: workspace.slug,
|
||||
input: message,
|
||||
LLMConnector,
|
||||
similarityThreshold: workspace?.similarityThreshold,
|
||||
});
|
||||
|
||||
// Failed similarity search.
|
||||
|
|
|
@ -59,7 +59,12 @@ const Chroma = {
|
|||
const namespace = await this.namespace(client, _namespace);
|
||||
return namespace?.vectorCount || 0;
|
||||
},
|
||||
similarityResponse: async function (client, namespace, queryVector) {
|
||||
similarityResponse: async function (
|
||||
client,
|
||||
namespace,
|
||||
queryVector,
|
||||
similarityThreshold = 0.25
|
||||
) {
|
||||
const collection = await client.getCollection({ name: namespace });
|
||||
const result = {
|
||||
contextTexts: [],
|
||||
|
@ -72,6 +77,11 @@ const Chroma = {
|
|||
nResults: 4,
|
||||
});
|
||||
response.ids[0].forEach((_, i) => {
|
||||
if (
|
||||
this.distanceToSimilarity(response.distances[0][i]) <
|
||||
similarityThreshold
|
||||
)
|
||||
return;
|
||||
result.contextTexts.push(response.documents[0][i]);
|
||||
result.sourceDocuments.push(response.metadatas[0][i]);
|
||||
result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
|
||||
|
@ -256,6 +266,7 @@ const Chroma = {
|
|||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
similarityThreshold = 0.25,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
@ -273,7 +284,8 @@ const Chroma = {
|
|||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
queryVector,
|
||||
similarityThreshold
|
||||
);
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
|
|
|
@ -54,7 +54,12 @@ const LanceDb = {
|
|||
embedder: function () {
|
||||
return new OpenAIEmbeddings({ openAIApiKey: process.env.OPEN_AI_KEY });
|
||||
},
|
||||
similarityResponse: async function (client, namespace, queryVector) {
|
||||
similarityResponse: async function (
|
||||
client,
|
||||
namespace,
|
||||
queryVector,
|
||||
similarityThreshold = 0.25
|
||||
) {
|
||||
const collection = await client.openTable(namespace);
|
||||
const result = {
|
||||
contextTexts: [],
|
||||
|
@ -69,6 +74,7 @@ const LanceDb = {
|
|||
.execute();
|
||||
|
||||
response.forEach((item) => {
|
||||
if (this.distanceToSimilarity(item.score) < similarityThreshold) return;
|
||||
const { vector: _, ...rest } = item;
|
||||
result.contextTexts.push(rest.text);
|
||||
result.sourceDocuments.push(rest);
|
||||
|
@ -229,6 +235,7 @@ const LanceDb = {
|
|||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
similarityThreshold = 0.25,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
@ -246,7 +253,8 @@ const LanceDb = {
|
|||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
queryVector,
|
||||
similarityThreshold
|
||||
);
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
|
|
|
@ -36,7 +36,12 @@ const Pinecone = {
|
|||
const namespace = await this.namespace(pineconeIndex, _namespace);
|
||||
return namespace?.vectorCount || 0;
|
||||
},
|
||||
similarityResponse: async function (index, namespace, queryVector) {
|
||||
similarityResponse: async function (
|
||||
index,
|
||||
namespace,
|
||||
queryVector,
|
||||
similarityThreshold = 0.25
|
||||
) {
|
||||
const result = {
|
||||
contextTexts: [],
|
||||
sourceDocuments: [],
|
||||
|
@ -52,6 +57,7 @@ const Pinecone = {
|
|||
});
|
||||
|
||||
response.matches.forEach((match) => {
|
||||
if (match.score < similarityThreshold) return;
|
||||
result.contextTexts.push(match.metadata.text);
|
||||
result.sourceDocuments.push(match);
|
||||
result.scores.push(match.score);
|
||||
|
@ -59,6 +65,7 @@ const Pinecone = {
|
|||
|
||||
return result;
|
||||
},
|
||||
|
||||
namespace: async function (index, namespace = null) {
|
||||
if (!namespace) throw new Error("No namespace value provided.");
|
||||
const { namespaces } = await index.describeIndexStats1();
|
||||
|
@ -225,6 +232,7 @@ const Pinecone = {
|
|||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
similarityThreshold = 0.25,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
@ -239,7 +247,8 @@ const Pinecone = {
|
|||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
pineconeIndex,
|
||||
namespace,
|
||||
queryVector
|
||||
queryVector,
|
||||
similarityThreshold
|
||||
);
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
|
|
|
@ -45,7 +45,12 @@ const QDrant = {
|
|||
const namespace = await this.namespace(client, _namespace);
|
||||
return namespace?.vectorCount || 0;
|
||||
},
|
||||
similarityResponse: async function (_client, namespace, queryVector) {
|
||||
similarityResponse: async function (
|
||||
_client,
|
||||
namespace,
|
||||
queryVector,
|
||||
similarityThreshold = 0.25
|
||||
) {
|
||||
const { client } = await this.connect();
|
||||
const result = {
|
||||
contextTexts: [],
|
||||
|
@ -60,6 +65,7 @@ const QDrant = {
|
|||
});
|
||||
|
||||
responses.forEach((response) => {
|
||||
if (response.score < similarityThreshold) return;
|
||||
result.contextTexts.push(response?.payload?.text || "");
|
||||
result.sourceDocuments.push({
|
||||
...(response?.payload || {}),
|
||||
|
@ -265,6 +271,7 @@ const QDrant = {
|
|||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
similarityThreshold = 0.25,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
@ -282,7 +289,8 @@ const QDrant = {
|
|||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
queryVector,
|
||||
similarityThreshold
|
||||
);
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
|
|
|
@ -72,7 +72,12 @@ const Weaviate = {
|
|||
return 0;
|
||||
}
|
||||
},
|
||||
similarityResponse: async function (client, namespace, queryVector) {
|
||||
similarityResponse: async function (
|
||||
client,
|
||||
namespace,
|
||||
queryVector,
|
||||
similarityThreshold = 0.25
|
||||
) {
|
||||
const result = {
|
||||
contextTexts: [],
|
||||
sourceDocuments: [],
|
||||
|
@ -97,6 +102,7 @@ const Weaviate = {
|
|||
_additional: { id, certainty },
|
||||
...rest
|
||||
} = response;
|
||||
if (certainty < similarityThreshold) return;
|
||||
result.contextTexts.push(rest.text);
|
||||
result.sourceDocuments.push({ ...rest, id });
|
||||
result.scores.push(certainty);
|
||||
|
@ -336,6 +342,7 @@ const Weaviate = {
|
|||
namespace = null,
|
||||
input = "",
|
||||
LLMConnector = null,
|
||||
similarityThreshold = 0.25,
|
||||
}) {
|
||||
if (!namespace || !input || !LLMConnector)
|
||||
throw new Error("Invalid request to performSimilaritySearch.");
|
||||
|
@ -353,7 +360,8 @@ const Weaviate = {
|
|||
const { contextTexts, sourceDocuments } = await this.similarityResponse(
|
||||
client,
|
||||
namespace,
|
||||
queryVector
|
||||
queryVector,
|
||||
similarityThreshold
|
||||
);
|
||||
|
||||
const sources = sourceDocuments.map((metadata, i) => {
|
||||
|
|
Loading…
Add table
Reference in a new issue