315 show citations based on relevancy score

* settings for similarity score threshold and prisma schema updated

* prisma schema migration for adding similarityScore setting

* WIP

* Min score default change

* added similarityThreshold checking for all vectordb providers

* linting

---------

Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
This commit is contained in:
Timothy Carambat 2023-11-06 16:49:29 -08:00 committed by GitHub
parent d34ec68702
commit 88d4808c52
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 107 additions and 21 deletions
frontend/src/components/Modals/MangeWorkspace/Settings
server
models
prisma
migrations/20231101001441_init
schema.prisma
utils
chats
vectorDbProviders
chroma
lance
pinecone
qdrant
weaviate

View file

@ -17,6 +17,9 @@ function castToType(key, value) {
openAiHistory: {
cast: (value) => Number(value),
},
similarityThreshold: {
cast: (value) => parseFloat(value),
},
};
if (!definitions.hasOwnProperty(key)) return value;
@ -233,6 +236,39 @@ export default function WorkspaceSettings({ workspace }) {
autoComplete="off"
onChange={() => setHasChanges(true)}
/>
<div className="mt-4">
<div className="flex flex-col">
<label
htmlFor="name"
className="block text-sm font-medium text-white"
>
Document similarity threshold
</label>
<p className="text-white text-opacity-60 text-xs font-medium py-1.5">
The minimum similarity score required for a source to be
considered related to the chat. The higher the number, the
more similar the source must be to the chat.
</p>
</div>
<select
name="similarityThreshold"
defaultValue={workspace?.similarityThreshold ?? 0.25}
className="bg-zinc-900 text-white text-sm rounded-lg focus:ring-blue-500 focus:border-blue-500 block w-full p-2.5"
onChange={() => setHasChanges(true)}
required={true}
>
<option value={0.0}>No restriction</option>
<option value={0.25}>
Low (similarity score &ge; .25)
</option>
<option value={0.5}>
Medium (similarity score &ge; .50)
</option>
<option value={0.75}>
High (similarity score &ge; .75)
</option>
</select>
</div>
</div>
</div>
</div>

View file

@ -13,6 +13,7 @@ const Workspace = {
"openAiHistory",
"lastUpdatedAt",
"openAiPrompt",
"similarityThreshold",
],
new: async function (name = null, creatorId = null) {

View file

@ -0,0 +1,2 @@
-- AlterTable
-- Adds a nullable REAL column "similarityThreshold" to the "workspaces" table,
-- with a column-level default of 0.25.
ALTER TABLE "workspaces" ADD COLUMN "similarityThreshold" REAL DEFAULT 0.25;

View file

@ -82,17 +82,18 @@ model welcome_messages {
}
model workspaces {
id Int @id @default(autoincrement())
name String
slug String @unique
vectorTag String?
createdAt DateTime @default(now())
openAiTemp Float?
openAiHistory Int @default(20)
lastUpdatedAt DateTime @default(now())
openAiPrompt String?
workspace_users workspace_users[]
documents workspace_documents[]
id Int @id @default(autoincrement())
name String
slug String @unique
vectorTag String?
createdAt DateTime @default(now())
openAiTemp Float?
openAiHistory Int @default(20)
lastUpdatedAt DateTime @default(now())
openAiPrompt String?
similarityThreshold Float? @default(0.25)
workspace_users workspace_users[]
documents workspace_documents[]
}
model workspace_chats {

View file

@ -116,6 +116,7 @@ async function chatWithWorkspace(
namespace: workspace.slug,
input: message,
LLMConnector,
similarityThreshold: workspace?.similarityThreshold,
});
// Failed similarity search.

View file

@ -59,7 +59,12 @@ const Chroma = {
const namespace = await this.namespace(client, _namespace);
return namespace?.vectorCount || 0;
},
similarityResponse: async function (client, namespace, queryVector) {
similarityResponse: async function (
client,
namespace,
queryVector,
similarityThreshold = 0.25
) {
const collection = await client.getCollection({ name: namespace });
const result = {
contextTexts: [],
@ -72,6 +77,11 @@ const Chroma = {
nResults: 4,
});
response.ids[0].forEach((_, i) => {
if (
this.distanceToSimilarity(response.distances[0][i]) <
similarityThreshold
)
return;
result.contextTexts.push(response.documents[0][i]);
result.sourceDocuments.push(response.metadatas[0][i]);
result.scores.push(this.distanceToSimilarity(response.distances[0][i]));
@ -256,6 +266,7 @@ const Chroma = {
namespace = null,
input = "",
LLMConnector = null,
similarityThreshold = 0.25,
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
@ -273,7 +284,8 @@ const Chroma = {
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
queryVector,
similarityThreshold
);
const sources = sourceDocuments.map((metadata, i) => {

View file

@ -54,7 +54,12 @@ const LanceDb = {
embedder: function () {
return new OpenAIEmbeddings({ openAIApiKey: process.env.OPEN_AI_KEY });
},
similarityResponse: async function (client, namespace, queryVector) {
similarityResponse: async function (
client,
namespace,
queryVector,
similarityThreshold = 0.25
) {
const collection = await client.openTable(namespace);
const result = {
contextTexts: [],
@ -69,6 +74,7 @@ const LanceDb = {
.execute();
response.forEach((item) => {
if (this.distanceToSimilarity(item.score) < similarityThreshold) return;
const { vector: _, ...rest } = item;
result.contextTexts.push(rest.text);
result.sourceDocuments.push(rest);
@ -229,6 +235,7 @@ const LanceDb = {
namespace = null,
input = "",
LLMConnector = null,
similarityThreshold = 0.25,
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
@ -246,7 +253,8 @@ const LanceDb = {
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
queryVector,
similarityThreshold
);
const sources = sourceDocuments.map((metadata, i) => {

View file

@ -36,7 +36,12 @@ const Pinecone = {
const namespace = await this.namespace(pineconeIndex, _namespace);
return namespace?.vectorCount || 0;
},
similarityResponse: async function (index, namespace, queryVector) {
similarityResponse: async function (
index,
namespace,
queryVector,
similarityThreshold = 0.25
) {
const result = {
contextTexts: [],
sourceDocuments: [],
@ -52,6 +57,7 @@ const Pinecone = {
});
response.matches.forEach((match) => {
if (match.score < similarityThreshold) return;
result.contextTexts.push(match.metadata.text);
result.sourceDocuments.push(match);
result.scores.push(match.score);
@ -59,6 +65,7 @@ const Pinecone = {
return result;
},
namespace: async function (index, namespace = null) {
if (!namespace) throw new Error("No namespace value provided.");
const { namespaces } = await index.describeIndexStats1();
@ -225,6 +232,7 @@ const Pinecone = {
namespace = null,
input = "",
LLMConnector = null,
similarityThreshold = 0.25,
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
@ -239,7 +247,8 @@ const Pinecone = {
const { contextTexts, sourceDocuments } = await this.similarityResponse(
pineconeIndex,
namespace,
queryVector
queryVector,
similarityThreshold
);
const sources = sourceDocuments.map((metadata, i) => {

View file

@ -45,7 +45,12 @@ const QDrant = {
const namespace = await this.namespace(client, _namespace);
return namespace?.vectorCount || 0;
},
similarityResponse: async function (_client, namespace, queryVector) {
similarityResponse: async function (
_client,
namespace,
queryVector,
similarityThreshold = 0.25
) {
const { client } = await this.connect();
const result = {
contextTexts: [],
@ -60,6 +65,7 @@ const QDrant = {
});
responses.forEach((response) => {
if (response.score < similarityThreshold) return;
result.contextTexts.push(response?.payload?.text || "");
result.sourceDocuments.push({
...(response?.payload || {}),
@ -265,6 +271,7 @@ const QDrant = {
namespace = null,
input = "",
LLMConnector = null,
similarityThreshold = 0.25,
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
@ -282,7 +289,8 @@ const QDrant = {
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
queryVector,
similarityThreshold
);
const sources = sourceDocuments.map((metadata, i) => {

View file

@ -72,7 +72,12 @@ const Weaviate = {
return 0;
}
},
similarityResponse: async function (client, namespace, queryVector) {
similarityResponse: async function (
client,
namespace,
queryVector,
similarityThreshold = 0.25
) {
const result = {
contextTexts: [],
sourceDocuments: [],
@ -97,6 +102,7 @@ const Weaviate = {
_additional: { id, certainty },
...rest
} = response;
if (certainty < similarityThreshold) return;
result.contextTexts.push(rest.text);
result.sourceDocuments.push({ ...rest, id });
result.scores.push(certainty);
@ -336,6 +342,7 @@ const Weaviate = {
namespace = null,
input = "",
LLMConnector = null,
similarityThreshold = 0.25,
}) {
if (!namespace || !input || !LLMConnector)
throw new Error("Invalid request to performSimilaritySearch.");
@ -353,7 +360,8 @@ const Weaviate = {
const { contextTexts, sourceDocuments } = await this.similarityResponse(
client,
namespace,
queryVector
queryVector,
similarityThreshold
);
const sources = sourceDocuments.map((metadata, i) => {