mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-03-16 07:02:22 +00:00
Add vector search API endpoint (#2815)
* Add vector search API endpoint * Add missing import * Modify the data that is returned * Change similarityThreshold to scoreThreshold As this is what is actually returned by the search * Removing logging (oops!) * chore: regen swagger docs for new endpoint fix: update function to sanity check values to prevent crashes during search --------- Co-authored-by: Scott Bowler <scott@dcsdigital.co.uk>
This commit is contained in:
parent
e4a556d551
commit
538078747d
2 changed files with 206 additions and 1 deletions
server
|
@ -4,7 +4,7 @@ const { Telemetry } = require("../../../models/telemetry");
|
|||
const { DocumentVectors } = require("../../../models/vectors");
|
||||
const { Workspace } = require("../../../models/workspace");
|
||||
const { WorkspaceChats } = require("../../../models/workspaceChats");
|
||||
const { getVectorDbClass } = require("../../../utils/helpers");
|
||||
const { getVectorDbClass, getLLMProvider } = require("../../../utils/helpers");
|
||||
const { multiUserMode, reqBody } = require("../../../utils/http");
|
||||
const { validApiKey } = require("../../../utils/middleware/validApiKey");
|
||||
const { VALID_CHAT_MODE } = require("../../../utils/chats/stream");
|
||||
|
@ -841,6 +841,137 @@ function apiWorkspaceEndpoints(app) {
|
|||
}
|
||||
}
|
||||
);
|
||||
|
||||
app.post(
|
||||
"/v1/workspace/:slug/vector-search",
|
||||
[validApiKey],
|
||||
async (request, response) => {
|
||||
/*
|
||||
#swagger.tags = ['Workspaces']
|
||||
#swagger.description = 'Perform a vector similarity search in a workspace'
|
||||
#swagger.parameters['slug'] = {
|
||||
in: 'path',
|
||||
description: 'Unique slug of workspace to search in',
|
||||
required: true,
|
||||
type: 'string'
|
||||
}
|
||||
#swagger.requestBody = {
|
||||
description: 'Query to perform vector search with and optional parameters',
|
||||
required: true,
|
||||
content: {
|
||||
"application/json": {
|
||||
example: {
|
||||
query: "What is the meaning of life?",
|
||||
topN: 4,
|
||||
scoreThreshold: 0.75
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#swagger.responses[200] = {
|
||||
content: {
|
||||
"application/json": {
|
||||
schema: {
|
||||
type: 'object',
|
||||
example: {
|
||||
results: [
|
||||
{
|
||||
id: "5a6bee0a-306c-47fc-942b-8ab9bf3899c4",
|
||||
text: "Document chunk content...",
|
||||
metadata: {
|
||||
url: "file://document.txt",
|
||||
title: "document.txt",
|
||||
author: "no author specified",
|
||||
description: "no description found",
|
||||
docSource: "post:123456",
|
||||
chunkSource: "document.txt",
|
||||
published: "12/1/2024, 11:39:39 AM",
|
||||
wordCount: 8,
|
||||
tokenCount: 9
|
||||
},
|
||||
distance: 0.541887640953064,
|
||||
score: 0.45811235904693604
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
try {
|
||||
const { slug } = request.params;
|
||||
const { query, topN, scoreThreshold } = reqBody(request);
|
||||
const workspace = await Workspace.get({ slug: String(slug) });
|
||||
|
||||
if (!workspace)
|
||||
return response.status(400).json({
|
||||
message: `Workspace ${slug} is not a valid workspace.`,
|
||||
});
|
||||
|
||||
if (!query?.length)
|
||||
return response.status(400).json({
|
||||
message: "Query parameter cannot be empty.",
|
||||
});
|
||||
|
||||
const VectorDb = getVectorDbClass();
|
||||
const hasVectorizedSpace = await VectorDb.hasNamespace(workspace.slug);
|
||||
const embeddingsCount = await VectorDb.namespaceCount(workspace.slug);
|
||||
|
||||
if (!hasVectorizedSpace || embeddingsCount === 0)
|
||||
return response
|
||||
.status(200)
|
||||
.json({
|
||||
results: [],
|
||||
message: "No embeddings found for this workspace.",
|
||||
});
|
||||
|
||||
/**
 * Resolve the similarity threshold to use for this search request.
 *
 * Reads `scoreThreshold` from the request body (closure variable) and
 * accepts it only when it is a number, or a string that is numeric in its
 * entirety, within the inclusive range [0, 1]. Anything else (missing,
 * empty, partially numeric such as "0.5abc", arrays, out of range) falls
 * back to the workspace's `similarityThreshold`, or 0.25 when the
 * workspace does not define one.
 *
 * @returns {number} The threshold handed to the similarity search.
 */
const parseSimilarityThreshold = () => {
  const fallback = workspace?.similarityThreshold ?? 0.25;

  let parsed = NaN;
  if (typeof scoreThreshold === "number") {
    parsed = scoreThreshold;
  } else if (
    typeof scoreThreshold === "string" &&
    scoreThreshold.trim() !== ""
  ) {
    // Number() rejects trailing garbage ("0.5abc" -> NaN), whereas
    // parseFloat() would silently parse just the numeric prefix. The
    // empty-string guard above matters because Number("") is 0, not NaN.
    parsed = Number(scoreThreshold);
  }

  if (Number.isNaN(parsed) || parsed < 0 || parsed > 1) return fallback;
  return parsed;
};
|
||||
|
||||
/**
 * Resolve how many results to request from the similarity search.
 *
 * Reads `topN` from the request body (closure variable) and coerces it
 * with Number(). Values that are not finite (NaN, Infinity) or are below 1
 * fall back to the workspace's `topN`, or 4 when the workspace does not
 * define one. Fractional values are truncated so the search always
 * receives a whole-number limit (e.g. 2.9 -> 2).
 *
 * @returns {number} A positive integer result limit, or the fallback value.
 */
const parseTopN = () => {
  const fallback = workspace?.topN ?? 4;
  const parsed = Number(topN);

  // Number.isFinite() rejects NaN as well as +/-Infinity; the previous
  // isNaN() check let `Infinity` through as a result limit.
  if (!Number.isFinite(parsed) || parsed < 1) return fallback;

  return Math.floor(parsed);
};
|
||||
|
||||
const results = await VectorDb.performSimilaritySearch({
|
||||
namespace: workspace.slug,
|
||||
input: String(query),
|
||||
LLMConnector: getLLMProvider(),
|
||||
similarityThreshold: parseSimilarityThreshold(),
|
||||
topN: parseTopN(),
|
||||
});
|
||||
|
||||
response.status(200).json({
|
||||
results: results.sources.map((source) => ({
|
||||
id: source.id,
|
||||
text: source.text,
|
||||
metadata: {
|
||||
url: source.url,
|
||||
title: source.title,
|
||||
author: source.docAuthor,
|
||||
description: source.description,
|
||||
docSource: source.docSource,
|
||||
chunkSource: source.chunkSource,
|
||||
published: source.published,
|
||||
wordCount: source.wordCount,
|
||||
tokenCount: source.token_count_estimate,
|
||||
},
|
||||
distance: source._distance,
|
||||
score: source.score,
|
||||
})),
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e.message, e);
|
||||
response.sendStatus(500).end();
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
module.exports = { apiWorkspaceEndpoints };
|
||||
|
|
|
@ -2056,6 +2056,80 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"/v1/workspace/{slug}/vector-search": {
|
||||
"post": {
|
||||
"tags": [
|
||||
"Workspaces"
|
||||
],
|
||||
"description": "Perform a vector similarity search in a workspace",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "slug",
|
||||
"in": "path",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Unique slug of workspace to search in"
|
||||
}
|
||||
],
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "OK",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"type": "object",
|
||||
"example": {
|
||||
"results": [
|
||||
{
|
||||
"id": "5a6bee0a-306c-47fc-942b-8ab9bf3899c4",
|
||||
"text": "Document chunk content...",
|
||||
"metadata": {
|
||||
"url": "file://document.txt",
|
||||
"title": "document.txt",
|
||||
"author": "no author specified",
|
||||
"description": "no description found",
|
||||
"docSource": "post:123456",
|
||||
"chunkSource": "document.txt",
|
||||
"published": "12/1/2024, 11:39:39 AM",
|
||||
"wordCount": 8,
|
||||
"tokenCount": 9
|
||||
},
|
||||
"distance": 0.541887640953064,
|
||||
"score": 0.45811235904693604
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"description": "Bad Request"
|
||||
},
|
||||
"403": {
|
||||
"description": "Forbidden"
|
||||
},
|
||||
"500": {
|
||||
"description": "Internal Server Error"
|
||||
}
|
||||
},
|
||||
"requestBody": {
|
||||
"description": "Query to perform vector search with and optional parameters",
|
||||
"required": true,
|
||||
"content": {
|
||||
"application/json": {
|
||||
"example": {
|
||||
"query": "What is the meaning of life?",
|
||||
"topN": 4,
|
||||
"scoreThreshold": 0.75
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"/v1/system/env-dump": {
|
||||
"get": {
|
||||
"tags": [
|
||||
|
|
Loading…
Add table
Reference in a new issue