[FEAT] support pinecone serverless

* migrate pinecone package to latest version and migrate pinecone vectordb provider class

* remove the pinecone environment name env variable and update docs to reflect its removal and the completed serverless support

* migrate query for pinecone db

* typo in log

---------

Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
Sean Hatfield 2024-01-22 16:41:20 -08:00 committed by GitHub
parent ad5a35b662
commit 2f3db0e63a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 64 additions and 78 deletions
docker
frontend/src/components/VectorDBSelection/PineconeDBOptions
server

View file

@ -76,7 +76,6 @@ GID='1000'
# Enable all below if you are using vector database: Pinecone. # Enable all below if you are using vector database: Pinecone.
# VECTOR_DB="pinecone" # VECTOR_DB="pinecone"
# PINECONE_ENVIRONMENT=
# PINECONE_API_KEY= # PINECONE_API_KEY=
# PINECONE_INDEX= # PINECONE_INDEX=

View file

@ -17,23 +17,6 @@ export default function PineconeDBOptions({ settings }) {
spellCheck={false} spellCheck={false}
/> />
</div> </div>
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4">
Pinecone Index Environment
</label>
<input
type="text"
name="PineConeEnvironment"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="us-gcp-west-1"
defaultValue={settings?.PineConeEnvironment}
required={true}
autoComplete="off"
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60"> <div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-4"> <label className="text-white text-sm font-semibold block mb-4">
Pinecone Index Name Pinecone Index Name

View file

@ -73,7 +73,6 @@ JWT_SECRET="my-random-string-for-seeding" # Please generate random string at lea
# Enable all below if you are using vector database: Pinecone. # Enable all below if you are using vector database: Pinecone.
# VECTOR_DB="pinecone" # VECTOR_DB="pinecone"
# PINECONE_ENVIRONMENT=
# PINECONE_API_KEY= # PINECONE_API_KEY=
# PINECONE_INDEX= # PINECONE_INDEX=

View file

@ -40,7 +40,6 @@ function apiSystemEndpoints(app) {
example: { example: {
"settings": { "settings": {
"VectorDB": "pinecone", "VectorDB": "pinecone",
"PineConeEnvironment": "us-west4-gcp-free",
"PineConeKey": true, "PineConeKey": true,
"PineConeIndex": "my-pinecone-index", "PineConeIndex": "my-pinecone-index",
"LLMProvider": "azure", "LLMProvider": "azure",

View file

@ -32,7 +32,6 @@ const SystemSettings = {
LocalAiApiKey: !!process.env.LOCAL_AI_API_KEY, LocalAiApiKey: !!process.env.LOCAL_AI_API_KEY,
...(vectorDB === "pinecone" ...(vectorDB === "pinecone"
? { ? {
PineConeEnvironment: process.env.PINECONE_ENVIRONMENT,
PineConeKey: !!process.env.PINECONE_API_KEY, PineConeKey: !!process.env.PINECONE_API_KEY,
PineConeIndex: process.env.PINECONE_INDEX, PineConeIndex: process.env.PINECONE_INDEX,
} }

View file

@ -24,7 +24,7 @@
"@azure/openai": "1.0.0-beta.10", "@azure/openai": "1.0.0-beta.10",
"@google/generative-ai": "^0.1.3", "@google/generative-ai": "^0.1.3",
"@googleapis/youtube": "^9.0.0", "@googleapis/youtube": "^9.0.0",
"@pinecone-database/pinecone": "^0.1.6", "@pinecone-database/pinecone": "^2.0.1",
"@prisma/client": "5.3.0", "@prisma/client": "5.3.0",
"@qdrant/js-client-rest": "^1.4.0", "@qdrant/js-client-rest": "^1.4.0",
"@xenova/transformers": "^2.14.0", "@xenova/transformers": "^2.14.0",
@ -77,4 +77,4 @@
"nodemon": "^2.0.22", "nodemon": "^2.0.22",
"prettier": "^3.0.3" "prettier": "^3.0.3"
} }
} }

View file

@ -1941,7 +1941,6 @@
"example": { "example": {
"settings": { "settings": {
"VectorDB": "pinecone", "VectorDB": "pinecone",
"PineConeEnvironment": "us-west4-gcp-free",
"PineConeKey": true, "PineConeKey": true,
"PineConeIndex": "my-pinecone-index", "PineConeIndex": "my-pinecone-index",
"LLMProvider": "azure", "LLMProvider": "azure",

View file

@ -171,11 +171,6 @@ const KEY_MAPPING = {
envKey: "QDRANT_API_KEY", envKey: "QDRANT_API_KEY",
checks: [], checks: [],
}, },
PineConeEnvironment: {
envKey: "PINECONE_ENVIRONMENT",
checks: [],
},
PineConeKey: { PineConeKey: {
envKey: "PINECONE_API_KEY", envKey: "PINECONE_API_KEY",
checks: [], checks: [],

View file

@ -19,7 +19,6 @@
``` ```
VECTOR_DB="pinecone" VECTOR_DB="pinecone"
PINECONE_ENVIRONMENT=us-west4-gcp-free
PINECONE_API_KEY=sklive-123xyz PINECONE_API_KEY=sklive-123xyz
PINECONE_INDEX=my-primary-index # the value from the first instruction! PINECONE_INDEX=my-primary-index # the value from the first instruction!
``` ```

View file

@ -1,4 +1,4 @@
const { PineconeClient } = require("@pinecone-database/pinecone"); const { Pinecone } = require("@pinecone-database/pinecone");
const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter"); const { RecursiveCharacterTextSplitter } = require("langchain/text_splitter");
const { storeVectorResult, cachedVectorInformation } = require("../../files"); const { storeVectorResult, cachedVectorInformation } = require("../../files");
const { v4: uuidv4 } = require("uuid"); const { v4: uuidv4 } = require("uuid");
@ -8,37 +8,35 @@ const {
getEmbeddingEngineSelection, getEmbeddingEngineSelection,
} = require("../../helpers"); } = require("../../helpers");
const Pinecone = { const PineconeDB = {
name: "Pinecone", name: "Pinecone",
connect: async function () { connect: async function () {
if (process.env.VECTOR_DB !== "pinecone") if (process.env.VECTOR_DB !== "pinecone")
throw new Error("Pinecone::Invalid ENV settings"); throw new Error("Pinecone::Invalid ENV settings");
const client = new PineconeClient(); const client = new Pinecone({
await client.init({
apiKey: process.env.PINECONE_API_KEY, apiKey: process.env.PINECONE_API_KEY,
environment: process.env.PINECONE_ENVIRONMENT,
});
const pineconeIndex = client.Index(process.env.PINECONE_INDEX);
const { status } = await client.describeIndex({
indexName: process.env.PINECONE_INDEX,
}); });
if (!status.ready) throw new Error("Pinecode::Index not ready."); const pineconeIndex = client.Index(process.env.PINECONE_INDEX);
const { status } = await client.describeIndex(process.env.PINECONE_INDEX);
if (!status.ready) throw new Error("Pinecone::Index not ready.");
return { client, pineconeIndex, indexName: process.env.PINECONE_INDEX }; return { client, pineconeIndex, indexName: process.env.PINECONE_INDEX };
}, },
totalVectors: async function () { totalVectors: async function () {
const { pineconeIndex } = await this.connect(); const { pineconeIndex } = await this.connect();
const { namespaces } = await pineconeIndex.describeIndexStats1(); const { namespaces } = await pineconeIndex.describeIndexStats();
return Object.values(namespaces).reduce( return Object.values(namespaces).reduce(
(a, b) => a + (b?.vectorCount || 0), (a, b) => a + (b?.recordCount || 0),
0 0
); );
}, },
namespaceCount: async function (_namespace = null) { namespaceCount: async function (_namespace = null) {
const { pineconeIndex } = await this.connect(); const { pineconeIndex } = await this.connect();
const namespace = await this.namespace(pineconeIndex, _namespace); const namespace = await this.namespace(pineconeIndex, _namespace);
return namespace?.vectorCount || 0; return namespace?.recordCount || 0;
}, },
similarityResponse: async function ( similarityResponse: async function (
index, index,
@ -52,13 +50,12 @@ const Pinecone = {
sourceDocuments: [], sourceDocuments: [],
scores: [], scores: [],
}; };
const response = await index.query({
queryRequest: { const pineconeNamespace = index.namespace(namespace);
namespace, const response = await pineconeNamespace.query({
vector: queryVector, vector: queryVector,
topK: topN, topK: topN,
includeMetadata: true, includeMetadata: true,
},
}); });
response.matches.forEach((match) => { response.matches.forEach((match) => {
@ -70,10 +67,9 @@ const Pinecone = {
return result; return result;
}, },
namespace: async function (index, namespace = null) { namespace: async function (index, namespace = null) {
if (!namespace) throw new Error("No namespace value provided."); if (!namespace) throw new Error("No namespace value provided.");
const { namespaces } = await index.describeIndexStats1(); const { namespaces } = await index.describeIndexStats();
return namespaces.hasOwnProperty(namespace) ? namespaces[namespace] : null; return namespaces.hasOwnProperty(namespace) ? namespaces[namespace] : null;
}, },
hasNamespace: async function (namespace = null) { hasNamespace: async function (namespace = null) {
@ -83,11 +79,12 @@ const Pinecone = {
}, },
namespaceExists: async function (index, namespace = null) { namespaceExists: async function (index, namespace = null) {
if (!namespace) throw new Error("No namespace value provided."); if (!namespace) throw new Error("No namespace value provided.");
const { namespaces } = await index.describeIndexStats1(); const { namespaces } = await index.describeIndexStats();
return namespaces.hasOwnProperty(namespace); return namespaces.hasOwnProperty(namespace);
}, },
deleteVectorsInNamespace: async function (index, namespace = null) { deleteVectorsInNamespace: async function (index, namespace = null) {
await index.delete1({ namespace, deleteAll: true }); const pineconeNamespace = index.namespace(namespace);
await pineconeNamespace.deleteAll();
return true; return true;
}, },
addDocumentToNamespace: async function ( addDocumentToNamespace: async function (
@ -104,6 +101,7 @@ const Pinecone = {
const cacheResult = await cachedVectorInformation(fullFilePath); const cacheResult = await cachedVectorInformation(fullFilePath);
if (cacheResult.exists) { if (cacheResult.exists) {
const { pineconeIndex } = await this.connect(); const { pineconeIndex } = await this.connect();
const pineconeNamespace = pineconeIndex.namespace(namespace);
const { chunks } = cacheResult; const { chunks } = cacheResult;
const documentVectors = []; const documentVectors = [];
@ -115,14 +113,7 @@ const Pinecone = {
documentVectors.push({ docId, vectorId: id }); documentVectors.push({ docId, vectorId: id });
return { ...chunk, id }; return { ...chunk, id };
}); });
await pineconeNamespace.upsert([...newChunks]);
// Push chunks with new ids to pinecone.
await pineconeIndex.upsert({
upsertRequest: {
vectors: [...newChunks],
namespace,
},
});
} }
await DocumentVectors.bulkInsert(documentVectors); await DocumentVectors.bulkInsert(documentVectors);
@ -170,15 +161,11 @@ const Pinecone = {
if (vectors.length > 0) { if (vectors.length > 0) {
const chunks = []; const chunks = [];
const { pineconeIndex } = await this.connect(); const { pineconeIndex } = await this.connect();
const pineconeNamespace = pineconeIndex.namespace(namespace);
console.log("Inserting vectorized chunks into Pinecone."); console.log("Inserting vectorized chunks into Pinecone.");
for (const chunk of toChunks(vectors, 100)) { for (const chunk of toChunks(vectors, 100)) {
chunks.push(chunk); chunks.push(chunk);
await pineconeIndex.upsert({ await pineconeNamespace.upsert([...chunk]);
upsertRequest: {
vectors: [...chunk],
namespace,
},
});
} }
await storeVectorResult(chunks, fullFilePath); await storeVectorResult(chunks, fullFilePath);
} }
@ -199,11 +186,10 @@ const Pinecone = {
if (knownDocuments.length === 0) return; if (knownDocuments.length === 0) return;
const vectorIds = knownDocuments.map((doc) => doc.vectorId); const vectorIds = knownDocuments.map((doc) => doc.vectorId);
const pineconeNamespace = pineconeIndex.namespace(namespace);
for (const batchOfVectorIds of toChunks(vectorIds, 1000)) { for (const batchOfVectorIds of toChunks(vectorIds, 1000)) {
await pineconeIndex.delete1({ await pineconeNamespace.deleteMany(batchOfVectorIds);
ids: batchOfVectorIds,
namespace,
});
} }
const indexes = knownDocuments.map((doc) => doc.id); const indexes = knownDocuments.map((doc) => doc.id);
@ -285,4 +271,4 @@ const Pinecone = {
}, },
}; };
module.exports.Pinecone = Pinecone; module.exports.Pinecone = PineconeDB;

View file

@ -625,12 +625,15 @@
"@octokit/webhooks-types" "7.1.0" "@octokit/webhooks-types" "7.1.0"
aggregate-error "^3.1.0" aggregate-error "^3.1.0"
"@pinecone-database/pinecone@^0.1.6": "@pinecone-database/pinecone@^2.0.1":
version "0.1.6" version "2.0.1"
resolved "https://registry.yarnpkg.com/@pinecone-database/pinecone/-/pinecone-0.1.6.tgz#13374ae9462c8eea0dc26683cafeddc4e7c0375f" resolved "https://registry.yarnpkg.com/@pinecone-database/pinecone/-/pinecone-2.0.1.tgz#1dce6e06e299dfe2c26490aacffe1103e316b8fc"
integrity sha512-tCnVc28udecthhgSBTdcMhYEW+xsR++AdZasp+ZE/AvUD1hOR2IR3edjk9m0sDxZyvXbno2KeqUbLIOZr7sCTw== integrity sha512-a1ejzrqdSQ2yW+9QUi2TVlKwYUbrvGH+QH6POJhITyaOz9ANE+EhXqToC9af93Ctzq9n87+bOUvBvewLeW++Mw==
dependencies: dependencies:
"@sinclair/typebox" "^0.29.0"
ajv "^8.12.0"
cross-fetch "^3.1.5" cross-fetch "^3.1.5"
encoding "^0.1.13"
"@pkgr/core@^0.1.0": "@pkgr/core@^0.1.0":
version "0.1.0" version "0.1.0"
@ -743,6 +746,11 @@
resolved "https://registry.yarnpkg.com/@sideway/pinpoint/-/pinpoint-2.0.0.tgz#cff8ffadc372ad29fd3f78277aeb29e632cc70df" resolved "https://registry.yarnpkg.com/@sideway/pinpoint/-/pinpoint-2.0.0.tgz#cff8ffadc372ad29fd3f78277aeb29e632cc70df"
integrity sha512-RNiOoTPkptFtSVzQevY/yWtZwf/RxyVnPy/OcA9HBM3MlGDnBEYL5B41H0MTn0Uec8Hi+2qUtTfG2WWZBmMejQ== integrity sha512-RNiOoTPkptFtSVzQevY/yWtZwf/RxyVnPy/OcA9HBM3MlGDnBEYL5B41H0MTn0Uec8Hi+2qUtTfG2WWZBmMejQ==
"@sinclair/typebox@^0.29.0":
version "0.29.6"
resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.29.6.tgz#4cd8372f9247372edd5fc5af44f67e2032c46e2f"
integrity sha512-aX5IFYWlMa7tQ8xZr3b2gtVReCvg7f3LEhjir/JAjX2bJCMVJA5tIPv30wTD4KDfcwMd7DDYY3hFDeGmOgtrZQ==
"@tootallnate/once@1": "@tootallnate/once@1":
version "1.1.2" version "1.1.2"
resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82" resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82"
@ -955,6 +963,16 @@ ajv@^6.12.4:
json-schema-traverse "^0.4.1" json-schema-traverse "^0.4.1"
uri-js "^4.2.2" uri-js "^4.2.2"
ajv@^8.12.0:
version "8.12.0"
resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.12.0.tgz#d1a0527323e22f53562c567c00991577dfbe19d1"
integrity sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA==
dependencies:
fast-deep-equal "^3.1.1"
json-schema-traverse "^1.0.0"
require-from-string "^2.0.2"
uri-js "^4.2.2"
ansi-regex@^5.0.1: ansi-regex@^5.0.1:
version "5.0.1" version "5.0.1"
resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304" resolved "https://registry.yarnpkg.com/ansi-regex/-/ansi-regex-5.0.1.tgz#082cb2c89c9fe8659a311a53bd6a4dc5301db304"
@ -1934,7 +1952,7 @@ encodeurl@~1.0.2:
resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59" resolved "https://registry.yarnpkg.com/encodeurl/-/encodeurl-1.0.2.tgz#ad3ff4c86ec2d029322f5a02c3a9a606c95b3f59"
integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w== integrity sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==
encoding@^0.1.12: encoding@^0.1.12, encoding@^0.1.13:
version "0.1.13" version "0.1.13"
resolved "https://registry.yarnpkg.com/encoding/-/encoding-0.1.13.tgz#56574afdd791f54a8e9b2785c0582a2d26210fa9" resolved "https://registry.yarnpkg.com/encoding/-/encoding-0.1.13.tgz#56574afdd791f54a8e9b2785c0582a2d26210fa9"
integrity sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A== integrity sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==
@ -3321,6 +3339,11 @@ json-schema-traverse@^0.4.1:
resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660" resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz#69f6a87d9513ab8bb8fe63bdb0979c448e684660"
integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg== integrity sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==
json-schema-traverse@^1.0.0:
version "1.0.0"
resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2"
integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==
json-stable-stringify-without-jsonify@^1.0.1: json-stable-stringify-without-jsonify@^1.0.1:
version "1.0.1" version "1.0.1"
resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651" resolved "https://registry.yarnpkg.com/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz#9db7b59496ad3f3cfef30a75142d2d930ad72651"
@ -4674,6 +4697,11 @@ require-directory@^2.1.1:
resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42" resolved "https://registry.yarnpkg.com/require-directory/-/require-directory-2.1.1.tgz#8c64ad5fd30dab1c976e2344ffe7f792a6a6df42"
integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==
require-from-string@^2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909"
integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==
resolve-from@^4.0.0: resolve-from@^4.0.0:
version "4.0.0" version "4.0.0"
resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6" resolved "https://registry.yarnpkg.com/resolve-from/-/resolve-from-4.0.0.tgz#4abcd852ad32dd7baabfe9b40e00a36db5f392e6"