From c61cbd1502e900ebb421bc532647d548f41162d5 Mon Sep 17 00:00:00 2001 From: Timothy Carambat <rambat1010@gmail.com> Date: Tue, 16 Jan 2024 14:58:49 -0800 Subject: [PATCH] Add support for fetching single document in documents folder (#607) --- .../ChatHistory/Citation/index.jsx | 1 - server/endpoints/api/document/index.js | 60 ++++++++++++++- server/swagger/openapi.json | 75 +++++++++++++++++++ server/utils/files/index.js | 38 ++++++++++ 4 files changed, 172 insertions(+), 2 deletions(-) diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx index c4bda294c..9af36fc5a 100644 --- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx +++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx @@ -157,7 +157,6 @@ function CitationDetailModal({ source, onClose }) { ); } - function truncateMiddle(title) { if (title.length <= 18) return title; diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js index a813e2df6..f1282e7c2 100644 --- a/server/endpoints/api/document/index.js +++ b/server/endpoints/api/document/index.js @@ -6,7 +6,10 @@ const { acceptedFileTypes, processDocument, } = require("../../../utils/files/documentProcessor"); -const { viewLocalFiles } = require("../../../utils/files"); +const { + viewLocalFiles, + findDocumentInDocuments, +} = require("../../../utils/files"); const { handleUploads } = setupMulter(); function apiDocumentEndpoints(app) { @@ -133,6 +136,61 @@ function apiDocumentEndpoints(app) { } }); + app.get("/v1/document/:docName", [validApiKey], async (request, response) => { + /* + #swagger.tags = ['Documents'] + #swagger.description = 'Get a single document by its unique AnythingLLM document name' + #swagger.parameters['docName'] = { + in: 'path', + description: 'Unique document name to find (name in /documents)', + required: true, + type: 'string' + } + #swagger.responses[200] = { + content: { + "application/json": { + schema: { + type: 'object', + example: { + "localFiles": { + "name": "documents", + "type": "folder", + items: [ + { + "name": "my-stored-document.txt-uuid1234.json", + "type": "file", + "id": "bb07c334-4dab-4419-9462-9d00065a49a1", + "url": "file://my-stored-document.txt", + "title": "my-stored-document.txt", + "cached": false + }, + ] + } + } + } + } + } + } + #swagger.responses[403] = { + schema: { + "$ref": "#/definitions/InvalidAPIKey" + } + } + */ + try { + const { docName } = request.params; + const document = await findDocumentInDocuments(docName); + if (!document) { + response.sendStatus(404).end(); + return; + } + response.status(200).json({ document }); + } catch (e) { + console.log(e.message, e); + response.sendStatus(500).end(); + } + }); + app.get( "/v1/document/accepted-file-types", [validApiKey], diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json index e7b07484a..7d91579fd 100644 --- a/server/swagger/openapi.json +++ b/server/swagger/openapi.json @@ -953,6 +953,81 @@ } } }, + "/v1/document/{docName}": { + "get": { + "tags": [ + "Documents" + ], + "description": "Get a single document by its unique AnythingLLM document name", + "parameters": [ + { + "name": "docName", + "in": "path", + "required": true, + "schema": { + "type": "string" + }, + "description": "Unique document name to find (name in /documents)" + }, + { + "name": "Authorization", + "in": "header", + "schema": { + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "OK", + "content": { + "application/json": { + "schema": { + "type": "object", + "example": { + "localFiles": { + "name": "documents", + "type": "folder", + "items": [ + { + "name": "my-stored-document.txt-uuid1234.json", + "type": "file", + "id": "bb07c334-4dab-4419-9462-9d00065a49a1", + "url": "file://my-stored-document.txt", + "title": "my-stored-document.txt", + "cached": false + } + ] + } + } + } + } + } + }, + "403": { + "description": "Forbidden", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + }, + "application/xml": { + "schema": { + "$ref": "#/components/schemas/InvalidAPIKey" + } + } + } + }, + "404": { + "description": "Not Found" + }, + "500": { + "description": "Internal Server Error" + } + } + } + }, "/v1/document/accepted-file-types": { "get": { "tags": [ diff --git a/server/utils/files/index.js b/server/utils/files/index.js index 2ff1d60cc..e713a318a 100644 --- a/server/utils/files/index.js +++ b/server/utils/files/index.js @@ -157,11 +157,49 @@ async function purgeVectorCache(filename = null) { return; } +// Search for a specific document by its unique name in the entire `documents` +// folder via iteration of all folders and checking if the expected file exists. +async function findDocumentInDocuments(documentName = null) { + if (!documentName) return null; + const documentsFolder = + process.env.NODE_ENV === "development" + ? path.resolve(__dirname, `../../storage/documents`) + : path.resolve(process.env.STORAGE_DIR, `documents`); + + for (const folder of fs.readdirSync(documentsFolder)) { + const isFolder = fs + .lstatSync(path.join(documentsFolder, folder)) + .isDirectory(); + if (!isFolder) continue; + + const targetFilename = normalizePath(documentName); + const targetFileLocation = path.join( + documentsFolder, + folder, + targetFilename + ); + if (!fs.existsSync(targetFileLocation)) continue; + + const fileData = fs.readFileSync(targetFileLocation, "utf8"); + const cachefilename = `${folder}/${targetFilename}`; + const { pageContent, ...metadata } = JSON.parse(fileData); + return { + name: targetFilename, + type: "file", + ...metadata, + cached: await cachedVectorInformation(cachefilename, true), + }; + } + + return null; +} + function normalizePath(filepath = "") { return path.normalize(filepath).replace(/^(\.\.(\/|\\|$))+/, ""); } module.exports = { + findDocumentInDocuments, cachedVectorInformation, viewLocalFiles, purgeSourceDocument,