From c61cbd1502e900ebb421bc532647d548f41162d5 Mon Sep 17 00:00:00 2001
From: Timothy Carambat <rambat1010@gmail.com>
Date: Tue, 16 Jan 2024 14:58:49 -0800
Subject: [PATCH] Add support for fetching single document in documents folder
 (#607)

---
 .../ChatHistory/Citation/index.jsx            |  1 -
 server/endpoints/api/document/index.js        | 60 ++++++++++++++-
 server/swagger/openapi.json                   | 75 +++++++++++++++++++
 server/utils/files/index.js                   | 38 ++++++++++
 4 files changed, 172 insertions(+), 2 deletions(-)

diff --git a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx
index c4bda294c..9af36fc5a 100644
--- a/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx
+++ b/frontend/src/components/WorkspaceChat/ChatContainer/ChatHistory/Citation/index.jsx
@@ -157,7 +157,6 @@ function CitationDetailModal({ source, onClose }) {
   );
 }
 
-
 function truncateMiddle(title) {
   if (title.length <= 18) return title;
 
diff --git a/server/endpoints/api/document/index.js b/server/endpoints/api/document/index.js
index a813e2df6..f1282e7c2 100644
--- a/server/endpoints/api/document/index.js
+++ b/server/endpoints/api/document/index.js
@@ -6,7 +6,10 @@ const {
   acceptedFileTypes,
   processDocument,
 } = require("../../../utils/files/documentProcessor");
-const { viewLocalFiles } = require("../../../utils/files");
+const {
+  viewLocalFiles,
+  findDocumentInDocuments,
+} = require("../../../utils/files");
 const { handleUploads } = setupMulter();
 
 function apiDocumentEndpoints(app) {
@@ -133,6 +136,61 @@ function apiDocumentEndpoints(app) {
     }
   });
 
+  // GET the stored metadata of one document by its unique file name.
+  app.get(
+    "/v1/document/:docName",
+    [validApiKey],
+    async (request, response, next) => {
+      /*
+      #swagger.tags = ['Documents']
+      #swagger.description = 'Get a single document by its unique AnythingLLM document name'
+      #swagger.parameters['docName'] = {
+          in: 'path',
+          description: 'Unique document name to find (name in /documents)',
+          required: true,
+          type: 'string'
+      }
+      #swagger.responses[200] = {
+        content: {
+          "application/json": {
+            schema: {
+              type: 'object',
+              example: {
+                "document": {
+                  "name": "my-stored-document.txt-uuid1234.json",
+                  "type": "file",
+                  "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
+                  "url": "file://my-stored-document.txt",
+                  "title": "my-stored-document.txt",
+                  "cached": false
+                }
+              }
+            }
+          }
+        }
+      }
+      #swagger.responses[403] = {
+        schema: {
+          "$ref": "#/definitions/InvalidAPIKey"
+        }
+      }
+      */
+      try {
+        const { docName } = request.params;
+        // Static routes registered after this one also match `:docName`
+        // (e.g. accepted-file-types); pass them on instead of answering 404.
+        if (docName === "accepted-file-types") return next();
+
+        const document = await findDocumentInDocuments(docName);
+        if (!document) return response.sendStatus(404).end();
+        response.status(200).json({ document });
+      } catch (e) {
+        console.log(e.message, e);
+        response.sendStatus(500).end();
+      }
+    }
+  );
+
   app.get(
     "/v1/document/accepted-file-types",
     [validApiKey],
diff --git a/server/swagger/openapi.json b/server/swagger/openapi.json
index e7b07484a..7d91579fd 100644
--- a/server/swagger/openapi.json
+++ b/server/swagger/openapi.json
@@ -953,6 +953,75 @@
         }
       }
     },
+    "/v1/document/{docName}": {
+      "get": {
+        "tags": [
+          "Documents"
+        ],
+        "description": "Get a single document by its unique AnythingLLM document name",
+        "parameters": [
+          {
+            "name": "docName",
+            "in": "path",
+            "required": true,
+            "schema": {
+              "type": "string"
+            },
+            "description": "Unique document name to find (name in /documents)"
+          },
+          {
+            "name": "Authorization",
+            "in": "header",
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "OK",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "type": "object",
+                  "example": {
+                    "document": {
+                      "name": "my-stored-document.txt-uuid1234.json",
+                      "type": "file",
+                      "id": "bb07c334-4dab-4419-9462-9d00065a49a1",
+                      "url": "file://my-stored-document.txt",
+                      "title": "my-stored-document.txt",
+                      "cached": false
+                    }
+                  }
+                }
+              }
+            }
+          },
+          "403": {
+            "description": "Forbidden",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/InvalidAPIKey"
+                }
+              },
+              "application/xml": {
+                "schema": {
+                  "$ref": "#/components/schemas/InvalidAPIKey"
+                }
+              }
+            }
+          },
+          "404": {
+            "description": "Not Found"
+          },
+          "500": {
+            "description": "Internal Server Error"
+          }
+        }
+      }
+    },
     "/v1/document/accepted-file-types": {
       "get": {
         "tags": [
diff --git a/server/utils/files/index.js b/server/utils/files/index.js
index 2ff1d60cc..e713a318a 100644
--- a/server/utils/files/index.js
+++ b/server/utils/files/index.js
@@ -157,11 +157,49 @@ async function purgeVectorCache(filename = null) {
   return;
 }
 
+/**
+ * Find a single document by its unique file name by checking each sub-folder
+ * of the `documents` storage directory for a regular file with that name.
+ * @param {string|null} documentName - stored file name to look for.
+ * @returns {Promise<object|null>} the file's JSON without `pageContent`, plus
+ * `name`, `type` and `cached`; null when no matching file exists.
+ */
+async function findDocumentInDocuments(documentName = null) {
+  if (!documentName) return null;
+  const documentsFolder =
+    process.env.NODE_ENV === "development"
+      ? path.resolve(__dirname, `../../storage/documents`)
+      : path.resolve(process.env.STORAGE_DIR, `documents`);
+  // Loop-invariant. Names like `..` normalize to "", which is never a file.
+  const targetFilename = normalizePath(documentName);
+  if (!targetFilename || !fs.existsSync(documentsFolder)) return null;
+
+  for (const folder of fs.readdirSync(documentsFolder)) {
+    const folderPath = path.join(documentsFolder, folder);
+    if (!fs.lstatSync(folderPath).isDirectory()) continue;
+    // Only regular files count: reading a directory would throw EISDIR.
+    const targetFileLocation = path.join(folderPath, targetFilename);
+    if (!fs.existsSync(targetFileLocation)) continue;
+    if (!fs.statSync(targetFileLocation).isFile()) continue;
+    const fileData = fs.readFileSync(targetFileLocation, "utf8");
+    const cachefilename = `${folder}/${targetFilename}`;
+    const { pageContent, ...metadata } = JSON.parse(fileData);
+    return {
+      name: targetFilename,
+      type: "file",
+      ...metadata,
+      cached: await cachedVectorInformation(cachefilename, true),
+    };
+  }
+  return null;
+}
+
 function normalizePath(filepath = "") {
   return path.normalize(filepath).replace(/^(\.\.(\/|\\|$))+/, "");
 }
 
 module.exports = {
+  findDocumentInDocuments,
   cachedVectorInformation,
   viewLocalFiles,
   purgeSourceDocument,