GitHub loader extension + extension support v1 ()

* feat: implement github repo loading
fix: purge of folders
fix: rendering of sub-files

* noshow delete on custom-documents

* Add API key support because of rate limits

* WIP for frontend of data connectors

* wip

* Add frontend form for GitHub repo data connector

* remove console.logs
block custom-documents from being deleted

* remove _meta unused arg

* Add support for ignore pathing in request
Ignore path input via tagging

* Update hint
This commit is contained in:
Timothy Carambat 2023-12-18 15:48:02 -08:00 committed by GitHub
parent 2d700b13f6
commit 452582489e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
32 changed files with 975 additions and 128 deletions
collector
extensions
index.js
package.json
utils/extensions/GithubRepo
yarn.lock
frontend
package.json
src
App.jsx
components
DataConnectorOption
Modals/MangeWorkspace/Documents/Directory
SettingsSidebar
Sidebar
index.css
models
pages/GeneralSettings/DataConnectors
utils
yarn.lock
server

View file

@ -0,0 +1,52 @@
const { reqBody } = require("../utils/http");
/**
 * Registers the collector's extension routes on an Express application.
 *
 * Routes:
 *  - POST /ext/github-repo          loads a GitHub repo through the GithubRepo extension.
 *  - POST /ext/github-repo/branches lists the branches of a GitHub repo.
 *
 * @param {object} app - Express application; when falsy nothing is registered.
 * @returns {void}
 */
function extensions(app) {
  if (!app) return;

  // Always answers HTTP 200; failures are reported through `success: false`.
  const handleRepoCollection = async (request, response) => {
    try {
      const loadGithubRepo = require("../utils/extensions/GithubRepo");
      const outcome = await loadGithubRepo(reqBody(request));
      const { success, reason, data } = outcome;
      response.status(200).json({ success, reason, data });
    } catch (error) {
      console.error(error);
      const failure = {
        success: false,
        reason: error.message || "A processing error occurred.",
        data: {},
      };
      response.status(200).json(failure);
    }
  };

  // gets all branches for a specific repo (answers HTTP 400 on failure)
  const handleBranchListing = async (request, response) => {
    try {
      const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
      const loader = new GithubRepoLoader(reqBody(request));
      const branches = await loader.getRepoBranches();
      response.status(200).json({
        success: true,
        reason: null,
        data: { branches },
      });
    } catch (error) {
      console.error(error);
      response.status(400).json({
        success: false,
        reason: error.message,
        data: { branches: [] },
      });
    }
  };

  app.post("/ext/github-repo", handleRepoCollection);
  app.post("/ext/github-repo/branches", handleBranchListing);
}
module.exports = extensions;

View file

@ -11,6 +11,7 @@ const { reqBody } = require("./utils/http");
const { processSingleFile } = require("./processSingleFile");
const { processLink } = require("./processLink");
const { wipeCollectorStorage } = require("./utils/files");
const extensions = require("./extensions");
const app = express();
app.use(cors({ origin: true }));
@ -57,6 +58,8 @@ app.post("/process-link", async function (request, response) {
return;
});
extensions(app);
app.get("/accepts", function (_, response) {
response.status(200).json(ACCEPTED_MIMES);
});

View file

@ -24,6 +24,7 @@
"express": "^4.18.2",
"extract-zip": "^2.0.1",
"fluent-ffmpeg": "^2.1.2",
"ignore": "^5.3.0",
"js-tiktoken": "^1.0.8",
"langchain": "0.0.201",
"mammoth": "^1.6.0",
@ -35,6 +36,7 @@
"pdf-parse": "^1.1.1",
"puppeteer": "^21.6.1",
"slugify": "^1.6.6",
"url-pattern": "^1.0.3",
"uuid": "^9.0.0",
"wavefile": "^11.0.0"
},
@ -42,4 +44,4 @@
"nodemon": "^2.0.22",
"prettier": "^2.4.1"
}
}
}

View file

@ -0,0 +1,149 @@
class RepoLoader {
constructor(args = {}) {
this.ready = false;
this.repo = args?.repo;
this.branch = args?.branch;
this.accessToken = args?.accessToken || null;
this.ignorePaths = args?.ignorePaths || [];
this.author = null;
this.project = null;
this.branches = [];
}
#validGithubUrl() {
const UrlPattern = require("url-pattern");
const pattern = new UrlPattern("https\\://github.com/(:author)/(:project)");
const match = pattern.match(this.repo);
if (!match) return false;
this.author = match.author;
this.project = match.project;
return true;
}
// Ensure the branch provided actually exists
// and if it does not or has not been set auto-assign to primary branch.
async #validBranch() {
await this.getRepoBranches();
if (!!this.branch && this.branches.includes(this.branch)) return;
console.log(
"[Github Loader]: Branch not set! Auto-assigning to a default branch."
);
this.branch = this.branches.includes("main") ? "main" : "master";
console.log(`[Github Loader]: Branch auto-assigned to ${this.branch}.`);
return;
}
async #validateAccessToken() {
if (!this.accessToken) return;
const valid = await fetch("https://api.github.com/octocat", {
method: "GET",
headers: {
Authorization: `Bearer ${this.accessToken}`,
"X-GitHub-Api-Version": "2022-11-28",
},
})
.then((res) => {
if (!res.ok) throw new Error(res.statusText);
return res.ok;
})
.catch((e) => {
console.error(
"Invalid Github Access Token provided! Access token will not be used",
e.message
);
return false;
});
if (!valid) this.accessToken = null;
return;
}
async init() {
if (!this.#validGithubUrl()) return;
await this.#validBranch();
await this.#validateAccessToken();
this.ready = true;
return this;
}
async recursiveLoader() {
if (!this.ready) throw new Error("[Github Loader]: not in ready state!");
const {
GithubRepoLoader: LCGithubLoader,
} = require("langchain/document_loaders/web/github");
if (this.accessToken)
console.log(
`[Github Loader]: Access token set! Recursive loading enabled!`
);
const loader = new LCGithubLoader(this.repo, {
accessToken: this.accessToken,
branch: this.branch,
recursive: !!this.accessToken, // Recursive will hit rate limits.
maxConcurrency: 5,
unknown: "ignore",
ignorePaths: this.ignorePaths,
});
const docs = [];
for await (const doc of loader.loadAsStream()) docs.push(doc);
return docs;
}
// Sort branches to always show either main or master at the top of the result.
#branchPrefSort(branches = []) {
const preferredSort = ["main", "master"];
return branches.reduce((acc, branch) => {
if (preferredSort.includes(branch)) return [branch, ...acc];
return [...acc, branch];
}, []);
}
// Get all branches for a given repo.
async getRepoBranches() {
if (!this.#validGithubUrl() || !this.author || !this.project) return [];
await this.#validateAccessToken(); // Ensure API access token is valid for pre-flight
let page = 0;
let polling = true;
const branches = [];
while (polling) {
console.log(`Fetching page ${page} of branches for ${this.project}`);
await fetch(
`https://api.github.com/repos/${this.author}/${this.project}/branches?per_page=100&page=${page}`,
{
method: "GET",
headers: {
...(this.accessToken
? { Authorization: `Bearer ${this.accessToken}` }
: {}),
"X-GitHub-Api-Version": "2022-11-28",
},
}
)
.then((res) => {
if (res.ok) return res.json();
throw new Error(`Invalid request to Github API: ${res.statusText}`);
})
.then((branchObjects) => {
polling = branchObjects.length > 0;
branches.push(branchObjects.map((branch) => branch.name));
page++;
})
.catch((err) => {
polling = false;
console.log(`RepoLoader.branches`, err);
});
}
this.branches = [...new Set(branches.flat())];
return this.#branchPrefSort(this.branches);
}
}
module.exports = RepoLoader;

View file

@ -0,0 +1,78 @@
const RepoLoader = require("./RepoLoader");
const fs = require("fs");
const path = require("path");
const { default: slugify } = require("slugify");
const { v4 } = require("uuid");
const { writeToServerDocuments } = require("../../files");
const { tokenizeString } = require("../../tokenizer");
/**
 * Loads every file of a GitHub repo branch and writes each one as a document
 * into a new folder under the server's document storage.
 *
 * @param {object} args - Passed straight to `RepoLoader` (repo, branch, accessToken, ignorePaths).
 * @returns {Promise<{success: boolean, reason: string|null, data?: object}>}
 *   On success `data` holds author, repo, branch, the number of files written
 *   and the destination folder name.
 */
async function loadGithubRepo(args) {
  const repo = new RepoLoader(args);
  await repo.init();

  if (!repo.ready)
    return {
      success: false,
      reason: "Could not prepare Github repo for loading! Check URL",
    };

  console.log(
    `-- Working Github ${repo.author}/${repo.project}:${repo.branch} --`
  );
  const docs = await repo.recursiveLoader();
  if (!docs.length) {
    return {
      success: false,
      reason: "No files were found for those settings.",
    };
  }

  console.log(`[Github Loader]: Found ${docs.length} source files. Saving...`);
  // Short random suffix keeps repeated imports of the same branch in separate folders.
  const outFolder = slugify(
    `${repo.author}-${repo.project}-${repo.branch}-${v4().slice(0, 4)}`
  ).toLowerCase();
  const outFolderPath = path.resolve(
    __dirname,
    `../../../../server/storage/documents/${outFolder}`
  );
  // Recursive so a missing parent storage directory does not abort the import.
  fs.mkdirSync(outFolderPath, { recursive: true });

  let savedCount = 0;
  for (const doc of docs) {
    // Files without any content are not stored.
    if (!doc.pageContent) continue;
    const data = {
      id: v4(),
      url: "github://" + doc.metadata.source,
      title: doc.metadata.source,
      docAuthor: repo.author,
      description: "No description found.",
      docSource: repo.repo,
      chunkSource: doc.metadata.source,
      published: new Date().toLocaleString(),
      wordCount: doc.pageContent.split(" ").length,
      pageContent: doc.pageContent,
      token_count_estimate: tokenizeString(doc.pageContent).length,
    };
    console.log(
      `[Github Loader]: Saving ${doc.metadata.source} to ${outFolder}`
    );
    writeToServerDocuments(
      data,
      `${slugify(doc.metadata.source)}-${data.id}`,
      outFolderPath
    );
    savedCount++;
  }

  return {
    success: true,
    reason: null,
    data: {
      author: repo.author,
      repo: repo.project,
      branch: repo.branch,
      // Count what was actually written, not what was found.
      files: savedCount,
      destination: outFolder,
    },
  };
}
module.exports = loadGithubRepo;

View file

@ -1530,6 +1530,11 @@ ignore-by-default@^1.0.1:
resolved "https://registry.yarnpkg.com/ignore-by-default/-/ignore-by-default-1.0.1.tgz#48ca6d72f6c6a3af00a9ad4ae6876be3889e2b09"
integrity sha512-Ius2VYcGNk7T90CppJqcIkS5ooHUZyIQK+ClZfMfMNFEF9VSE73Fq+906u/CWu92x4gzZMWOwfFYckPObzdEbA==
ignore@^5.3.0:
version "5.3.0"
resolved "https://registry.yarnpkg.com/ignore/-/ignore-5.3.0.tgz#67418ae40d34d6999c95ff56016759c718c82f78"
integrity sha512-g7dmpshy+gD7mh88OC9NwSGTKoc3kyLAZQRU1mt53Aw/vnvfXnbC+F/7F7QoYVKbV+KNvJx8wArewKy1vXMtlg==
immediate@~3.0.5:
version "3.0.6"
resolved "https://registry.yarnpkg.com/immediate/-/immediate-3.0.6.tgz#9db1dbd0faf8de6fbe0f5dd5e56bb606280de69b"
@ -3127,6 +3132,11 @@ unpipe@1.0.0, unpipe@~1.0.0:
resolved "https://registry.yarnpkg.com/unpipe/-/unpipe-1.0.0.tgz#b2bf4ee8514aae6165b4817829d21b2ef49904ec"
integrity sha512-pjy2bYhSsufwWlKwPc+l3cN7+wuJlK6uz0YdJEOlQDbl6jo/YlPi4mb8agUkVC8BF7V8NuzeyPNqRksA3hztKQ==
url-pattern@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/url-pattern/-/url-pattern-1.0.3.tgz#0409292471b24f23c50d65a47931793d2b5acfc1"
integrity sha512-uQcEj/2puA4aq1R3A2+VNVBgaWYR24FdWjl7VNW83rnWftlhyzOZ/tBjezRiC2UkIzuxC8Top3IekN3vUf1WxA==
url-template@^2.0.8:
version "2.0.8"
resolved "https://registry.yarnpkg.com/url-template/-/url-template-2.0.8.tgz#fc565a3cccbff7730c775f5641f9555791439f21"

View file

@ -27,6 +27,7 @@
"react-loading-icons": "^1.1.0",
"react-loading-skeleton": "^3.1.0",
"react-router-dom": "^6.3.0",
"react-tag-input-component": "^2.0.2",
"react-toastify": "^9.1.3",
"text-case": "^1.0.9",
"truncate": "^3.0.0",

View file

@ -36,6 +36,12 @@ const GeneralExportImport = lazy(() =>
import("@/pages/GeneralSettings/ExportImport")
);
const GeneralSecurity = lazy(() => import("@/pages/GeneralSettings/Security"));
const DataConnectors = lazy(() =>
import("@/pages/GeneralSettings/DataConnectors")
);
const DataConnectorSetup = lazy(() =>
import("@/pages/GeneralSettings/DataConnectors/Connectors")
);
const OnboardingFlow = lazy(() => import("@/pages/OnboardingFlow"));
export default function App() {
@ -103,6 +109,15 @@ export default function App() {
path="/settings/workspaces"
element={<ManagerRoute Component={AdminWorkspaces} />}
/>
<Route
path="/settings/data-connectors"
element={<ManagerRoute Component={DataConnectors} />}
/>
<Route
path="/settings/data-connectors/:connector"
element={<ManagerRoute Component={DataConnectorSetup} />}
/>
{/* Onboarding Flow */}
<Route path="/onboarding" element={<OnboardingFlow />} />
</Routes>

View file

@ -0,0 +1,39 @@
import paths from "@/utils/paths";
import ConnectorImages from "./media";
export default function DataConnectorOption({ slug }) {
if (!DATA_CONNECTORS.hasOwnProperty(slug)) return null;
const { path, image, name, description, link } = DATA_CONNECTORS[slug];
return (
<a href={path}>
<label className="transition-all duration-300 inline-flex flex-col h-full w-60 cursor-pointer items-start justify-between rounded-2xl bg-preference-gradient border-2 border-transparent shadow-md px-5 py-4 text-white hover:bg-selected-preference-gradient hover:border-white/60 peer-checked:border-white peer-checked:border-opacity-90 peer-checked:bg-selected-preference-gradient">
<div className="flex items-center">
<img src={image} alt={name} className="h-10 w-10 rounded" />
<div className="ml-4 text-sm font-semibold">{name}</div>
</div>
<div className="mt-2 text-xs font-base text-white tracking-wide">
{description}
</div>
<a
href={link}
target="_blank"
className="mt-2 text-xs text-white font-medium underline"
>
{link}
</a>
</label>
</a>
);
}
// Registry of available data connectors, keyed by slug.
// Each entry provides the display name, the in-app path of its setup page,
// the logo image, a short description and an external link shown on its card.
export const DATA_CONNECTORS = {
github: {
name: "GitHub Repo",
path: paths.settings.dataConnectors.github(),
image: ConnectorImages.github,
description:
"Import an entire public or private Github repository in a single click.",
link: "https://github.com",
},
};

Binary file not shown.

After

(image error) Size: 22 KiB

View file

@ -0,0 +1,5 @@
import Github from "./github.png";
// Logo images for data connectors, keyed by connector slug.
const ConnectorImages = {
github: Github,
};
export default ConnectorImages;

View file

@ -33,7 +33,7 @@ export default function FileRow({
try {
setLoading(true);
setLoadingMessage("This may take a while for large documents");
await System.deleteDocument(`${folderName}/${item.name}`, item);
await System.deleteDocument(`${folderName}/${item.name}`);
await fetchKeys(true);
} catch (error) {
console.error("Failed to delete the document:", error);
@ -60,7 +60,7 @@ export default function FileRow({
selected ? "bg-sky-500/20" : ""
} ${expanded ? "bg-sky-500/10" : ""}`}`}
>
<div className="col-span-4 flex gap-x-[4px] items-center">
<div className="pl-4 col-span-4 flex gap-x-[4px] items-center">
<div
className="w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer"
role="checkbox"

View file

@ -1,7 +1,8 @@
import { useState } from "react";
import FileRow from "../FileRow";
import { CaretDown, FolderNotch } from "@phosphor-icons/react";
import { CaretDown, FolderNotch, Trash } from "@phosphor-icons/react";
import { middleTruncate } from "@/utils/directories";
import System from "@/models/system";
export default function FolderRow({
item,
@ -12,8 +13,32 @@ export default function FolderRow({
fetchKeys,
setLoading,
setLoadingMessage,
autoExpanded = false,
}) {
const [expanded, setExpanded] = useState(true);
const [expanded, setExpanded] = useState(autoExpanded);
// Deletes this folder after user confirmation, then refreshes the listing.
// Returns false when the user cancels the confirmation dialog.
const onTrashClick = async (event) => {
  // Keep the click from also triggering handlers on the surrounding row.
  event.stopPropagation();
  const confirmed = window.confirm(
    "Are you sure you want to delete this folder?\nThis will require you to re-upload and re-embed it.\nAny documents in this folder will be removed from any workspace that is currently referencing it.\nThis action is not reversible."
  );
  if (!confirmed) return false;

  try {
    setLoading(true);
    setLoadingMessage("This may take a while for large folders");
    await System.deleteFolder(item.name);
    await fetchKeys(true);
  } catch (error) {
    // This handler removes a folder, so the log should say so.
    console.error("Failed to delete the folder:", error);
  }

  if (selected) toggleSelection(item);
  setLoading(false);
};
const handleExpandClick = (event) => {
event.stopPropagation();
@ -30,7 +55,7 @@ export default function FolderRow({
>
<div className="col-span-4 flex gap-x-[4px] items-center">
<div
className="w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer"
className="shrink-0 w-3 h-3 rounded border-[1px] border-white flex justify-center items-center cursor-pointer"
role="checkbox"
aria-checked={selected}
tabIndex={0}
@ -46,7 +71,7 @@ export default function FolderRow({
<CaretDown className="text-base font-bold w-4 h-4" />
</div>
<FolderNotch
className="text-base font-bold w-4 h-4 mr-[3px]"
className="shrink-0 text-base font-bold w-4 h-4 mr-[3px]"
weight="fill"
/>
<p className="whitespace-nowrap overflow-show">
@ -56,7 +81,14 @@ export default function FolderRow({
<p className="col-span-2 pl-3.5" />
<p className="col-span-2 pl-3" />
<p className="col-span-2 pl-2" />
<div className="col-span-2 flex justify-end items-center" />
<div className="col-span-2 flex justify-end items-center">
{item.name !== "custom-documents" && (
<Trash
onClick={onTrashClick}
className="text-base font-bold w-4 h-4 ml-2 flex-shrink-0 cursor-pointer"
/>
)}
</div>
</div>
{expanded && (
<div className="col-span-full">

View file

@ -106,6 +106,7 @@ export default function Directory({
isSelected={isSelected}
setLoading={setLoading}
setLoadingMessage={setLoadingMessage}
autoExpanded={index === 0}
/>
)
)

View file

@ -22,6 +22,7 @@ import {
X,
List,
FileCode,
Plugs,
} from "@phosphor-icons/react";
import useUser from "@/hooks/useUser";
import { USER_BACKGROUND_COLOR } from "@/utils/constants";
@ -127,6 +128,11 @@ export default function SettingsSidebar() {
btnText="Vector Database"
icon={<Database className="h-5 w-5 flex-shrink-0" />}
/>
<Option
href={paths.settings.dataConnectors.list()}
btnText="Data Connectors"
icon={<Plugs className="h-5 w-5 flex-shrink-0" />}
/>
</>
)}

View file

@ -1,34 +0,0 @@
import pluralize from "pluralize";
import React, { useEffect, useState } from "react";
import System from "@/models/system";
import { numberWithCommas } from "@/utils/numbers";
/**
 * Shows the total number of vectors reported by `System.totalIndexes()`.
 * Renders an empty placeholder while the count is still loading or is zero.
 */
export default function IndexCount() {
const [indexes, setIndexes] = useState(null);
// Fetch the count once on mount.
useEffect(() => {
async function indexCount() {
setIndexes(await System.totalIndexes());
}
indexCount();
}, []);
// Not loaded yet or nothing indexed: keep the layout but show no text.
if (indexes === null || indexes === 0) {
return (
<div className="flex w-full items-center justify-end gap-x-2">
<div className="flex items-center gap-x-1 px-2 rounded-full">
<p className="text-slate-400 leading-tight text-sm"></p>
</div>
</div>
);
}
return (
<div className="flex w-full items-center justify-end gap-x-2">
<div className="flex items-center gap-x-1 px-2 rounded-full">
<p className="text-slate-400 leading-tight text-sm">
{numberWithCommas(indexes)} {pluralize("vector", indexes)}
</p>
</div>
</div>
);
}

View file

@ -1,49 +0,0 @@
import React, { useEffect, useState } from "react";
import { WarningCircle, Circle } from "@phosphor-icons/react";
import System from "@/models/system";
/**
 * Status pill for the LLM, driven by the result of `System.ping()`:
 * "unknown" while the ping is pending, "offline" when it resolves to false,
 * "online" otherwise.
 */
export default function LLMStatus() {
const [status, setStatus] = useState(null);
// Ping once on mount.
useEffect(() => {
async function checkPing() {
setStatus(await System.ping());
}
checkPing();
}, []);
// null means the ping has not resolved yet.
if (status === null) {
return (
<div className="flex w-full items-center justify-start gap-x-2">
<p className="text-slate-400 leading-loose text-sm">LLM</p>
<div className="flex items-center gap-x-1 border border-slate-400 px-2 rounded-full">
<p className="text-slate-400 leading-tight text-sm">unknown</p>
<Circle className="h-3 w-3 stroke-slate-700 fill-slate-400 animate-pulse" />
</div>
</div>
);
}
// TODO: add modal or toast on click to identify why this is broken
// need to likely start server.
if (status === false) {
return (
<div className="flex w-full items-center justify-end gap-x-2">
<p className="text-slate-400 leading-loose text-sm">LLM</p>
<div className="flex items-center gap-x-1 border border-red-400 px-2 bg-red-200 rounded-full">
<p className="text-red-700 leading-tight text-sm">offline</p>
<WarningCircle className="h-3 w-3 stroke-red-100 fill-red-400" />
</div>
</div>
);
}
return (
<div className="flex w-full items-center justify-end gap-x-2">
<p className="text-slate-400 leading-loose text-sm">LLM</p>
<div className="flex items-center gap-x-1 border border-slate-400 px-2 rounded-full">
<p className="text-slate-400 leading-tight text-sm">online</p>
<Circle className="h-3 w-3 stroke-green-100 fill-green-400 animate-pulse" />
</div>
</div>
);
}

View file

@ -71,25 +71,6 @@ export default function Sidebar() {
<ActiveWorkspaces />
</div>
<div className="flex flex-col flex-grow justify-end mb-2">
{/* <div className="flex flex-col gap-y-2">
<div className="w-full flex items-center justify-between">
<LLMStatus />
<IndexCount />
</div>
<a
href={paths.feedback()}
target="_blank"
className="flex flex-grow w-[100%] h-[36px] gap-x-2 py-[5px] px-4 border border-transparent rounded-lg text-slate-200 justify-center items-center bg-stone-800 hover:bg-stone-900"
>
<AtSign className="h-4 w-4" />
<p className="text-slate-200 text-xs leading-loose font-semibold">
Feedback form
</p>
</a>
<ManagedHosting />
<LogoutButton />
</div> */}
{/* Footer */}
<div className="flex justify-center mt-2">
<div className="flex space-x-4">

View file

@ -385,3 +385,7 @@ dialog::backdrop {
@apply border-blue-500 bg-blue-400/10 text-blue-800;
}
}
/* Dark-theme overrides for the tag input container (presumably the class
   rendered by react-tag-input-component - confirm). Every utility is marked
   !important so it wins over the styles the class already carries. */
.rti--container {
@apply !bg-zinc-900 !text-white !placeholder-white !placeholder-opacity-60 !text-sm !rounded-lg !p-2.5;
}

View file

@ -0,0 +1,47 @@
import { API_BASE } from "@/utils/constants";
import { baseHeaders } from "@/utils/request";
import showToast from "@/utils/toast";
/**
 * Client for the server's data-connector extension endpoints.
 * Every method resolves to a result object with an `error` field (null on
 * success) instead of rejecting on request or processing failures.
 */
const DataConnector = {
  github: {
    /**
     * Lists the branches of a GitHub repo. Failures are also shown as a toast.
     * @returns {Promise<{branches: string[], error: string|null}>}
     */
    branches: async ({ repo, accessToken }) => {
      // Built outside the try block so a failure while preparing the request
      // still rejects, exactly as it would before any handler is attached.
      const pending = fetch(`${API_BASE}/ext/github/branches`, {
        method: "POST",
        headers: baseHeaders(),
        cache: "force-cache",
        body: JSON.stringify({ repo, accessToken }),
      });

      try {
        const response = await pending;
        const payload = await response.json();
        if (!payload.success) throw new Error(payload.reason);

        const found = payload.data?.branches || [];
        return { branches: found, error: null };
      } catch (e) {
        console.error(e);
        showToast(e.message, "error");
        return { branches: [], error: e.message };
      }
    },

    /**
     * Asks the server to collect all files of a GitHub repo branch.
     * @returns {Promise<{data: object|null, error: string|null}>}
     */
    collect: async function ({ repo, accessToken, branch, ignorePaths = [] }) {
      const pending = fetch(`${API_BASE}/ext/github/repo`, {
        method: "POST",
        headers: baseHeaders(),
        body: JSON.stringify({ repo, accessToken, branch, ignorePaths }),
      });

      try {
        const response = await pending;
        const payload = await response.json();
        if (!payload.success) throw new Error(payload.reason);
        return { data: payload.data, error: null };
      } catch (e) {
        console.error(e);
        return { data: null, error: e.message };
      }
    },
  },
};
export default DataConnector;

View file

@ -1,5 +1,6 @@
import { API_BASE, AUTH_TIMESTAMP } from "@/utils/constants";
import { baseHeaders } from "@/utils/request";
import DataConnector from "./dataConnector";
const System = {
ping: async function () {
@ -133,11 +134,23 @@ const System = {
return false;
});
},
deleteDocument: async (name, meta) => {
deleteDocument: async (name) => {
return await fetch(`${API_BASE}/system/remove-document`, {
method: "DELETE",
headers: baseHeaders(),
body: JSON.stringify({ name, meta }),
body: JSON.stringify({ name }),
})
.then((res) => res.ok)
.catch((e) => {
console.error(e);
return false;
});
},
deleteFolder: async (name) => {
return await fetch(`${API_BASE}/system/remove-folder`, {
method: "DELETE",
headers: baseHeaders(),
body: JSON.stringify({ name }),
})
.then((res) => res.ok)
.catch((e) => {
@ -431,6 +444,7 @@ const System = {
return { success: false, error: e.message };
});
},
dataConnectors: DataConnector,
};
export default System;

View file

@ -0,0 +1,294 @@
import React, { useEffect, useState } from "react";
import Sidebar, { SidebarMobileHeader } from "@/components/SettingsSidebar";
import { isMobile } from "react-device-detect";
import { DATA_CONNECTORS } from "@/components/DataConnectorOption";
import System from "@/models/system";
import { Info } from "@phosphor-icons/react/dist/ssr";
import showToast from "@/utils/toast";
import pluralize from "pluralize";
import { TagsInput } from "react-tag-input-component";
const DEFAULT_BRANCHES = ["main", "master"];
export default function GithubConnectorSetup() {
const { image } = DATA_CONNECTORS.github;
const [loading, setLoading] = useState(false);
const [repo, setRepo] = useState(null);
const [accessToken, setAccessToken] = useState(null);
const [ignores, setIgnores] = useState([]);
const [settings, setSettings] = useState({
repo: null,
accessToken: null,
});
const handleSubmit = async (e) => {
e.preventDefault();
const form = new FormData(e.target);
try {
setLoading(true);
showToast(
"Fetching all files for repo - this may take a while.",
"info",
{ clear: true, autoClose: false }
);
const { data, error } = await System.dataConnectors.github.collect({
repo: form.get("repo"),
accessToken: form.get("accessToken"),
branch: form.get("branch"),
ignorePaths: ignores,
});
if (!!error) {
showToast(error, "error", { clear: true });
setLoading(false);
return;
}
showToast(
`${data.files} ${pluralize("file", data.files)} collected from ${
data.author
}/${data.repo}:${data.branch}. Output folder is ${data.destination}.`,
"success",
{ clear: true }
);
e.target.reset();
setLoading(false);
return;
} catch (e) {
console.error(e);
showToast(e.message, "error", { clear: true });
setLoading(false);
}
};
return (
<div className="w-screen h-screen overflow-hidden bg-sidebar flex">
{!isMobile && <Sidebar />}
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[26px] bg-main-gradient w-full h-full overflow-y-scroll border-4 border-accent"
>
{isMobile && <SidebarMobileHeader />}
<div className="flex w-full">
<div className="flex flex-col w-full px-1 md:px-20 md:py-12 py-16">
<div className="flex w-full gap-x-4 items-center pb-6 border-white border-b-2 border-opacity-10">
<img src={image} alt="Github" className="rounded-lg h-16 w-16" />
<div className="w-full flex flex-col gap-y-1">
<div className="items-center flex gap-x-4">
<p className="text-2xl font-semibold text-white">
Import GitHub Repository
</p>
</div>
<p className="text-sm font-base text-white text-opacity-60">
Import all files from a public or private Github repository
and have its files be available in your workspace.
</p>
</div>
</div>
<form className="w-full" onSubmit={handleSubmit}>
{!accessToken && (
<div className="flex flex-col gap-y-1 py-4 ">
<div className="flex flex-col w-fit gap-y-2 bg-blue-600/20 rounded-lg px-4 py-2">
<div className="flex items-center gap-x-2">
<Info size={20} className="shrink-0 text-blue-400" />
<p className="text-blue-400 text-sm">
Trying to collect a GitHub repo without a{" "}
<a
href="https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens"
rel="noreferrer"
target="_blank"
className="underline"
>
Personal Access Token
</a>{" "}
will fail to collect all files due to GitHub API limits.
</p>
</div>
<a
href="https://github.com/settings/personal-access-tokens/new"
rel="noreferrer"
target="_blank"
className="text-blue-400 hover:underline"
>
Create a temporary Access Token for this data connector
&rarr;
</a>
</div>
</div>
)}
<div className="w-full flex flex-col py-2">
<div className="w-full flex items-center gap-4">
<div className="flex flex-col w-60">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-semibold block">
GitHub Repo URL
</label>
<p className="text-xs text-zinc-300">
Url of the GitHub repo you wish to collect.
</p>
</div>
<input
type="url"
name="repo"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="https://github.com/Mintplex-Labs/anything-llm"
required={true}
autoComplete="off"
onChange={(e) => setRepo(e.target.value)}
onBlur={() => setSettings({ ...settings, repo })}
spellCheck={false}
/>
</div>
<div className="flex flex-col w-60">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm block flex gap-x-2 items-center">
<p className="font-semibold ">Github Access Token</p>{" "}
<p className="text-xs text-zinc-300 font-base!">
<i>optional</i>
</p>
</label>
<p className="text-xs text-zinc-300 flex gap-x-2">
Access Token to prevent rate limiting.
</p>
</div>
<input
type="text"
name="accessToken"
className="bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white block w-full p-2.5"
placeholder="github_pat_1234_abcdefg"
required={false}
autoComplete="off"
spellCheck={false}
onChange={(e) => setAccessToken(e.target.value)}
onBlur={() => setSettings({ ...settings, accessToken })}
/>
</div>
<GitHubBranchSelection
repo={settings.repo}
accessToken={settings.accessToken}
/>
</div>
<div className="flex flex-col w-1/2 py-4">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm block flex gap-x-2 items-center">
<p className="font-semibold ">File Ignores</p>
</label>
<p className="text-xs text-zinc-300 flex gap-x-2">
List in .gitignore format to ignore specific files during
collection. Press enter after each entry you want to save.
</p>
</div>
<TagsInput
value={ignores}
onChange={setIgnores}
name="ignores"
placeholder="!*.js, images/*, .DS_Store, bin/*"
classNames={{
tag: "bg-blue-300/10 text-zinc-800 m-1",
input:
"flex bg-zinc-900 text-white placeholder-white placeholder-opacity-60 text-sm rounded-lg focus:border-white p-2.5",
}}
/>
</div>
</div>
<div className="flex flex-col gap-y-2 w-fit">
<button
type="submit"
disabled={loading}
className="mt-2 text-lg w-fit border border-slate-200 px-4 py-1 rounded-lg text-slate-200 items-center flex gap-x-2 hover:bg-slate-200 hover:text-slate-800 disabled:bg-slate-200 disabled:text-slate-800"
>
{loading
? "Collecting files..."
: "Collect all files from GitHub repo"}
</button>
{loading && (
<p className="text-xs text-zinc-300">
Once complete, all files will be available for embedding
into workspaces in the document picker.
</p>
)}
</div>
</form>
</div>
</div>
</div>
</div>
);
}
function GitHubBranchSelection({ repo, accessToken }) {
const [allBranches, setAllBranches] = useState(DEFAULT_BRANCHES);
const [loading, setLoading] = useState(true);
useEffect(() => {
async function fetchAllBranches() {
if (!repo) {
setAllBranches(DEFAULT_BRANCHES);
setLoading(false);
return;
}
setLoading(true);
const { branches } = await System.dataConnectors.github.branches({
repo,
accessToken,
});
setAllBranches(branches.length > 0 ? branches : DEFAULT_BRANCHES);
setLoading(false);
}
fetchAllBranches();
}, [repo, accessToken]);
if (loading) {
return (
<div className="flex flex-col w-60">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-semibold block">
Branch
</label>
<p className="text-xs text-zinc-300">
Branch you wish to collect files of
</p>
</div>
<select
name="branch"
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
<option disabled={true} selected={true}>
-- loading available models --
</option>
</select>
</div>
);
}
return (
<div className="flex flex-col w-60">
<div className="flex flex-col gap-y-1 mb-4">
<label className="text-white text-sm font-semibold block">Branch</label>
<p className="text-xs text-zinc-300">
Branch you wish to collect files of
</p>
</div>
<select
name="branch"
required={true}
className="bg-zinc-900 border border-gray-500 text-white text-sm rounded-lg block w-full p-2.5"
>
{allBranches.map((branch) => {
return (
<option key={branch} value={branch}>
{branch}
</option>
);
})}
</select>
</div>
);
}

View file

@ -0,0 +1,19 @@
import paths from "@/utils/paths";
import { lazy } from "react";
import { useParams } from "react-router-dom";
const Github = lazy(() => import("./Github"));
const CONNECTORS = {
github: Github,
};
export default function DataConnectorSetup() {
const { connector } = useParams();
if (!connector || !CONNECTORS.hasOwnProperty(connector)) {
window.location = paths.home();
return;
}
const Page = CONNECTORS[connector];
return <Page />;
}

View file

@ -0,0 +1,38 @@
import React from "react";
import Sidebar, { SidebarMobileHeader } from "@/components/SettingsSidebar";
import { isMobile } from "react-device-detect";
import DataConnectorOption from "@/components/DataConnectorOption";
/**
 * Settings page listing the available data connectors.
 * The settings sidebar is shown on desktop and a mobile header on mobile
 * (per `isMobile`); one DataConnectorOption card is rendered per connector.
 */
export default function DataConnectors() {
return (
<div className="w-screen h-screen overflow-hidden bg-sidebar flex">
{!isMobile && <Sidebar />}
<div
style={{ height: isMobile ? "100%" : "calc(100% - 32px)" }}
className="relative md:ml-[2px] md:mr-[16px] md:my-[16px] md:rounded-[26px] bg-main-gradient w-full h-full overflow-y-scroll border-4 border-accent"
>
{isMobile && <SidebarMobileHeader />}
<div className="flex w-full">
<div className="flex flex-col w-full px-1 md:px-20 md:py-12 py-16">
<div className="w-full flex flex-col gap-y-1 pb-6 border-white border-b-2 border-opacity-10">
<div className="items-center flex gap-x-4">
<p className="text-2xl font-semibold text-white">
Data Connectors
</p>
</div>
<p className="text-sm font-base text-white text-opacity-60">
Verified data connectors allow you to add more content to your
AnythingLLM workspaces with no custom code or complexity.
<br />
Guaranteed to work with your AnythingLLM instance.
</p>
</div>
<div className="py-4 w-full flex md:flex-wrap overflow-x-scroll gap-4 max-w-full">
<DataConnectorOption slug="github" />
</div>
</div>
</div>
</div>
</div>
);
}

View file

@ -76,5 +76,13 @@ export default {
apiKeys: () => {
return "/settings/api-keys";
},
dataConnectors: {
list: () => {
return "/settings/data-connectors";
},
github: () => {
return "/settings/data-connectors/github";
},
},
},
};

View file

@ -2061,6 +2061,11 @@ react-router@6.12.1:
dependencies:
"@remix-run/router" "1.6.3"
react-tag-input-component@^2.0.2:
version "2.0.2"
resolved "https://registry.yarnpkg.com/react-tag-input-component/-/react-tag-input-component-2.0.2.tgz#f62f013c6a535141dd1c6c3a88858223170150f1"
integrity sha512-dydI9luVwwv9vrjE5u1TTnkcOVkOVL6mhFti8r6hLi78V2F2EKWQOLptURz79UYbDHLSk6tnbvGl8FE+sMpADg==
react-toastify@^9.1.3:
version "9.1.3"
resolved "https://registry.yarnpkg.com/react-toastify/-/react-toastify-9.1.3.tgz#1e798d260d606f50e0fab5ee31daaae1d628c5ff"

View file

@ -0,0 +1,53 @@
const { Telemetry } = require("../../models/telemetry");
const {
forwardExtensionRequest,
} = require("../../utils/files/documentProcessor");
const {
flexUserRoleValid,
} = require("../../utils/middleware/multiUserProtected");
const { validatedRequest } = require("../../utils/middleware/validatedRequest");
/**
 * Registers the extension routes on the given router.
 *
 * Both routes run behind the validatedRequest and flexUserRoleValid middleware
 * and relay the incoming request body to the document processor through
 * forwardExtensionRequest, answering 200 with whatever that call resolves to.
 * Any thrown error is logged and answered with a 500.
 *
 * @param {object} app - Router exposing `post`; nothing is registered when falsy.
 */
function extensionEndpoints(app) {
  if (!app) return;

  // Relays a request for a repository's branch list.
  async function listBranches(request, response) {
    try {
      const processorReply = await forwardExtensionRequest({
        endpoint: "/ext/github-repo/branches",
        method: "POST",
        body: request.body,
      });
      response.status(200).json(processorReply);
    } catch (error) {
      console.error(error);
      response.sendStatus(500).end();
    }
  }

  // Relays a request to load a repository, then records a telemetry event.
  // The event is sent after the relay call returns, without inspecting its result.
  async function loadRepo(request, response) {
    try {
      const processorReply = await forwardExtensionRequest({
        endpoint: "/ext/github-repo",
        method: "POST",
        body: request.body,
      });
      await Telemetry.sendTelemetry("extension_invoked", {
        type: "github_repo",
      });
      response.status(200).json(processorReply);
    } catch (error) {
      console.error(error);
      response.sendStatus(500).end();
    }
  }

  app.post(
    "/ext/github/branches",
    [validatedRequest, flexUserRoleValid],
    listBranches
  );
  app.post("/ext/github/repo", [validatedRequest, flexUserRoleValid], loadRepo);
}
module.exports = { extensionEndpoints };

View file

@ -7,7 +7,7 @@ const {
checkProcessorAlive,
acceptedFileTypes,
} = require("../utils/files/documentProcessor");
const { purgeDocument } = require("../utils/files/purgeDocument");
const { purgeDocument, purgeFolder } = require("../utils/files/purgeDocument");
const { getVectorDbClass } = require("../utils/helpers");
const { updateENV, dumpENV } = require("../utils/helpers/updateENV");
const {
@ -196,8 +196,23 @@ function systemEndpoints(app) {
[validatedRequest],
async (request, response) => {
try {
const { name, meta } = reqBody(request);
await purgeDocument(name, meta);
const { name } = reqBody(request);
await purgeDocument(name);
response.sendStatus(200).end();
} catch (e) {
console.log(e.message, e);
response.sendStatus(500).end();
}
}
);
app.delete(
"/system/remove-folder",
[validatedRequest],
async (request, response) => {
try {
const { name } = reqBody(request);
await purgeFolder(name);
response.sendStatus(200).end();
} catch (e) {
console.log(e.message, e);

View file

@ -18,6 +18,7 @@ const { utilEndpoints } = require("./endpoints/utils");
const { Telemetry } = require("./models/telemetry");
const { developerEndpoints } = require("./endpoints/api");
const setupTelemetry = require("./utils/telemetry");
const { extensionEndpoints } = require("./endpoints/extensions");
const app = express();
const apiRouter = express.Router();
const FILE_LIMIT = "3GB";
@ -34,6 +35,7 @@ app.use(
app.use("/api", apiRouter);
systemEndpoints(apiRouter);
extensionEndpoints(apiRouter);
workspaceEndpoints(apiRouter);
chatEndpoints(apiRouter);
adminEndpoints(apiRouter);

View file

@ -59,9 +59,32 @@ async function processLink(link = "") {
});
}
// The document processor is never exposed to the frontend API directly; the server relays
// requests to it instead. Use this helper to surface one specific processor endpoint.
// `body` is passed through untouched, so it must already be a JSON string.
// Resolves with the processor's parsed JSON reply, or with
// `{ success: false, data: {}, reason }` when the request, status check or JSON parsing fails.
async function forwardExtensionRequest({ endpoint, method, body }) {
  const target = `${PROCESSOR_API}${endpoint}`;
  const options = {
    method,
    body,
    headers: {
      "Content-Type": "application/json",
    },
  };
  // The request is started outside the try block so that only failures of the
  // request itself are converted into the fallback payload below.
  const pending = fetch(target, options);

  try {
    const reply = await pending;
    if (!reply.ok) throw new Error("Response could not be completed");
    return await reply.json();
  } catch (error) {
    console.log(error.message);
    return { success: false, data: {}, reason: error.message };
  }
}
module.exports = {
checkProcessorAlive,
processDocument,
processLink,
acceptedFileTypes,
forwardExtensionRequest,
};

View file

@ -144,18 +144,14 @@ async function storeVectorResult(vectorData = [], filename = null) {
// Purges a file from the documents/ folder.
// `filename` is resolved relative to the documents directory; a falsy value is a no-op.
// The documents directory is `../../storage/documents` (relative to this file) in
// development and `$STORAGE_DIR/documents` otherwise.
// NOTE(review): the two log calls and the two existence checks below look like the
// pre- and post-change lines of a diff shown together — confirm which pair is intended.
async function purgeSourceDocument(filename = null) {
if (!filename) return;
console.log(`Purging document of ${filename}.`);
console.log(`Purging source document of ${filename}.`);
const filePath =
process.env.NODE_ENV === "development"
? path.resolve(__dirname, `../../storage/documents`, filename)
: path.resolve(process.env.STORAGE_DIR, `documents`, filename);
// Nothing to delete when the file is not on disk.
if (!fs.existsSync(filePath)) {
console.log(`Could not located cachefile for ${filename}`, filePath);
return;
}
if (!fs.existsSync(filePath)) return;
fs.rmSync(filePath);
return;
}
@ -163,7 +159,7 @@ async function purgeSourceDocument(filename = null) {
// Purges a vector-cache file from the vector-cache/ folder.
async function purgeVectorCache(filename = null) {
if (!filename) return;
console.log(`Purging cached vectorized results of ${filename}.`);
console.log(`Purging vector-cache of ${filename}.`);
const digest = uuidv5(filename, uuidv5.URL);
const filePath =
@ -171,11 +167,7 @@ async function purgeVectorCache(filename = null) {
? path.resolve(__dirname, `../../storage/vector-cache`, `${digest}.json`)
: path.resolve(process.env.STORAGE_DIR, `vector-cache`, `${digest}.json`);
if (!fs.existsSync(filePath)) {
console.log(`Could not located cache file for ${filename}`, filePath);
return;
}
if (!fs.existsSync(filePath)) return;
fs.rmSync(filePath);
return;
}

View file

@ -1,8 +1,11 @@
const fs = require("fs");
const path = require("path");
const { purgeVectorCache, purgeSourceDocument } = require(".");
const { Document } = require("../../models/documents");
const { Workspace } = require("../../models/workspace");
async function purgeDocument(filename, meta) {
async function purgeDocument(filename) {
const workspaces = await Workspace.where();
for (const workspace of workspaces) {
await Document.removeDocuments(workspace, [filename]);
@ -12,6 +15,45 @@ async function purgeDocument(filename, meta) {
return;
}
/**
 * Deletes a folder inside the documents directory along with everything that
 * references its files: the vector-cache entry of each file and the document
 * associations in every workspace.
 *
 * The folder name is supplied by the `/system/remove-folder` request body, so
 * it is treated as untrusted: the resolved path must be strictly inside the
 * documents directory, otherwise nothing is touched and an error is thrown.
 * The `custom-documents` folder is protected and silently skipped.
 *
 * @param {string} folderName - Folder path relative to the documents directory.
 * @returns {Promise<void>}
 * @throws {TypeError} When `folderName` is not a string.
 * @throws {Error} When `folderName` resolves to the documents directory itself
 *   or to a location outside it, when the folder cannot be read, or when any
 *   of the purge operations fails.
 */
async function purgeFolder(folderName) {
  if (typeof folderName !== "string") {
    throw new TypeError("purgeFolder requires the folder name to be a string.");
  }

  const documentsFolder =
    process.env.NODE_ENV === "development"
      ? path.resolve(__dirname, `../../storage/documents`)
      : path.resolve(process.env.STORAGE_DIR, `documents`);
  const folderPath = path.resolve(documentsFolder, folderName);

  // Normalised location of the target relative to the documents directory.
  // "" means the documents directory itself; a leading ".." segment (or an
  // absolute path, e.g. another drive on Windows) means it lies outside.
  const relativeFolder = path.relative(documentsFolder, folderPath);
  const isOutsideDocuments =
    relativeFolder === "" ||
    relativeFolder === ".." ||
    relativeFolder.startsWith(`..${path.sep}`) ||
    path.isAbsolute(relativeFolder);
  if (isOutsideDocuments) {
    throw new Error(
      `Refusing to purge "${folderName}": it is not a folder inside the documents directory.`
    );
  }

  // Compare the normalised name so spellings such as "./custom-documents" or
  // "custom-documents/" cannot bypass the protection.
  if (relativeFolder === "custom-documents") return;

  const filenames = fs
    .readdirSync(folderPath)
    .map((file) => path.join(relativeFolder, file));
  const workspaces = await Workspace.where();

  // Run all purges concurrently. The promises are awaited directly so that a
  // failure rejects this call instead of leaving it pending forever.
  await Promise.all([
    // Remove associated vector-cache files.
    ...filenames.map((filename) => purgeVectorCache(filename)),
    // Remove workspace document associations.
    ...workspaces.map((workspace) =>
      Document.removeDocuments(workspace, filenames)
    ),
  ]);

  fs.rmSync(folderPath, { recursive: true }); // Delete root document and source files.
  return;
}
module.exports = {
purgeDocument,
purgeFolder,
};