mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-05-02 09:03:12 +00:00
[FEAT] Confluence data connector (#1181)
* WIP Confluence data connector backend * confluence data connector complete * confluence citations * fix citation for confluence * Patch Confluence integration * fix Citation Icon for confluence --------- Co-authored-by: timothycarambat <rambat1010@gmail.com>
This commit is contained in:
parent
11f6419c3c
commit
348b36bf85
11 changed files with 458 additions and 68 deletions
collector
frontend/src
components
DataConnectorOption/media
Modals/MangeWorkspace/DataConnectors
WorkspaceChat/ChatContainer/ChatHistory/Citation
media/dataConnectors
models
server/endpoints/extensions
|
@ -4,69 +4,112 @@ const { reqBody } = require("../utils/http");
|
|||
function extensions(app) {
|
||||
if (!app) return;
|
||||
|
||||
app.post("/ext/github-repo", [verifyPayloadIntegrity], async function (request, response) {
|
||||
try {
|
||||
const loadGithubRepo = require("../utils/extensions/GithubRepo");
|
||||
const { success, reason, data } = await loadGithubRepo(reqBody(request));
|
||||
response.status(200).json({
|
||||
success,
|
||||
reason,
|
||||
data
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(200).json({
|
||||
success: false,
|
||||
reason: e.message || "A processing error occurred.",
|
||||
data: {},
|
||||
});
|
||||
app.post(
|
||||
"/ext/github-repo",
|
||||
[verifyPayloadIntegrity],
|
||||
async function (request, response) {
|
||||
try {
|
||||
const loadGithubRepo = require("../utils/extensions/GithubRepo");
|
||||
const { success, reason, data } = await loadGithubRepo(
|
||||
reqBody(request)
|
||||
);
|
||||
response.status(200).json({
|
||||
success,
|
||||
reason,
|
||||
data,
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(200).json({
|
||||
success: false,
|
||||
reason: e.message || "A processing error occurred.",
|
||||
data: {},
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
});
|
||||
);
|
||||
|
||||
// gets all branches for a specific repo
|
||||
app.post("/ext/github-repo/branches", [verifyPayloadIntegrity], async function (request, response) {
|
||||
try {
|
||||
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
|
||||
const allBranches = await (new GithubRepoLoader(reqBody(request))).getRepoBranches()
|
||||
response.status(200).json({
|
||||
success: true,
|
||||
reason: null,
|
||||
data: {
|
||||
branches: allBranches
|
||||
}
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(400).json({
|
||||
success: false,
|
||||
reason: e.message,
|
||||
data: {
|
||||
branches: []
|
||||
}
|
||||
});
|
||||
app.post(
|
||||
"/ext/github-repo/branches",
|
||||
[verifyPayloadIntegrity],
|
||||
async function (request, response) {
|
||||
try {
|
||||
const GithubRepoLoader = require("../utils/extensions/GithubRepo/RepoLoader");
|
||||
const allBranches = await new GithubRepoLoader(
|
||||
reqBody(request)
|
||||
).getRepoBranches();
|
||||
response.status(200).json({
|
||||
success: true,
|
||||
reason: null,
|
||||
data: {
|
||||
branches: allBranches,
|
||||
},
|
||||
});
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(400).json({
|
||||
success: false,
|
||||
reason: e.message,
|
||||
data: {
|
||||
branches: [],
|
||||
},
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
});
|
||||
);
|
||||
|
||||
app.post("/ext/youtube-transcript", [verifyPayloadIntegrity], async function (request, response) {
|
||||
try {
|
||||
const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
|
||||
const { success, reason, data } = await loadYouTubeTranscript(reqBody(request));
|
||||
response.status(200).json({ success, reason, data });
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(400).json({
|
||||
success: false,
|
||||
reason: e.message,
|
||||
data: {
|
||||
title: null,
|
||||
author: null
|
||||
}
|
||||
});
|
||||
app.post(
|
||||
"/ext/youtube-transcript",
|
||||
[verifyPayloadIntegrity],
|
||||
async function (request, response) {
|
||||
try {
|
||||
const loadYouTubeTranscript = require("../utils/extensions/YoutubeTranscript");
|
||||
const { success, reason, data } = await loadYouTubeTranscript(
|
||||
reqBody(request)
|
||||
);
|
||||
response.status(200).json({ success, reason, data });
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
response.status(400).json({
|
||||
success: false,
|
||||
reason: e.message,
|
||||
data: {
|
||||
title: null,
|
||||
author: null,
|
||||
},
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
return;
|
||||
});
|
||||
);
|
||||
|
||||
// Confluence connector endpoint: hands the request body to the Confluence
// loader and relays its { success, reason, data } result to the caller.
app.post(
  "/ext/confluence",
  [verifyPayloadIntegrity],
  async function (request, response) {
    try {
      const loadConfluence = require("../utils/extensions/Confluence");
      const { success, reason, data } = await loadConfluence(
        reqBody(request)
      );
      response.status(200).json({ success, reason, data });
    } catch (e) {
      console.error(e);
      // The failure payload mirrors the keys the loader returns on success
      // (spaceKey / destination) instead of the title/author pair that
      // belongs to the YouTube transcript endpoint.
      response.status(400).json({
        success: false,
        reason: e.message,
        data: {
          spaceKey: null,
          destination: null,
        },
      });
    }
    return;
  }
);
|
||||
}
|
||||
|
||||
module.exports = extensions;
|
||||
|
|
|
@ -49,4 +49,4 @@
|
|||
"nodemon": "^2.0.22",
|
||||
"prettier": "^2.4.1"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
110
collector/utils/extensions/Confluence/index.js
Normal file
110
collector/utils/extensions/Confluence/index.js
Normal file
|
@ -0,0 +1,110 @@
|
|||
const fs = require("fs");
|
||||
const path = require("path");
|
||||
const { default: slugify } = require("slugify");
|
||||
const { v4 } = require("uuid");
|
||||
const { writeToServerDocuments } = require("../../files");
|
||||
const { tokenizeString } = require("../../tokenizer");
|
||||
const {
|
||||
ConfluencePagesLoader,
|
||||
} = require("langchain/document_loaders/web/confluence");
|
||||
|
||||
/**
 * Tests a URL against the Atlassian-hosted Confluence space layout
 * (`https://<subdomain>.atlassian.net/wiki/spaces/<spaceKey>...`).
 *
 * @param {string} [spaceUrl=""] - URL to test.
 * @returns {{valid: boolean, result: (object|null)}} `result` is whatever the
 *   url-pattern matcher returned (the caller reads `subdomain` and
 *   `spaceKey` from it), or null when the URL does not match.
 */
function validSpaceUrl(spaceUrl = "") {
  const UrlPattern = require("url-pattern");
  const spacePattern = new UrlPattern(
    "https\\://(:subdomain).atlassian.net/wiki/spaces/(:spaceKey)*"
  );
  const params = spacePattern.match(spaceUrl);
  return params
    ? { valid: true, result: params }
    : { valid: false, result: null };
}
|
||||
|
||||
/**
 * Loads the pages of a Confluence space and writes each one as a document
 * into a newly created folder under the server's document storage.
 *
 * @param {Object} [args]
 * @param {string} args.pageUrl - URL inside the target space; must match the
 *   `https://<subdomain>.atlassian.net/wiki/spaces/<spaceKey>` layout.
 * @param {string} args.username - Confluence username passed to the loader.
 * @param {string} args.accessToken - Access token passed to the loader.
 * @returns {Promise<{success: boolean, reason: (string|null), data?: {spaceKey: string, destination: string}}>}
 *   `data` is only present on success; `destination` is the output folder name.
 */
async function loadConfluence({ pageUrl, username, accessToken } = {}) {
  // All three inputs are mandatory; there is no token-only mode here.
  if (!pageUrl || !username || !accessToken) {
    return {
      success: false,
      reason:
        "You need a page URL, a username, and an access token to use the Confluence connector.",
    };
  }

  const validSpace = validSpaceUrl(pageUrl);
  if (!validSpace.result) {
    return {
      success: false,
      reason:
        "Confluence space URL is not in the expected format of https://domain.atlassian.net/wiki/spaces/~SPACEID/*",
    };
  }

  const { subdomain, spaceKey } = validSpace.result;
  console.log(`-- Working Confluence ${subdomain}.atlassian.net --`);
  const loader = new ConfluencePagesLoader({
    baseUrl: `https://${subdomain}.atlassian.net/wiki`,
    spaceKey,
    username,
    accessToken,
  });

  // Convert a loader rejection into a value so it can be reported through
  // the normal { success: false, reason } result instead of throwing.
  const { docs, error } = await loader
    .load()
    .then((docs) => {
      return { docs, error: null };
    })
    .catch((e) => {
      return {
        docs: [],
        error: e.message?.split("Error:")?.[1] || e.message,
      };
    });

  if (!docs.length || !!error) {
    return {
      success: false,
      reason: error ?? "No pages found for that Confluence space.",
    };
  }

  // Short random suffix keeps repeated imports of one site in separate folders.
  const outFolder = slugify(
    `${subdomain}-confluence-${v4().slice(0, 4)}`
  ).toLowerCase();
  const outFolderPath = path.resolve(
    __dirname,
    `../../../../server/storage/documents/${outFolder}`
  );
  // recursive: do not fail when the parent storage directory is missing yet.
  fs.mkdirSync(outFolderPath, { recursive: true });

  for (const doc of docs) {
    const data = {
      id: v4(),
      url: doc.metadata.url + ".page",
      title: doc.metadata.title || doc.metadata.source,
      docAuthor: subdomain,
      description: doc.metadata.title,
      docSource: `${subdomain} Confluence`,
      chunkSource: `confluence://${doc.metadata.url}`,
      published: new Date().toLocaleString(),
      wordCount: doc.pageContent.split(" ").length,
      pageContent: doc.pageContent,
      token_count_estimate: tokenizeString(doc.pageContent).length,
    };

    console.log(
      `[Confluence Loader]: Saving ${doc.metadata.title} to ${outFolder}`
    );
    // Name the file from the resolved title (which already falls back to the
    // source) so an untitled page does not hand slugify a non-string.
    const fileLabel = slugify(String(data.title || "untitled"));
    writeToServerDocuments(data, `${fileLabel}-${data.id}`, outFolderPath);
  }

  return {
    success: true,
    reason: null,
    data: {
      spaceKey,
      destination: outFolder,
    },
  };
}
|
||||
|
||||
module.exports = loadConfluence;
|
Binary file not shown.
After ![]() (image error) Size: 5.5 KiB |
|
@ -1,9 +1,11 @@
|
|||
import Github from "./github.svg";
|
||||
import YouTube from "./youtube.svg";
|
||||
import Confluence from "./confluence.jpeg";
|
||||
|
||||
const ConnectorImages = {
|
||||
github: Github,
|
||||
youtube: YouTube,
|
||||
confluence: Confluence,
|
||||
};
|
||||
|
||||
export default ConnectorImages;
|
||||
|
|
|
@ -0,0 +1,164 @@
|
|||
import { useState } from "react";
|
||||
import System from "@/models/system";
|
||||
import showToast from "@/utils/toast";
|
||||
import { Warning } from "@phosphor-icons/react";
|
||||
import { Tooltip } from "react-tooltip";
|
||||
|
||||
export default function ConfluenceOptions() {
|
||||
const [loading, setLoading] = useState(false);
|
||||
|
||||
const handleSubmit = async (e) => {
|
||||
e.preventDefault();
|
||||
const form = new FormData(e.target);
|
||||
|
||||
try {
|
||||
setLoading(true);
|
||||
showToast(
|
||||
"Fetching all pages for Confluence space - this may take a while.",
|
||||
"info",
|
||||
{
|
||||
clear: true,
|
||||
autoClose: false,
|
||||
}
|
||||
);
|
||||
const { data, error } = await System.dataConnectors.confluence.collect({
|
||||
pageUrl: form.get("pageUrl"),
|
||||
username: form.get("username"),
|
||||
accessToken: form.get("accessToken"),
|
||||
});
|
||||
|
||||
if (!!error) {
|
||||
showToast(error, "error", { clear: true });
|
||||
setLoading(false);
|
||||
return;
|
||||
}
|
||||
|
||||
showToast(
|
||||
`Pages collected from Confluence space ${data.spaceKey}. Output folder is ${data.destination}.`,
|
||||
"success",
|
||||
{ clear: true }
|
||||
);
|
||||
e.target.reset();
|
||||
setLoading(false);
|
||||
} catch (e) {
|
||||
console.error(e);
|
||||
showToast(e.message, "error", { clear: true });
|
||||
setLoading(false);
|
||||
}
|
||||
};
|
||||
|
||||
return (
|
||||
<div className="flex w-full">
|
||||
<div className="flex flex-col w-full px-1 md:pb-6 pb-16">
|
||||
<form className="w-full" onSubmit={handleSubmit}>
|
||||
<div className="w-full flex flex-col py-2">
|
||||
<div className="w-full flex flex-col gap-4">
|
||||
<div className="flex flex-col pr-10">
|
||||
<div className="flex flex-col gap-y-1 mb-4">
|
||||
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
|
||||
<p className="font-bold text-white">Confluence Page URL</p>
|
||||
</label>
|
||||
<p className="text-xs font-normal text-white/50">
|
||||
URL of a page in the Confluence space.
|
||||
</p>
|
||||
</div>
|
||||
<input
|
||||
type="url"
|
||||
name="pageUrl"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="https://example.atlassian.net/wiki/spaces/~7120208c08555d52224113949698b933a3bb56/pages/851969/Test+anythingLLM+page"
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col pr-10">
|
||||
<div className="flex flex-col gap-y-1 mb-4">
|
||||
<label className="text-white text-sm font-bold">
|
||||
Confluence Username
|
||||
</label>
|
||||
<p className="text-xs font-normal text-white/50">
|
||||
Your Confluence username.
|
||||
</p>
|
||||
</div>
|
||||
<input
|
||||
type="email"
|
||||
name="username"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="jdoe@example.com"
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
<div className="flex flex-col pr-10">
|
||||
<div className="flex flex-col gap-y-1 mb-4">
|
||||
<label className="text-white text-sm font-bold flex gap-x-2 items-center">
|
||||
<p className="font-bold text-white">
|
||||
Confluence Access Token
|
||||
</p>
|
||||
<Warning
|
||||
size={14}
|
||||
className="ml-1 text-orange-500 cursor-pointer"
|
||||
data-tooltip-id="access-token-tooltip"
|
||||
data-tooltip-place="right"
|
||||
/>
|
||||
<Tooltip
|
||||
delayHide={300}
|
||||
id="access-token-tooltip"
|
||||
className="max-w-xs"
|
||||
clickable={true}
|
||||
>
|
||||
<p className="text-sm">
|
||||
You need to provide an access token for authentication.
|
||||
You can generate an access token{" "}
|
||||
<a
|
||||
href="https://id.atlassian.com/manage-profile/security/api-tokens"
|
||||
target="_blank"
|
||||
rel="noopener noreferrer"
|
||||
className="underline"
|
||||
onClick={(e) => e.stopPropagation()}
|
||||
>
|
||||
here
|
||||
</a>
|
||||
.
|
||||
</p>
|
||||
</Tooltip>
|
||||
</label>
|
||||
<p className="text-xs font-normal text-white/50">
|
||||
Access token for authentication.
|
||||
</p>
|
||||
</div>
|
||||
<input
|
||||
type="password"
|
||||
name="accessToken"
|
||||
className="bg-zinc-900 text-white placeholder:text-white/20 text-sm rounded-lg focus:border-white block w-full p-2.5"
|
||||
placeholder="abcd1234"
|
||||
required={true}
|
||||
autoComplete="off"
|
||||
spellCheck={false}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="flex flex-col gap-y-2 w-full pr-10">
|
||||
<button
|
||||
type="submit"
|
||||
disabled={loading}
|
||||
className="mt-2 w-full justify-center border border-slate-200 px-4 py-2 rounded-lg text-[#222628] text-sm font-bold items-center flex gap-x-2 bg-slate-200 hover:bg-slate-300 hover:text-slate-800 disabled:bg-slate-300 disabled:cursor-not-allowed"
|
||||
>
|
||||
{loading ? "Collecting pages..." : "Submit"}
|
||||
</button>
|
||||
{loading && (
|
||||
<p className="text-xs text-white/50">
|
||||
Once complete, all pages will be available for embedding into
|
||||
workspaces.
|
||||
</p>
|
||||
)}
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
|
@ -2,6 +2,7 @@ import ConnectorImages from "@/components/DataConnectorOption/media";
|
|||
import { MagnifyingGlass } from "@phosphor-icons/react";
|
||||
import GithubOptions from "./Connectors/Github";
|
||||
import YoutubeOptions from "./Connectors/Youtube";
|
||||
import ConfluenceOptions from "./Connectors/Confluence";
|
||||
import { useState } from "react";
|
||||
import ConnectorOption from "./ConnectorOption";
|
||||
|
||||
|
@ -20,6 +21,12 @@ export const DATA_CONNECTORS = {
|
|||
"Import the transcription of an entire YouTube video from a link.",
|
||||
options: <YoutubeOptions />,
|
||||
},
|
||||
confluence: {
|
||||
name: "Confluence",
|
||||
image: ConnectorImages.confluence,
|
||||
description: "Import an entire Confluence page in a single click.",
|
||||
options: <ConfluenceOptions />,
|
||||
},
|
||||
};
|
||||
|
||||
export default function DataConnectors() {
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
import { memo, useState } from "react";
|
||||
import React, { memo, useState } from "react";
|
||||
import { v4 } from "uuid";
|
||||
import { decode as HTMLDecode } from "he";
|
||||
import truncate from "truncate";
|
||||
|
@ -14,6 +14,7 @@ import {
|
|||
X,
|
||||
YoutubeLogo,
|
||||
} from "@phosphor-icons/react";
|
||||
import ConfluenceLogo from "@/media/dataConnectors/confluence.png";
|
||||
import { Tooltip } from "react-tooltip";
|
||||
import { toPercentString } from "@/utils/numbers";
|
||||
|
||||
|
@ -202,13 +203,6 @@ function CitationDetailModal({ source, onClose }) {
|
|||
);
|
||||
}
|
||||
|
||||
const ICONS = {
|
||||
file: FileText,
|
||||
link: Link,
|
||||
youtube: YoutubeLogo,
|
||||
github: GithubLogo,
|
||||
};
|
||||
|
||||
// Show the correct title and/or display text for citations
|
||||
// which contain valid outbound links that can be clicked by the
|
||||
// user when viewing a citation. Optionally allows various icons
|
||||
|
@ -221,10 +215,17 @@ function parseChunkSource({ title = "", chunks = [] }) {
|
|||
icon: "file",
|
||||
};
|
||||
|
||||
if (!chunks.length || !chunks[0].chunkSource.startsWith("link://"))
|
||||
if (
|
||||
!chunks.length ||
|
||||
(!chunks[0].chunkSource.startsWith("link://") &&
|
||||
!chunks[0].chunkSource.startsWith("confluence://"))
|
||||
)
|
||||
return nullResponse;
|
||||
try {
|
||||
const url = new URL(chunks[0].chunkSource.split("link://")[1]);
|
||||
const url = new URL(
|
||||
chunks[0].chunkSource.split("link://")[1] ||
|
||||
chunks[0].chunkSource.split("confluence://")[1]
|
||||
);
|
||||
let text = url.host + url.pathname;
|
||||
let icon = "link";
|
||||
|
||||
|
@ -238,6 +239,11 @@ function parseChunkSource({ title = "", chunks = [] }) {
|
|||
icon = "github";
|
||||
}
|
||||
|
||||
if (url.host.includes("atlassian.net")) {
|
||||
text = title;
|
||||
icon = "confluence";
|
||||
}
|
||||
|
||||
return {
|
||||
isUrl: true,
|
||||
href: url.toString(),
|
||||
|
@ -247,3 +253,16 @@ function parseChunkSource({ title = "", chunks = [] }) {
|
|||
} catch {}
|
||||
return nullResponse;
|
||||
}
|
||||
|
||||
// Patch to render Confluence icon as a element like we do with Phosphor
|
||||
const ConfluenceIcon = ({ ...props }) => (
|
||||
<img src={ConfluenceLogo} {...props} />
|
||||
);
|
||||
|
||||
const ICONS = {
|
||||
file: FileText,
|
||||
link: Link,
|
||||
youtube: YoutubeLogo,
|
||||
github: GithubLogo,
|
||||
confluence: ConfluenceIcon,
|
||||
};
|
||||
|
|
BIN
frontend/src/media/dataConnectors/confluence.png
Normal file
BIN
frontend/src/media/dataConnectors/confluence.png
Normal file
Binary file not shown.
After ![]() (image error) Size: 9.4 KiB |
|
@ -60,6 +60,29 @@ const DataConnector = {
|
|||
});
|
||||
},
|
||||
},
|
||||
|
||||
confluence: {
|
||||
collect: async function ({ pageUrl, username, accessToken }) {
|
||||
return await fetch(`${API_BASE}/ext/confluence`, {
|
||||
method: "POST",
|
||||
headers: baseHeaders(),
|
||||
body: JSON.stringify({
|
||||
pageUrl,
|
||||
username,
|
||||
accessToken,
|
||||
}),
|
||||
})
|
||||
.then((res) => res.json())
|
||||
.then((res) => {
|
||||
if (!res.success) throw new Error(res.reason);
|
||||
return { data: res.data, error: null };
|
||||
})
|
||||
.catch((e) => {
|
||||
console.error(e);
|
||||
return { data: null, error: e.message };
|
||||
});
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
export default DataConnector;
|
||||
|
|
|
@ -71,6 +71,28 @@ function extensionEndpoints(app) {
|
|||
}
|
||||
}
|
||||
);
|
||||
|
||||
// Confluence extension endpoint: forwards the request body through
// CollectorApi to "/ext/confluence", records an "extension_invoked"
// telemetry event, then returns the forwarded result. Limited to the
// admin and manager roles.
app.post(
  "/ext/confluence",
  [validatedRequest, flexUserRoleValid([ROLES.admin, ROLES.manager])],
  async (request, response) => {
    try {
      const collector = new CollectorApi();
      const collectorReply = await collector.forwardExtensionRequest({
        endpoint: "/ext/confluence",
        method: "POST",
        body: request.body,
      });
      await Telemetry.sendTelemetry("extension_invoked", {
        type: "confluence",
      });
      response.status(200).json(collectorReply);
    } catch (e) {
      console.error(e);
      response.sendStatus(500).end();
    }
  }
);
|
||||
}
|
||||
|
||||
module.exports = { extensionEndpoints };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue