mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-03-13 05:32:24 +00:00
* Add `querySelectorAll` capability to web-scraping block * patches and fallbacks * fix styles of text in web scraping block --------- Co-authored-by: shatfield4 <seanhatfield5@gmail.com>
23 lines
788 B
JavaScript
23 lines
788 B
JavaScript
const { validURL } = require("../utils/url");
|
|
const { scrapeGenericUrl } = require("./convert/generic");
|
|
|
|
/**
 * Validate a link and, when valid, hand it to the generic URL scraper.
 * @param {string} link - The link to process
 * @returns {Promise<object>} `{ success: false, reason: "Not a valid URL." }` when
 * `validURL` rejects the link; otherwise whatever `scrapeGenericUrl(link)` resolves to.
 */
async function processLink(link) {
  // Reject early so the scraper is never invoked with a malformed URL.
  if (!validURL(link)) return { success: false, reason: "Not a valid URL." };
  return await scrapeGenericUrl(link);
}
|
|
|
|
/**
 * Get the text content of a link
 * @param {string} link - The link to get the text content of
 * @param {('html' | 'text' | 'json')} captureAs - The format to capture the page content as
 * @returns {Promise<{success: boolean, content?: string, reason?: string}>} - Response from collector,
 * or `{ success: false, reason: "Not a valid URL." }` when the link fails `validURL`.
 */
async function getLinkText(link, captureAs = "text") {
  // Reject early so the scraper is never invoked with a malformed URL.
  if (!validURL(link)) return { success: false, reason: "Not a valid URL." };
  // NOTE(review): the third argument (`false`) is defined by scrapeGenericUrl, which is
  // not visible here — confirm its meaning against ./convert/generic.
  return await scrapeGenericUrl(link, captureAs, false);
}
|
|
|
|
module.exports = {
|
|
processLink,
|
|
getLinkText,
|
|
};
|