mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-03-13 05:32:24 +00:00
patches and fallbacks
This commit is contained in:
parent
f2b532c64d
commit
4063ad9d5d
3 changed files with 9 additions and 3 deletions
collector/processLink/convert
frontend/src/pages/Admin/AgentBuilder/BlockList
server/utils/agentFlows/executors
|
@ -83,7 +83,7 @@ async function getPageContent(link, captureAs = "text") {
|
|||
async evaluate(page, browser) {
|
||||
const result = await page.evaluate((captureAs) => {
|
||||
if (captureAs === "text") return document.body.innerText;
|
||||
if (captureAs === "html") return document.documentElement.outerHTML;
|
||||
if (captureAs === "html") return document.documentElement.innerHTML;
|
||||
return document.body.innerText;
|
||||
}, captureAs);
|
||||
await browser.close();
|
||||
|
|
|
@ -127,6 +127,7 @@ const BLOCK_INFO = {
|
|||
defaultConfig: {
|
||||
url: "",
|
||||
captureAs: "text",
|
||||
querySelector: "",
|
||||
resultVariable: "",
|
||||
},
|
||||
getSummary: (config) => config.url || "No URL specified",
|
||||
|
|
|
@ -62,11 +62,16 @@ async function executeWebScraping(config, context) {
|
|||
/**
|
||||
* Parse HTML with a CSS selector
|
||||
* @param {string} html - The HTML to parse
|
||||
* @param {string} selector - The CSS selector to use (as text string)
|
||||
* @param {string|null} selector - The CSS selector to use (as text string)
|
||||
* @param {{introspect: Function}} context - The context object
|
||||
* @returns {Object} The parsed content
|
||||
*/
|
||||
function parseHTMLwithSelector(html, selector, context) {
|
||||
function parseHTMLwithSelector(html, selector = null, context) {
|
||||
if (!selector || selector.length === 0) {
|
||||
context.introspect("No selector provided. Returning the entire HTML.");
|
||||
return { success: true, content: html };
|
||||
}
|
||||
|
||||
const Cheerio = require("cheerio");
|
||||
const $ = Cheerio.load(html);
|
||||
const selectedElements = $(selector);
|
||||
|
|
Loading…
Add table
Reference in a new issue