patches and fallbacks

This commit is contained in:
timothycarambat 2025-02-12 17:46:18 -08:00
parent f2b532c64d
commit 4063ad9d5d
3 changed files with 9 additions and 3 deletions
collector/processLink/convert
frontend/src/pages/Admin/AgentBuilder/BlockList
server/utils/agentFlows/executors

View file

@@ -83,7 +83,7 @@ async function getPageContent(link, captureAs = "text") {
async evaluate(page, browser) {
const result = await page.evaluate((captureAs) => {
if (captureAs === "text") return document.body.innerText;
- if (captureAs === "html") return document.documentElement.outerHTML;
+ if (captureAs === "html") return document.documentElement.innerHTML;
return document.body.innerText;
}, captureAs);
await browser.close();

View file

@@ -127,6 +127,7 @@ const BLOCK_INFO = {
defaultConfig: {
url: "",
captureAs: "text",
+ querySelector: "",
resultVariable: "",
},
getSummary: (config) => config.url || "No URL specified",

View file

@@ -62,11 +62,16 @@ async function executeWebScraping(config, context) {
/**
* Parse HTML with a CSS selector
* @param {string} html - The HTML to parse
- * @param {string} selector - The CSS selector to use (as text string)
+ * @param {string|null} selector - The CSS selector to use (as text string)
* @param {{introspect: Function}} context - The context object
* @returns {Object} The parsed content
*/
- function parseHTMLwithSelector(html, selector, context) {
+ function parseHTMLwithSelector(html, selector = null, context) {
+ if (!selector || selector.length === 0) {
+ context.introspect("No selector provided. Returning the entire HTML.");
+ return { success: true, content: html };
+ }
const Cheerio = require("cheerio");
const $ = Cheerio.load(html);
const selectedElements = $(selector);