Limit the number of URLs the webscraper can extract for scraping

sabaimran 2024-11-01 16:48:36 -07:00
parent 327fcb8f62
commit a213b593e8


@@ -54,6 +54,7 @@ OLOSTEP_QUERY_PARAMS = {
}
DEFAULT_MAX_WEBPAGES_TO_READ = 1
MAX_WEBPAGES_TO_INFER = 10
async def search_online(
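Of the two caps above, DEFAULT_MAX_WEBPAGES_TO_READ is the per-call default wired into the read_webpages signature below, while MAX_WEBPAGES_TO_INFER is not exercised in these hunks. A hedged sketch of how a caller might use it; the helper and its call-site logic are assumptions for illustration, not part of this commit:

```python
# Sketch only: assumed relationship between the two caps added above.
DEFAULT_MAX_WEBPAGES_TO_READ = 1  # conservative default for ordinary queries
MAX_WEBPAGES_TO_INFER = 10        # assumed larger budget a caller may opt into

def choose_webpage_budget(deep_research: bool) -> int:
    # Hypothetical call-site logic: opt into the larger inference budget
    # only when the caller explicitly wants broader research.
    return MAX_WEBPAGES_TO_INFER if deep_research else DEFAULT_MAX_WEBPAGES_TO_READ
```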
@@ -157,6 +158,7 @@ async def read_webpages(
query_images: List[str] = None,
agent: Agent = None,
tracer: dict = {},
max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ,
):
"Infer web pages to read from the query and extract relevant information from them"
logger.info(f"Inferring web pages to read")
@@ -165,6 +167,9 @@ async def read_webpages(
yield {ChatEvent.STATUS: event}
urls = await infer_webpage_urls(query, conversation_history, location, user, query_images)
# Keep only the top max_webpages_to_read inferred web pages
urls = urls[:max_webpages_to_read]
logger.info(f"Reading web pages at: {urls}")
if send_status_func:
webpage_links_str = "\n- " + "\n- ".join(list(urls))
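Taken together, the change threads a max_webpages_to_read budget through read_webpages and truncates the inferred URL list before any scraping happens. A minimal runnable sketch of that flow, with infer_webpage_urls replaced by a stub and the logging and status plumbing dropped:

```python
import asyncio
from typing import List

DEFAULT_MAX_WEBPAGES_TO_READ = 1

async def infer_webpage_urls_stub(query: str) -> List[str]:
    # Stand-in for the real LLM-backed URL inference, which may
    # return more URLs than we are willing to scrape.
    return [f"https://example.com/result/{i}" for i in range(25)]

async def read_webpages_sketch(
    query: str, max_webpages_to_read: int = DEFAULT_MAX_WEBPAGES_TO_READ
) -> List[str]:
    urls = await infer_webpage_urls_stub(query)
    # The commit's core change: cap the inferred URL list before scraping.
    urls = urls[:max_webpages_to_read]
    print(f"Reading web pages at: {urls}")
    return urls

# With an explicit budget of 3, only the first 3 inferred URLs are read;
# with the default, only 1 is.
asyncio.run(read_webpages_sketch("khoj release notes", max_webpages_to_read=3))
```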