mirror of
https://github.com/Mintplex-Labs/anything-llm.git
synced 2025-04-17 18:18:11 +00:00
Handle non-ASCII characters in single and bulk link scraper URLs (#2495)
Handle non-ASCII characters in URLs
This commit is contained in:
parent
93d7ce6d34
commit
41522cdfb4
2 changed files with 4 additions and 2 deletions
collector
|
@@ -27,7 +27,8 @@ async function scrapeGenericUrl(link, textOnly = false) {
|
|||
}
|
||||
|
||||
const url = new URL(link);
|
||||
- const filename = (url.host + "-" + url.pathname).replace(".", "_");
|
||||
+ const decodedPathname = decodeURIComponent(url.pathname);
|
||||
+ const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
|
||||
|
||||
const data = {
|
||||
id: v4(),
|
||||
|
|
|
@@ -108,7 +108,8 @@ async function bulkScrapePages(links, outFolderPath) {
|
|||
}
|
||||
|
||||
const url = new URL(link);
|
||||
- const filename = (url.host + "-" + url.pathname).replace(".", "_");
|
||||
+ const decodedPathname = decodeURIComponent(url.pathname);
|
||||
+ const filename = `${url.hostname}${decodedPathname.replace(/\//g, '_')}`;
|
||||
|
||||
const data = {
|
||||
id: v4(),
|
||||
|
|
Loading…
Add table
Reference in a new issue