Escape special characters in the URL when adding a link to the remote file

This commit is contained in:
sabaimran 2023-07-02 09:13:28 -07:00
parent c747562897
commit 2a7e4f2b71

View file

@@ -2,6 +2,7 @@
import glob import glob
import logging import logging
import re import re
import urllib3
from pathlib import Path from pathlib import Path
from typing import List from typing import List
@@ -145,7 +146,8 @@ class MarkdownToJsonl(TextToJsonl):
# Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path. # Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path.
if type(raw_filename) == str and re.search(r"^https?://", raw_filename): if type(raw_filename) == str and re.search(r"^https?://", raw_filename):
entry_filename = raw_filename # Escape the URL to avoid issues with special characters
entry_filename = urllib3.util.parse_url(raw_filename).url
else: else:
entry_filename = str(Path(raw_filename)) entry_filename = str(Path(raw_filename))
stem = Path(raw_filename).stem stem = Path(raw_filename).stem