From 2a7e4f2b7181c85f996c81ea44143cca9ac9182c Mon Sep 17 00:00:00 2001 From: sabaimran Date: Sun, 2 Jul 2023 09:13:28 -0700 Subject: [PATCH] Escape special characters in the URL when adding a link to the remote file --- src/khoj/processor/markdown/markdown_to_jsonl.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/khoj/processor/markdown/markdown_to_jsonl.py b/src/khoj/processor/markdown/markdown_to_jsonl.py index e7eae240..21cbda72 100644 --- a/src/khoj/processor/markdown/markdown_to_jsonl.py +++ b/src/khoj/processor/markdown/markdown_to_jsonl.py @@ -2,6 +2,7 @@ import glob import logging import re +import urllib3 from pathlib import Path from typing import List @@ -145,7 +146,8 @@ class MarkdownToJsonl(TextToJsonl): # Check if raw_filename is a URL. If so, save it as is. If not, convert it to a Path. if type(raw_filename) == str and re.search(r"^https?://", raw_filename): - entry_filename = raw_filename + # Escape the URL to avoid issues with special characters + entry_filename = urllib3.util.parse_url(raw_filename).url else: entry_filename = str(Path(raw_filename)) stem = Path(raw_filename).stem