mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-30 19:03:01 +01:00
Store the GitHub URL of each Markdown file in the file's JSONL path param
This commit is contained in:
parent
0c1c7583b5
commit
63ec84ad78
1 changed file with 6 additions and 1 deletion
|
@ -87,11 +87,16 @@ class GithubToJsonl(TextToJsonl):
|
||||||
for item in contents["tree"]:
|
for item in contents["tree"]:
|
||||||
# Find all markdown files in the repository
|
# Find all markdown files in the repository
|
||||||
if item["type"] == "blob" and item["path"].endswith(".md"):
|
if item["type"] == "blob" and item["path"].endswith(".md"):
|
||||||
|
# Create URL for each markdown file on Github
|
||||||
|
url_path = f'https://github.com/{self.config.repo_owner}/{self.config.repo_name}/blob/{self.config.repo_branch}/{item["path"]}'
|
||||||
|
|
||||||
# Get text from each markdown file
|
# Get text from each markdown file
|
||||||
file_content_url = f'{self.repo_url}/contents/{item["path"]}'
|
file_content_url = f'{self.repo_url}/contents/{item["path"]}'
|
||||||
headers["Accept"] = "application/vnd.github.v3.raw"
|
headers["Accept"] = "application/vnd.github.v3.raw"
|
||||||
markdown_file_contents = requests.get(file_content_url, headers=headers).content.decode("utf-8")
|
markdown_file_contents = requests.get(file_content_url, headers=headers).content.decode("utf-8")
|
||||||
markdown_files += [{"content": markdown_file_contents, "path": item["path"]}]
|
|
||||||
|
# Add markdown file contents and URL to list
|
||||||
|
markdown_files += [{"content": markdown_file_contents, "path": url_path}]
|
||||||
|
|
||||||
return markdown_files
|
return markdown_files
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue