mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-28 01:45:07 +01:00
Store Github URL of Markdown files on Github in file jsonl param
This commit is contained in:
parent
0c1c7583b5
commit
63ec84ad78
1 changed file with 6 additions and 1 deletion
|
@@ -87,11 +87,16 @@ class GithubToJsonl(TextToJsonl):
|
|||
for item in contents["tree"]:
|
||||
# Find all markdown files in the repository
|
||||
if item["type"] == "blob" and item["path"].endswith(".md"):
|
||||
# Create URL for each markdown file on Github
|
||||
url_path = f'https://github.com/{self.config.repo_owner}/{self.config.repo_name}/blob/{self.config.repo_branch}/{item["path"]}'
|
||||
|
||||
# Get text from each markdown file
|
||||
file_content_url = f'{self.repo_url}/contents/{item["path"]}'
|
||||
headers["Accept"] = "application/vnd.github.v3.raw"
|
||||
markdown_file_contents = requests.get(file_content_url, headers=headers).content.decode("utf-8")
|
||||
markdown_files += [{"content": markdown_file_contents, "path": item["path"]}]
|
||||
|
||||
# Add markdown file contents and URL to list
|
||||
markdown_files += [{"content": markdown_file_contents, "path": url_path}]
|
||||
|
||||
return markdown_files
|
||||
|
||||
|
|
Loading…
Reference in a new issue