mirror of
https://github.com/khoj-ai/khoj.git
synced 2024-11-27 17:35:07 +01:00
Extract logic to get file content from Github into a separate method
This commit is contained in:
parent
10d4c38ce9
commit
9c70af960c
1 changed file with 13 additions and 6 deletions
|
@ -98,16 +98,23 @@ class GithubToJsonl(TextToJsonl):
|
|||
# Create URL for each markdown file on Github
|
||||
url_path = f'https://github.com/{self.config.repo_owner}/{self.config.repo_name}/blob/{self.config.repo_branch}/{item["path"]}'
|
||||
|
||||
# Get text from each markdown file
|
||||
file_content_url = f'{self.repo_url}/contents/{item["path"]}'
|
||||
headers["Accept"] = "application/vnd.github.v3.raw"
|
||||
markdown_file_contents = requests.get(file_content_url, headers=headers).content.decode("utf-8")
|
||||
|
||||
# Add markdown file contents and URL to list
|
||||
markdown_files += [{"content": markdown_file_contents, "path": url_path}]
|
||||
markdown_files += [{"content": self.get_file_contents(item["url"]), "path": url_path}]
|
||||
|
||||
return markdown_files
|
||||
|
||||
def get_file_contents(self, file_url):
    """Download one file from the Github API and return it as text.

    Args:
        file_url: URL of the file to fetch; passed to ``requests.get`` as-is.

    Returns:
        The value produced by ``self.wait_for_rate_limit_reset`` when that
        is not ``None``; otherwise the response body decoded as UTF-8.
    """
    # Send the configured token and request the "raw" media type.
    request_headers = {
        "Authorization": f"{self.config.pat_token}",
        "Accept": "application/vnd.github.v3.raw",
    }
    response = requests.get(file_url, headers=request_headers)

    # Wait for rate limit reset if needed. The helper receives this method
    # and its argument -- presumably so it can retry the request after
    # waiting (TODO confirm against wait_for_rate_limit_reset).
    retried = self.wait_for_rate_limit_reset(response, self.get_file_contents, file_url)
    if retried is None:
        # The helper produced nothing, so use the response already fetched.
        return response.content.decode("utf-8")

    return retried
|
||||
|
||||
def get_commits(self) -> List[Dict]:
|
||||
# Get commit messages from the repository using the Github API
|
||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
||||
|
|
Loading…
Reference in a new issue