mirror of
https://github.com/khoj-ai/khoj.git
synced 2025-02-17 16:14:21 +00:00
Fix passing auth token to Github API to increase rate limits by x85
- Previously, the "token" prefix was not added to the PAT in the Authorization header, so requests were treated as unauthenticated.
- Unauthenticated requests to the GitHub API are limited to 60 requests/hour; authenticated requests are allowed 5000 requests/hour.
This commit is contained in:
parent
9c70af960c
commit
87975e589a
1 changed file with 5 additions and 7 deletions
|
@ -77,12 +77,9 @@ class GithubToJsonl(TextToJsonl):
|
|||
return entries_with_ids
|
||||
|
||||
def get_markdown_files(self):
|
||||
# set the url to get the contents of the repository
|
||||
# Get the contents of the repository
|
||||
repo_content_url = f"{self.repo_url}/git/trees/{self.config.repo_branch}"
|
||||
# set the headers to include the authentication token
|
||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
||||
|
||||
# get the contents of the repository
|
||||
headers = {"Authorization": f"token {self.config.pat_token}"}
|
||||
response = requests.get(repo_content_url, headers=headers)
|
||||
contents = response.json()
|
||||
|
||||
|
@ -91,6 +88,7 @@ class GithubToJsonl(TextToJsonl):
|
|||
if result is not None:
|
||||
return result
|
||||
|
||||
# Extract markdown files from the repository
|
||||
markdown_files = []
|
||||
for item in contents["tree"]:
|
||||
# Find all markdown files in the repository
|
||||
|
@ -105,7 +103,7 @@ class GithubToJsonl(TextToJsonl):
|
|||
|
||||
def get_file_contents(self, file_url):
|
||||
# Get text from each markdown file
|
||||
headers = {"Authorization": f"{self.config.pat_token}", "Accept": "application/vnd.github.v3.raw"}
|
||||
headers = {"Authorization": f"token {self.config.pat_token}", "Accept": "application/vnd.github.v3.raw"}
|
||||
response = requests.get(file_url, headers=headers)
|
||||
|
||||
# Wait for rate limit reset if needed
|
||||
|
@ -117,8 +115,8 @@ class GithubToJsonl(TextToJsonl):
|
|||
|
||||
def get_commits(self) -> List[Dict]:
|
||||
# Get commit messages from the repository using the Github API
|
||||
headers = {"Authorization": f"{self.config.pat_token}"}
|
||||
commits_url = f"{self.repo_url}/commits"
|
||||
headers = {"Authorization": f"token {self.config.pat_token}"}
|
||||
commits = []
|
||||
|
||||
while commits_url is not None:
|
||||
|
|
Loading…
Add table
Reference in a new issue