Mirror of https://github.com/khoj-ai/khoj.git, synced 2024-11-24 07:55:07 +01:00
Set page size to 100 to reduce requests to the Github API to 1/3

- Default page size is 30, so the number of paginated requests needed to fetch all items (commits, files) drops by roughly 67% (e.g., 300 commits now take 3 requests instead of 10).
- No need to increase the page size for the get-tree Github API request made from `get_markdown_files` (see the sketch below). The get-tree Github API doesn't support pagination and returns up to 100K items in a single response, which should be far more than enough for our current use-cases.
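For reference, the get-tree call can fetch the whole file listing in one request. A minimal sketch, assuming the standard Github git/trees endpoint; the function name get_repo_tree, the branch parameter, and the URL construction are illustrative, not taken from the khoj codebase:

import requests

def get_repo_tree(repo_url: str, branch: str, pat_token: str) -> list:
    """Fetch a repository's full file tree in a single request; the trees API is not paginated."""
    tree_url = f"{repo_url}/git/trees/{branch}?recursive=true"
    headers = {"Authorization": f"token {pat_token}"}
    response = requests.get(tree_url, headers=headers)
    # One response carries up to 100K tree entries, so no per_page tuning is needed here
    return response.json().get("tree", [])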
This commit is contained in:
parent 87975e589a
commit 6fdac24416
1 changed file with 2 additions and 1 deletion
@@ -117,11 +117,12 @@ class GithubToJsonl(TextToJsonl):
         # Get commit messages from the repository using the Github API
         commits_url = f"{self.repo_url}/commits"
         headers = {"Authorization": f"token {self.config.pat_token}"}
+        params = {"per_page": 100}
         commits = []
 
         while commits_url is not None:
             # Get the next page of commits
-            response = requests.get(commits_url, headers=headers)
+            response = requests.get(commits_url, headers=headers, params=params)
             raw_commits = response.json()
 
             # Wait for rate limit reset if needed
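For context, a minimal, self-contained sketch of the paginated fetch loop this hunk modifies, assuming the next-page URL is read from the response's Link header; the function name fetch_all_commits and its parameters are illustrative rather than the actual khoj method:

import requests

def fetch_all_commits(repo_url: str, pat_token: str) -> list:
    """Fetch every commit of a repository, 100 per page instead of the default 30."""
    commits_url = f"{repo_url}/commits"
    headers = {"Authorization": f"token {pat_token}"}
    params = {"per_page": 100}  # Github's maximum page size; default is 30

    commits = []
    while commits_url is not None:
        # Get the next page of commits
        response = requests.get(commits_url, headers=headers, params=params)
        commits.extend(response.json())

        # Github advertises the next page in the Link header; it is absent on the last page
        commits_url = response.links.get("next", {}).get("url")

    return commits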