Skip to content

Commit 98ee71e

Browse files
authored
GH-33241: [Archery] Replace github3 with pygithub (#48886)
### Rationale for this change Archery currently uses both pygithub and github3. It's unnecessary and increases maintenance burden. ### What changes are included in this PR? Replace all github3 usage with pygithub. ### Are these changes tested? Yes, unittests added. ### Are there any user-facing changes? No. * GitHub Issue: #33241 Authored-by: Fangchen Li <fangchen.li@outlook.com> Signed-off-by: Raúl Cumplido <raulcumplido@gmail.com>
1 parent 2aaac07 commit 98ee71e

7 files changed

Lines changed: 379 additions & 103 deletions

File tree

ci/conda_env_archery.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
click
2020

2121
# bot, crossbow
22-
github3.py
2322
jinja2
2423
jira
2524
pygit2

ci/conda_env_crossbow.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,10 @@
1616
# under the License.
1717

1818
click
19-
github3.py
2019
jinja2
2120
jira
2221
pygit2
22+
pygithub
2323
ruamel.yaml
2424
setuptools_scm
2525
toolz

dev/archery/README.md

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,6 @@ to use the functionality of it:
4141
To install: `pip install -e "arrow/dev/archery[release]"`
4242
* crossbow – to trigger + interact with the crossbow build system
4343
To install: `pip install -e "arrow/dev/archery[crossbow]"`
44-
* crossbow-upload
45-
To install: `pip install -e "arrow/dev/archery[crossbow-upload]"`
4644

4745
Additionally, if you would prefer to install everything at once,
4846
`pip install -e "arrow/dev/archery[all]"` is an alias for all of

dev/archery/archery/crossbow/cli.py

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -531,13 +531,13 @@ def need_download():
531531
return False
532532

533533
if need_download():
534-
import github3
534+
from github import GithubException
535535
max_n_retries = 5
536536
n_retries = 0
537537
while True:
538538
try:
539-
asset.download(path)
540-
except github3.exceptions.GitHubException as error:
539+
asset.download_asset(str(path))
540+
except GithubException as error:
541541
n_retries += 1
542542
if n_retries == max_n_retries:
543543
raise
@@ -565,12 +565,11 @@ def need_download():
565565
@click.argument('patterns', nargs=-1, required=True)
566566
@click.option('--sha', required=True, help='Target committish')
567567
@click.option('--tag', required=True, help='Target tag')
568-
@click.option('--method', default='curl', help='Use cURL to upload')
569568
@click.pass_obj
570-
def upload_artifacts(obj, tag, sha, patterns, method):
569+
def upload_artifacts(obj, tag, sha, patterns):
571570
queue = obj['queue']
572571
queue.github_overwrite_release_assets(
573-
tag_name=tag, target_commitish=sha, method=method, patterns=patterns
572+
tag_name=tag, target_commitish=sha, patterns=patterns
574573
)
575574

576575

dev/archery/archery/crossbow/core.py

Lines changed: 56 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
import time
2323
import logging
2424
import mimetypes
25-
import subprocess
2625
import textwrap
2726
import uuid
2827
from io import StringIO
@@ -34,11 +33,11 @@
3433
from ruamel.yaml import YAML
3534

3635
try:
37-
import github3
38-
_have_github3 = True
36+
from github import Github, GithubException
37+
from github import Auth as GithubAuth
38+
_have_github = True
3939
except ImportError:
40-
github3 = object
41-
_have_github3 = False
40+
_have_github = False
4241

4342
try:
4443
import pygit2
@@ -52,7 +51,7 @@
5251
from ..utils.source import ArrowSources
5352

5453

55-
for pkg in ["requests", "urllib3", "github3"]:
54+
for pkg in ["requests", "urllib3", "github"]:
5655
logging.getLogger(pkg).setLevel(logging.WARNING)
5756

5857
logger = logging.getLogger("crossbow")
@@ -448,110 +447,85 @@ def file_contents(self, commit_id, file):
448447
blob = self.repo[entry.id]
449448
return blob.data
450449

451-
def _github_login(self, github_token):
452-
"""Returns a logged in github3.GitHub instance"""
453-
if not _have_github3:
454-
raise ImportError('Must install github3.py')
450+
def _github_login(self, github_token=None):
451+
"""Returns a logged in Github instance using PyGithub"""
452+
if not _have_github:
453+
raise ImportError('Must install PyGithub')
455454
github_token = github_token or self.github_token
456-
session = github3.session.GitHubSession(
457-
default_connect_timeout=10,
458-
default_read_timeout=30
459-
)
460-
github = github3.GitHub(session=session)
461-
github.login(token=github_token)
462-
return github
455+
return Github(auth=GithubAuth.Token(github_token), timeout=30)
463456

464457
def as_github_repo(self, github_token=None):
465458
"""Converts it to a repository object which wraps the GitHub API"""
466459
if self._github_repo is None:
467460
github = self._github_login(github_token)
468461
username, reponame = _parse_github_user_repo(self.remote_url)
469-
self._github_repo = github.repository(username, reponame)
462+
self._github_repo = github.get_repo(f"{username}/{reponame}")
470463
return self._github_repo
471464

472465
def token_expiration_date(self, github_token=None):
473466
"""Returns the expiration date for the github_token provided"""
474467
github = self._github_login(github_token)
475-
# github3 hides the headers from us. Use the _get method
476-
# to access the response headers.
477-
resp = github._get(github.session.base_url)
478-
# Response in the form '2023-01-23 10:40:28 UTC'
479-
date_string = resp.headers.get(
480-
'github-authentication-token-expiration')
468+
# PyGithub doesn't expose the token expiration header through a
469+
# dedicated API, so request it via the public Requester escape hatch.
470+
headers, _ = github.requester.requestJsonAndCheck("GET", "/user")
471+
# Response header in the form '2023-01-23 10:40:28 UTC'
472+
date_string = headers.get('github-authentication-token-expiration')
481473
if date_string:
482474
return date.fromisoformat(date_string.split()[0])
475+
return None
483476

484477
def github_commit(self, sha):
485478
repo = self.as_github_repo()
486-
return repo.commit(sha)
479+
return repo.get_commit(sha)
487480

488481
def github_release(self, tag):
489482
repo = self.as_github_repo()
490483
try:
491-
return repo.release_from_tag(tag)
492-
except github3.exceptions.NotFoundError:
493-
return None
494-
495-
def github_upload_asset_requests(self, release, path, name, mime,
496-
max_retries=None, retry_backoff=None):
484+
return repo.get_release(tag)
485+
except GithubException as e:
486+
if e.status == 404:
487+
return None
488+
raise
489+
490+
def github_upload_asset(self, release, path, name, mime,
491+
max_retries=None, retry_backoff=None):
497492
if max_retries is None:
498493
max_retries = int(os.environ.get('CROSSBOW_MAX_RETRIES', 8))
499494
if retry_backoff is None:
500495
retry_backoff = int(os.environ.get('CROSSBOW_RETRY_BACKOFF', 5))
501496

502497
for i in range(max_retries):
503498
try:
504-
with open(path, 'rb') as fp:
505-
result = release.upload_asset(name=name, asset=fp,
506-
content_type=mime)
507-
except github3.exceptions.ResponseError as e:
499+
result = release.upload_asset(path, name=name,
500+
content_type=mime)
501+
logger.info(f"Attempt {i + 1} has finished.")
502+
return result
503+
except GithubException as e:
508504
logger.error(f"Attempt {i + 1} has failed with message: {e}.")
509-
logger.error(f"Error message {e.msg}")
510-
logger.error("List of errors provided by GitHub:")
511-
for err in e.errors:
512-
logger.error(f" - {err}")
505+
if hasattr(e, 'data'):
506+
logger.error(f"Error data: {e.data}")
513507

514-
if e.code == 422:
508+
if e.status == 422:
515509
# 422 Validation Failed, probably raised because
516510
# ReleaseAsset already exists, so try to remove it before
517511
# reattempting the asset upload
518-
for asset in release.assets():
512+
for asset in release.get_assets():
519513
if asset.name == name:
520514
logger.info(f"Release asset {name} already exists, "
521515
"removing it...")
522-
asset.delete()
516+
asset.delete_asset()
523517
logger.info(f"Asset {name} removed.")
524518
break
525-
except github3.exceptions.ConnectionError as e:
519+
except IOError as e:
520+
# Catch network and file I/O errors (includes requests exceptions)
526521
logger.error(f"Attempt {i + 1} has failed with message: {e}.")
527-
else:
528-
logger.info(f"Attempt {i + 1} has finished.")
529-
return result
530522

531523
time.sleep(retry_backoff)
532524

533525
raise RuntimeError('GitHub asset uploading has failed!')
534526

535-
def github_upload_asset_curl(self, release, path, name, mime):
536-
upload_url, _ = release.upload_url.split('{?')
537-
upload_url += f"?name={name}"
538-
539-
command = [
540-
'curl',
541-
'--fail',
542-
'-H', f"Authorization: token {self.github_token}",
543-
'-H', f"Content-Type: {mime}",
544-
'--data-binary', f'@{path}',
545-
upload_url
546-
]
547-
return subprocess.run(command, shell=False, check=True)
548-
549527
def github_overwrite_release_assets(self, tag_name, target_commitish,
550-
patterns, method='requests'):
551-
# Since github has changed something the asset uploading via requests
552-
# got instable, so prefer the cURL alternative.
553-
# Potential cause:
554-
# sigmavirus24/github3.py/issues/779#issuecomment-379470626
528+
patterns):
555529
repo = self.as_github_repo()
556530
if not tag_name:
557531
raise CrossbowError('Empty tag name')
@@ -560,13 +534,14 @@ def github_overwrite_release_assets(self, tag_name, target_commitish,
560534

561535
# remove the whole release if it already exists
562536
try:
563-
release = repo.release_from_tag(tag_name)
564-
except github3.exceptions.NotFoundError:
565-
pass
566-
else:
567-
release.delete()
568-
569-
release = repo.create_release(tag_name, target_commitish)
537+
release = repo.get_release(tag_name)
538+
release.delete_release()
539+
except GithubException as e:
540+
if e.status != 404:
541+
raise
542+
543+
release = repo.create_git_release(tag_name, tag_name, "",
544+
target_commitish=target_commitish)
570545
for pattern in patterns:
571546
for path in glob.glob(pattern, recursive=True):
572547
name = os.path.basename(path)
@@ -578,16 +553,7 @@ def github_overwrite_release_assets(self, tag_name, target_commitish,
578553
f"{size}..."
579554
)
580555

581-
if method == 'requests':
582-
self.github_upload_asset_requests(release, path, name=name,
583-
mime=mime)
584-
elif method == 'curl':
585-
self.github_upload_asset_curl(release, path, name=name,
586-
mime=mime)
587-
else:
588-
raise CrossbowError(
589-
f"Unsupported upload method {method}"
590-
)
556+
self.github_upload_asset(release, path, name=name, mime=mime)
591557

592558
def github_pr(self, title, head=None, base=None, body=None,
593559
github_token=None, create=False):
@@ -598,12 +564,11 @@ def github_pr(self, title, head=None, base=None, body=None,
598564
repo = self.as_github_repo(github_token=github_token)
599565
if create:
600566
return repo.create_pull(title=title, base=base, head=head,
601-
body=body)
567+
body=body or "")
602568
else:
603569
# Retrieve open PR for base and head.
604570
# There should be a single open one with that title.
605-
for pull in repo.pull_requests(state="open", head=head,
606-
base=base):
571+
for pull in repo.get_pulls(state="open", head=head, base=base):
607572
if title in pull.title:
608573
return pull
609574
raise CrossbowError(
@@ -1005,7 +970,7 @@ class TaskStatus:
1005970
1006971
Parameters
1007972
----------
1008-
commit : github3.Commit
973+
commit : github.Commit.Commit
1009974
Commit to query the combined status for.
1010975
1011976
Returns
@@ -1019,8 +984,8 @@ class TaskStatus:
1019984
"""
1020985

1021986
def __init__(self, commit):
1022-
status = commit.status()
1023-
check_runs = list(commit.check_runs())
987+
status = commit.get_combined_status()
988+
check_runs = list(commit.get_check_runs())
1024989
states = [s.state for s in status.statuses]
1025990

1026991
for check in check_runs:
@@ -1068,7 +1033,7 @@ def __init__(self, github_release, artifact_patterns,
10681033
if github_release is None:
10691034
github_assets = {} # no assets have been uploaded for the task
10701035
else:
1071-
github_assets = {a.name: a for a in github_release.assets()}
1036+
github_assets = {a.name: a for a in github_release.get_assets()}
10721037

10731038
if not validate_patterns:
10741039
# shortcut to avoid pattern validation and just set all artifacts
@@ -1088,9 +1053,10 @@ def __init__(self, github_release, artifact_patterns,
10881053
elif num_matches == 1:
10891054
self[pattern] = github_assets[matches[0].group(0)]
10901055
else:
1056+
matched_names = [m.group(0) for m in matches]
10911057
raise CrossbowError(
10921058
f"Only a single asset should match pattern `{pattern}`, "
1093-
f"there are multiple ones: {', '.join(matches)}"
1059+
f"there are multiple ones: {', '.join(matched_names)}"
10941060
)
10951061

10961062
def missing_patterns(self):

0 commit comments

Comments
 (0)