Skip to content

Commit

Permalink
Merge pull request #2873 from ytausch/continue-git-backend-part-2
Browse files Browse the repository at this point in the history
(Git-2) replace get_repo with new Git Backend Logic
  • Loading branch information
beckermr authored Sep 3, 2024
2 parents 1e7c5d1 + a5a8207 commit dd50121
Show file tree
Hide file tree
Showing 8 changed files with 341 additions and 109 deletions.
118 changes: 88 additions & 30 deletions conda_forge_tick/auto_tick.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import github
import github3
import github3.repos
import networkx as nx
import tqdm
from conda.models.version import VersionOrder
Expand All @@ -29,8 +30,11 @@
from conda_forge_tick.deploy import deploy
from conda_forge_tick.feedstock_parser import BOOTSTRAP_MAPPINGS
from conda_forge_tick.git_utils import (
DryRunBackend,
GitPlatformBackend,
RepositoryNotFoundError,
comment_on_pr,
get_repo,
github3_client,
github_backend,
is_github_api_limit_reached,
push_repo,
Expand Down Expand Up @@ -140,9 +144,55 @@ def _get_pre_pr_migrator_attempts(attrs, migrator_name, *, is_version):
return pri.get("pre_pr_migrator_attempts", {}).get(migrator_name, 0)


def _prepare_feedstock_repository(
    backend: GitPlatformBackend,
    context: ClonedFeedstockContext,
    branch: str,
    base_branch: str,
) -> bool:
    """
    Fork the feedstock repository and clone the fork into context.local_clone_dir.

    A missing upstream repository is not raised to the caller: it is logged, stored under the "bad" key of the
    pr_info attribute of the feedstock context, and signalled through the return value. Any other exception
    raised by the backend propagates unchanged.

    :param backend: The GitPlatformBackend instance used for forking and cloning.
    :param context: The current context; supplies the repository owner, repository name and clone directory.
    :param branch: The name of the new branch passed to the backend when cloning.
    :param base_branch: The base branch to branch from.
    :return: True if the repository was forked and cloned, False if the upstream repository was not found.
    """
    try:
        backend.fork(context.git_repo_owner, context.git_repo_name)
    except RepositoryNotFoundError:
        logger.warning(
            f"Could not fork {context.git_repo_owner}/{context.git_repo_name}: Not Found"
        )

        not_found_message = f"{context.feedstock_name}: Git repository not found."
        logger.critical(
            f"Failed to migrate {context.feedstock_name}, {not_found_message}",
        )

        # Record the failure on the feedstock's pr_info so it is kept alongside the other PR data.
        with context.attrs["pr_info"] as pr_info:
            pr_info["bad"] = not_found_message

        return False
    else:
        # Only reached when forking succeeded; failures while cloning are not caught here.
        backend.clone_fork_and_branch(
            upstream_owner=context.git_repo_owner,
            repo_name=context.git_repo_name,
            target_dir=context.local_clone_dir,
            new_branch=branch,
            base_branch=base_branch,
        )
        return True


def run_with_tmpdir(
context: FeedstockContext,
migrator: Migrator,
git_backend: GitPlatformBackend,
rerender: bool = True,
base_branch: str = "main",
dry_run: bool = False,
Expand All @@ -161,6 +211,7 @@ def run_with_tmpdir(
return run(
context=cloned_context,
migrator=migrator,
git_backend=git_backend,
rerender=rerender,
base_branch=base_branch,
dry_run=dry_run,
Expand All @@ -171,6 +222,7 @@ def run_with_tmpdir(
def run(
context: ClonedFeedstockContext,
migrator: Migrator,
git_backend: GitPlatformBackend,
rerender: bool = True,
base_branch: str = "main",
dry_run: bool = False,
Expand All @@ -184,6 +236,8 @@ def run(
The current feedstock context, already containing information about a temporary directory for the feedstock.
migrator: Migrator instance
The migrator to run on the feedstock
git_backend: GitPlatformBackend
The git backend to use. Use the DryRunBackend for testing.
rerender : bool
Whether to rerender
base_branch : str, optional
Expand All @@ -202,9 +256,6 @@ def run(
# sometimes we get weird directory issues so make sure we reset
os.chdir(BOT_HOME_DIR)

# get the repo
branch_name = migrator.remote_branch(context) + "_h" + uuid4().hex[0:6]

migrator_name = get_migrator_name(migrator)
is_version_migration = isinstance(migrator, Version)
_increment_pre_pr_migrator_attempt(
Expand All @@ -213,20 +264,25 @@ def run(
is_version=is_version_migration,
)

# TODO: run this in parallel
repo = get_repo(context=context, branch=branch_name, base_branch=base_branch)

feedstock_dir = str(context.local_clone_dir)
if not feedstock_dir or not repo:
logger.critical(
"Failed to migrate %s, %s",
context.feedstock_name,
context.attrs.get("pr_info", {}).get("bad"),
)
branch_name = migrator.remote_branch(context) + "_h" + uuid4().hex[0:6]
if not _prepare_feedstock_repository(
git_backend,
context,
branch_name,
base_branch,
):
# something went wrong during forking or cloning
return False, False

# need to use an absolute path here
feedstock_dir = os.path.abspath(feedstock_dir)
feedstock_dir = str(context.local_clone_dir.resolve())

# This is needed because we want to migrate to the new backend step-by-step
repo: github3.repos.Repository | None = github3_client().repository(
context.git_repo_owner, context.git_repo_name
)

assert repo is not None

migration_run_data = run_migration(
migrator=migrator,
Expand Down Expand Up @@ -566,7 +622,8 @@ def _run_migrator_on_feedstock_branch(
attrs,
base_branch,
migrator,
fctx,
fctx: FeedstockContext,
git_backend: GitPlatformBackend,
dry_run,
mctx,
migrator_name,
Expand All @@ -581,6 +638,7 @@ def _run_migrator_on_feedstock_branch(
migrator_uid, pr_json = run_with_tmpdir(
context=fctx,
migrator=migrator,
git_backend=git_backend,
rerender=migrator.rerender,
hash_type=attrs.get("hash_type", "sha256"),
base_branch=base_branch,
Expand Down Expand Up @@ -745,7 +803,9 @@ def _is_migrator_done(_mg_start, good_prs, time_per, pr_limit):
return False


def _run_migrator(migrator, mctx, temp, time_per, dry_run):
def _run_migrator(
migrator, mctx, temp, time_per, dry_run, git_backend: GitPlatformBackend
):
_mg_start = time.time()

migrator_name = get_migrator_name(migrator)
Expand Down Expand Up @@ -863,14 +923,15 @@ def _run_migrator(migrator, mctx, temp, time_per, dry_run):
)
):
good_prs, break_loop = _run_migrator_on_feedstock_branch(
attrs,
base_branch,
migrator,
fctx,
dry_run,
mctx,
migrator_name,
good_prs,
attrs=attrs,
base_branch=base_branch,
migrator=migrator,
fctx=fctx,
git_backend=git_backend,
dry_run=dry_run,
mctx=mctx,
migrator_name=migrator_name,
good_prs=good_prs,
)
if break_loop:
break
Expand Down Expand Up @@ -1119,14 +1180,11 @@ def main(ctx: CliContext) -> None:
),
flush=True,
)
git_backend = github_backend() if not ctx.dry_run else DryRunBackend()

for mg_ind, migrator in enumerate(migrators):
good_prs = _run_migrator(
migrator,
mctx,
temp,
time_per_migrator[mg_ind],
ctx.dry_run,
migrator, mctx, temp, time_per_migrator[mg_ind], ctx.dry_run, git_backend
)
if good_prs > 0:
pass
Expand Down
4 changes: 4 additions & 0 deletions conda_forge_tick/contexts.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ def __post_init__(self):
DEFAULT_BRANCHES.get(self.feedstock_name, "main"),
)

@property
def git_repo_owner(self) -> str:
    """Return the owner of the feedstock's git repository, which is always "conda-forge"."""
    return "conda-forge"

@property
def git_repo_name(self) -> str:
    """Return the git repository name: the feedstock name followed by "-feedstock"."""
    return "{}-feedstock".format(self.feedstock_name)
Expand Down
94 changes: 38 additions & 56 deletions conda_forge_tick/git_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from datetime import datetime
from functools import cached_property
from pathlib import Path
from typing import Dict, Literal, Optional, Union
from typing import Dict, Optional, Union

import backoff
import github
Expand All @@ -28,7 +28,7 @@
# and pull all the needed info from the various source classes)
from conda_forge_tick.lazy_json_backends import LazyJson

from .contexts import ClonedFeedstockContext, FeedstockContext
from .contexts import FeedstockContext
from .executors import lock_git_operation
from .os_utils import pushd
from .utils import get_bot_run_url, run_command_hiding_token
Expand Down Expand Up @@ -404,8 +404,8 @@ def does_repository_exist(self, owner: str, repo_name: str) -> bool:
"""
pass

@staticmethod
def get_remote_url(
self,
owner: str,
repo_name: str,
connection_mode: GitConnectionMode = GitConnectionMode.HTTPS,
Expand All @@ -416,6 +416,8 @@ def get_remote_url(
:param repo_name: The name of the repository.
:param connection_mode: The connection mode to use.
:raises ValueError: If the connection mode is not supported.
:raises RepositoryNotFoundError: If the repository does not exist. This is only raised if the backend relies on
the repository existing to generate the URL.
"""
# Currently we don't need any abstraction for other platforms than GitHub, so we don't build such abstractions.
match connection_mode:
Expand Down Expand Up @@ -641,6 +643,12 @@ class DryRunBackend(GitPlatformBackend):
def __init__(self):
super().__init__(GitCli())
self._repos: set[str] = set()
self._repos: dict[str, str] = {}
"""
_repos maps from repository name to the owner of the upstream repository.
If a remote URL of a fork is requested with get_remote_url, _USER (the virtual current user) is
replaced by the owner of the upstream repository. This allows cloning the forked repository.
"""

def get_api_requests_left(self) -> Bound:
    """Return Bound.INFINITY: this backend always reports an unlimited number of API requests left."""
    return Bound.INFINITY
Expand All @@ -654,15 +662,40 @@ def does_repository_exist(self, owner: str, repo_name: str) -> bool:
self.get_remote_url(owner, repo_name, GitConnectionMode.HTTPS)
)

def get_remote_url(
    self,
    owner: str,
    repo_name: str,
    connection_mode: GitConnectionMode = GitConnectionMode.HTTPS,
) -> str:
    """
    Build the remote URL of a repository, redirecting virtual forks to their upstream repository.

    If the owner is the virtual user (_USER), the owner recorded in _repos for this repository name is used
    to build the URL instead. For every other owner the URL is built by the parent class unchanged.

    :param owner: The owner of the repository.
    :param repo_name: The name of the repository.
    :param connection_mode: The connection mode to use.
    :raises RepositoryNotFoundError: If the owner is the virtual user but no fork of repo_name is recorded.
    :return: The remote URL as returned by the parent class.
    """
    if owner == self._USER:
        # redirect to the upstream repository
        try:
            effective_owner = self._repos[repo_name]
        except KeyError:
            raise RepositoryNotFoundError(
                f"Repository {owner}/{repo_name} appears to be a virtual fork but does not exist. Note that dry-run "
                "forks are persistent only for the duration of the backend instance."
            )
    else:
        effective_owner = owner

    return super().get_remote_url(effective_owner, repo_name, connection_mode)

@lock_git_operation()
def fork(self, owner: str, repo_name: str):
if repo_name in self._repos:
raise ValueError(f"Fork of {repo_name} already exists.")
logger.debug(f"Fork of {repo_name} already exists. Doing nothing.")
return

if not self.does_repository_exist(owner, repo_name):
raise RepositoryNotFoundError(
f"Cannot fork non-existing repository {owner}/{repo_name}."
)

logger.debug(
f"Dry Run: Creating fork of {owner}/{repo_name} for user {self._USER}."
)
self._repos.add(repo_name)
self._repos[repo_name] = owner

def _sync_default_branch(self, upstream_owner: str, upstream_repo: str):
logger.debug(
Expand Down Expand Up @@ -720,57 +753,6 @@ def feedstock_repo(fctx: FeedstockContext) -> str:
return fctx.feedstock_name + "-feedstock"


@lock_git_operation()
def get_repo(
    context: ClonedFeedstockContext,
    branch: str,
    base_branch: str = "main",
) -> github3.repos.Repository | Literal[False]:
    """Fork and clone the feedstock repository and return its github3 object.

    Parameters
    ----------
    context : ClonedFeedstockContext
        Feedstock context used for constructing feedstock urls, etc. The clone
        is placed in ``context.local_clone_dir``.
    branch : str
        The branch to be made.
    base_branch : str, optional
        The base branch from which to make the new branch.

    Returns
    -------
    repo : github3 repository or False
        The github3 repository object of the upstream repository, or ``False``
        if the upstream repository could not be forked because it was not
        found. In that case the error is stored in ``pr_info["bad"]``.
    """
    upstream_owner = "conda-forge"
    backend = github_backend()
    feedstock_repo_name = feedstock_repo(context)

    try:
        backend.fork(upstream_owner, feedstock_repo_name)
    except RepositoryNotFoundError:
        logger.warning(f"Could not fork conda-forge/{feedstock_repo_name}")
        with context.attrs["pr_info"] as pri:
            pri["bad"] = f"{context.feedstock_name}: Git repository not found.\n"
        # Return a plain False, as the annotation promises. The previous
        # `return False, False` produced a non-empty tuple, which is truthy and
        # therefore slipped past callers that check `if not repo`.
        return False

    backend.clone_fork_and_branch(
        upstream_owner=upstream_owner,
        repo_name=feedstock_repo_name,
        target_dir=context.local_clone_dir,
        new_branch=branch,
        base_branch=base_branch,
    )

    # This is needed because we want to migrate to the new backend step-by-step
    repo: github3.repos.Repository | None = github3_client().repository(
        upstream_owner, feedstock_repo_name
    )

    assert repo is not None

    return repo


@lock_git_operation()
def delete_branch(pr_json: LazyJson, dry_run: bool = False) -> None:
ref = pr_json["head"]["ref"]
Expand Down
1 change: 1 addition & 0 deletions conda_forge_tick/migrators_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,7 @@ class AttrsTypedDict_(TypedDict, total=False):
raw_meta_yaml: str
req: Set[str]
platforms: List[str]
pr_info: typing.Any
requirements: RequirementsTypedDict
source: SourceTypedDict
test: TestTypedDict
Expand Down
Loading

0 comments on commit dd50121

Please sign in to comment.