def git_fetch(
    destination_path: str,
    ref: str,
    remote: str = "origin",
    tags: bool = False,
    shallow: bool = False,
    env: Optional[Dict[str, str]] = None,
):
    """Run ``git fetch`` for ``ref`` from ``remote`` inside a checkout.

    Args:
        destination_path: Directory in which the git commands are executed.
        ref: Ref or revision handed to ``git fetch``. For shallow fetches it
            is also the object probed with ``git cat-file -e``.
        remote: Remote name or URL to fetch from.
        tags: When true, ``--tags --force`` is added to the fetch.
        shallow: When true, fetch with an increasing ``--deepen`` value
            (10, 20, ... 90) until ``ref`` exists in the checkout.
        env: Extra environment forwarded to the fetch command.

    When ``shallow`` is true and ``ref`` is still missing after the last
    attempt, an error is printed and the process exits with status 1.
    """
    args = ["git", "fetch"]
    if tags:
        # `--force` is needed to be able to update an existing outdated tag.
        args.extend(["--tags", "--force"])

    args.extend([remote, ref])

    if not shallow:
        retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env)
        return

    for deepen in range(10, 100, 10):
        # Build a fresh argument list for every attempt. Inserting into
        # `args` in place would leave one extra `--deepen` flag behind per
        # iteration.
        fetch_args = args[:2] + [f"--deepen={deepen}"] + args[2:]
        run_command(b"vcs", fetch_args, cwd=destination_path, extra_env=env)

        # The probe has to run inside the checkout; without `cwd` it would
        # inspect whatever repository the process happens to be started in.
        ret = run_command(
            b"vcs",
            ["git", "cat-file", "-e", ref],
            cwd=destination_path,
        )
        if ret == 0:
            return

    print(f"unable to fetch {ref} from {remote} in shallow clone")
    sys.exit(1)
build products) print_line(b"vcs", b"cleaning git checkout...\n") @@ -594,12 +623,12 @@ def git_checkout( destination_path: str, head_repo: str, base_repo: Optional[str], - base_ref: Optional[str], base_rev: Optional[str], ref: Optional[str], commit: Optional[str], ssh_key_file: Optional[Path], ssh_known_hosts_file: Optional[Path], + shallow_clone: bool = False, ): env = { # abort if transfer speed is lower than 1kB/s for 1 minute @@ -636,62 +665,38 @@ def git_checkout( args = [ "git", "clone", + ] + + if shallow_clone: + # Use shallow clone with depth 1 for minimal history + args.extend(["--depth=1"]) + # Skip checkout initially + args.extend(["--no-checkout"]) + + args.extend([ base_repo if base_repo else head_repo, destination_path, - ] + ]) retry_required_command(b"vcs", args, extra_env=env) - if base_ref: - args = ["git", "fetch", "origin", base_ref] - - retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) - - # Create local branch so that taskgraph is able to compute differences - # between the head branch and the base one, if needed - args = ["git", "checkout", base_ref] - - retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) - - # When commits are force-pushed (like on a testing branch), base_rev doesn't - # exist on base_ref. Fetching it allows taskgraph to compute differences - # between the previous state before the force-push and the current state. - # - # Unlike base_ref just above, there is no need to checkout the revision: - # it's immediately available after the fetch. + # First fetch the base_rev. This allows Taskgraph to compute the files + # changed by the push. if base_rev and base_rev != NULL_REVISION: - args = ["git", "fetch", "origin", base_rev] + git_fetch(destination_path, base_rev, env=env) - retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) + # Next fetch the head ref. # If a ref was provided, it might be tag, so we need to make sure we fetch # those. 
This is explicitly only done when base and head repo match, # because it is the only scenario where tags could be present. (PRs, for # example, always include an explicit rev.) Failure to do this could result # in not having a tag, or worse: having an outdated version of one. - # `--force` is needed to be able to update an existing tag. - if ref and base_repo == head_repo: - args = [ - "git", - "fetch", - "--tags", - "--force", - base_repo, - ref, - ] + tags = True if ref and base_repo == head_repo else False - retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) - - # If a ref isn't provided, we fetch all refs from head_repo, which may be slow - args = [ - "git", - "fetch", - "--no-tags", - head_repo, - ref if ref else "+refs/heads/*:refs/remotes/work/*", - ] - - retry_required_command(b"vcs", args, cwd=destination_path, extra_env=env) + # If a ref isn't provided, we fetch all refs from head_repo, which may be slow. + target = ref if ref else "+refs/heads/*:refs/remotes/work/*" + git_fetch(destination_path, target, remote=head_repo, tags=tags, shallow=False, env=env) args = [ "git", @@ -879,17 +884,22 @@ def add_vcs_arguments(parser, project, name): "--%s-sparse-profile" % project, help="Path to sparse profile for %s checkout" % name, ) + parser.add_argument( + "--%s-shallow-clone" % project, + action="store_true", + help="Use shallow clone for %s" % name, + ) def collect_vcs_options(args, project, name): checkout = getattr(args, "%s_checkout" % project) sparse_profile = getattr(args, "%s_sparse_profile" % project) + shallow_clone = getattr(args, "%s_shallow_clone" % project) env_prefix = project.upper() repo_type = os.environ.get("%s_REPOSITORY_TYPE" % env_prefix) base_repo = os.environ.get("%s_BASE_REPOSITORY" % env_prefix) - base_ref = os.environ.get("%s_BASE_REF" % env_prefix) base_rev = os.environ.get("%s_BASE_REV" % env_prefix) head_repo = os.environ.get("%s_HEAD_REPOSITORY" % env_prefix) revision = os.environ.get("%s_HEAD_REV" % 
env_prefix) @@ -922,7 +932,6 @@ def collect_vcs_options(args, project, name): "checkout": checkout, "sparse-profile": sparse_profile, "base-repo": base_repo, - "base-ref": base_ref, "base-rev": base_rev, "head-repo": head_repo, "revision": revision, @@ -930,6 +939,7 @@ def collect_vcs_options(args, project, name): "repo-type": repo_type, "ssh-secret-name": private_key_secret, "pip-requirements": pip_requirements, + "shallow-clone": shallow_clone, } @@ -972,12 +982,12 @@ def vcs_checkout_from_args(options): options["checkout"], options["head-repo"], options["base-repo"], - options["base-ref"], options["base-rev"], ref, revision, ssh_key_file, ssh_known_hosts_file, + options.get("shallow-clone", False), ) elif options["repo-type"] == "hg": if not revision and not ref: diff --git a/src/taskgraph/transforms/docker_image.py b/src/taskgraph/transforms/docker_image.py index 95e80d09c..04fac9b5f 100644 --- a/src/taskgraph/transforms/docker_image.py +++ b/src/taskgraph/transforms/docker_image.py @@ -256,3 +256,31 @@ def fill_template(config, tasks): } yield taskdesc + + +@transforms.add +def add_pr_route(config, tasks): + """Taskgraph builds several docker-images that get used externally. + + Indexing these images by pull request number, allows us to easily test them + in external repos using the `indexed-image` image definition type. 
def find_latest_common_revision(self, base_ref_or_rev, head_rev):
    """Return the merge base of ``base_ref_or_rev`` and ``head_rev``.

    In a complete repository this is a single ``git merge-base`` call. In a
    shallow repository the history is deepened in growing steps until a
    merge base is found or the repository stops being shallow; as a last
    resort the repository is unshallowed.

    Returns:
        The merge-base revision, or ``self.NULL_REVISION`` when none can be
        found.

    Raises:
        subprocess.CalledProcessError: if one of the ``git fetch`` calls
            fails.
    """

    def run_merge_base():
        try:
            return self.run("merge-base", base_ref_or_rev, head_rev).strip()
        except subprocess.CalledProcessError:
            return None

    if not self.is_shallow:
        return run_merge_base() or self.NULL_REVISION

    rev = run_merge_base()

    # Deepen by growing amounts. `is_shallow` is re-checked on every step:
    # once git has pulled in the whole history there is nothing left to
    # deepen. (`git rev-list --max-parents=0` cannot be used to detect this,
    # it exits successfully whether or not a root commit is reachable.)
    for deepen in (10, 100, 1000, 10000):
        if rev or not self.is_shallow:
            break
        self.run("fetch", "--deepen", str(deepen), self.remote_name)
        rev = run_merge_base()

    if not rev and self.is_shallow:
        # If we still haven't found a merge base, unshallow the repo and
        # try one last time. Guarded by `is_shallow` because git rejects
        # `--unshallow` on a repository that is already complete.
        self.run("fetch", "--unshallow", self.remote_name)
        rev = run_merge_base()

    return rev or self.NULL_REVISION
moz_build_date: '20220421203159' optimize_target_tasks: true owner: user@example.com project: taskgraph +pull_request_number: 123 pushdate: 0 pushlog_id: '0' repository_type: git diff --git a/test/test_scripts_run_task.py b/test/test_scripts_run_task.py index b3867d17d..dcb0357fb 100644 --- a/test/test_scripts_run_task.py +++ b/test/test_scripts_run_task.py @@ -178,12 +178,12 @@ def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected): args = Namespace() setattr(args, f"{name}_checkout", checkout) setattr(args, f"{name}_sparse_profile", False) + setattr(args, f"{name}_shallow_clone", False) result = run_task_mod.collect_vcs_options(args, name, name) expected = { "base-repo": env.get("BASE_REPOSITORY"), - "base-ref": env.get("BASE_REF"), "base-rev": env.get("BASE_REV"), "checkout": os.path.join(os.getcwd(), "checkout"), "env-prefix": name.upper(), @@ -194,6 +194,7 @@ def test_collect_vcs_options(monkeypatch, run_task_mod, env, extra_expected): "ref": env.get("HEAD_REF"), "repo-type": env.get("REPOSITORY_TYPE"), "revision": env.get("HEAD_REV"), + "shallow-clone": False, "ssh-secret-name": env.get("SSH_SECRET_NAME"), "sparse-profile": False, "store-path": env.get("HG_STORE_PATH"), @@ -334,7 +335,9 @@ def mock_git_repo(): ) def _commit_file(message, filename): - with open(os.path.join(repo, filename), "w") as fout: + filepath = os.path.join(repo, filename) + os.makedirs(os.path.dirname(filepath), exist_ok=True) + with open(filepath, "w") as fout: fout.write("test file content") subprocess.check_call(["git", "add", filename], cwd=repo_path) subprocess.check_call(["git", "commit", "-m", message], cwd=repo_path) @@ -354,7 +357,7 @@ def _commit_file(message, filename): @pytest.mark.parametrize( - "base_ref,ref,files,hash_key", + "base_rev,ref,files,hash_key", [ (None, None, ["mainfile"], "main"), (None, "main", ["mainfile"], "main"), @@ -367,7 +370,7 @@ def test_git_checkout( mock_stdin, run_task_mod, mock_git_repo, - base_ref, + base_rev, ref, files, 
def test_collect_vcs_options_with_efficient_clone(
    monkeypatch,
    run_task_mod,
):
    """Check that the ``vcs_shallow_clone`` argument is collected as ``shallow-clone``."""
    # Only the attributes that collect_vcs_options reads are set here; the
    # former `vcs_efficient_clone` attribute was never consumed.
    args = Namespace(
        vcs_checkout="/path/to/checkout",
        vcs_sparse_profile=None,
        vcs_shallow_clone=True,
    )

    # monkeypatch undoes these changes when the test finishes, so the
    # process environment does not need to be saved and restored by hand.
    monkeypatch.setenv("VCS_REPOSITORY_TYPE", "git")
    monkeypatch.setenv("VCS_HEAD_REPOSITORY", "https://github.com/test/repo.git")
    monkeypatch.setenv("VCS_HEAD_REV", "abc123")

    options = run_task_mod.collect_vcs_options(args, "vcs", "repository")
    assert options["shallow-clone"] is True
def test_find_latest_common_revision_shallow_clone(
    tmpdir, git_repo, default_git_branch
):
    """Test finding a common revision in a shallow clone, with and without deepening."""
    remote_path = str(tmpdir / "remote_repo")
    shutil.copytree(git_repo, remote_path)

    # Add several commits to the remote repository to create depth.
    remote_repo = get_repository(remote_path)
    for i in range(5):
        test_file = os.path.join(remote_path, f"file_{i}.txt")
        with open(test_file, "w") as f:
            f.write(f"content {i}")
        remote_repo.run("add", test_file)
        remote_repo.run("commit", "-m", f"Commit {i}")

    remote_head = remote_repo.head_rev
    # An ancestor of the remote head that a depth-1 clone does not contain.
    older_rev = remote_repo.run("rev-parse", f"{remote_head}~3").strip()

    # Create a shallow clone with depth 1.
    # Need to use file:// protocol for --depth to work with local repos.
    shallow_clone_path = str(tmpdir / "shallow_clone")
    subprocess.check_call(
        ["git", "clone", "--depth", "1", f"file://{remote_path}", shallow_clone_path]
    )

    shallow_repo = get_repository(shallow_clone_path)
    assert shallow_repo.is_shallow
    assert not shallow_repo.does_revision_exist_locally(older_rev)

    remote_name = "origin"

    # Create a new commit in the shallow clone to diverge from remote.
    new_file = os.path.join(shallow_clone_path, "local_file.txt")
    with open(new_file, "w") as f:
        f.write("local content")
    shallow_repo.run("add", new_file)
    shallow_repo.run("commit", "-m", "Local commit")

    # The remote tip is the parent of the local commit, so it is found
    # without fetching any additional history.
    base_ref = f"{remote_name}/{default_git_branch}"
    result = shallow_repo.find_latest_common_revision(base_ref, shallow_repo.head_rev)
    assert result == remote_head

    # The older revision is missing from the depth-1 clone, so a merge base
    # can only be computed after the history has been deepened.
    result = shallow_repo.find_latest_common_revision(older_rev, shallow_repo.head_rev)
    assert result == older_rev
    assert shallow_repo.does_revision_exist_locally(older_rev)