@@ -600,6 +600,8 @@ def git_checkout(
600
600
commit : Optional [str ],
601
601
ssh_key_file : Optional [Path ],
602
602
ssh_known_hosts_file : Optional [Path ],
603
+ efficient_clone : bool = False ,
604
+ sparse_dirs : Optional [str ] = None ,
603
605
):
604
606
env = {
605
607
# abort if transfer speed is lower than 1kB/s for 1 minute
@@ -636,22 +638,43 @@ def git_checkout(
636
638
args = [
637
639
"git" ,
638
640
"clone" ,
641
+ ]
642
+
643
+ if efficient_clone :
644
+ # Use blobless clone for faster initial clone
645
+ # This fetches commit and tree objects but not file contents
646
+ args .extend (["--filter=blob:none" ])
647
+ # Use shallow clone with depth 1 for minimal history
648
+ args .extend (["--depth=1" ])
649
+ # Skip checkout initially, we'll do sparse checkout later
650
+ args .extend (["--no-checkout" ])
651
+ elif sparse_dirs :
652
+ # For sparse checkout without efficient clone, still skip initial checkout
653
+ # so we can set up sparse checkout before checking out files
654
+ args .extend (["--no-checkout" ])
655
+
656
+ args .extend ([
639
657
base_repo if base_repo else head_repo ,
640
658
destination_path ,
641
- ]
659
+ ])
642
660
643
661
retry_required_command (b"vcs" , args , extra_env = env )
644
662
645
663
if base_ref :
646
- args = ["git" , "fetch" , "origin" , base_ref ]
664
+ args = ["git" , "fetch" ]
665
+ if efficient_clone :
666
+ # For shallow clones, we need to deepen to fetch more history
667
+ args .extend (["--depth=100" ])
668
+ args .extend (["origin" , base_ref ])
647
669
648
670
retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
649
671
650
672
# Create local branch so that taskgraph is able to compute differences
651
673
# between the head branch and the base one, if needed
652
- args = ["git" , "checkout" , base_ref ]
653
-
654
- retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
674
+ if not efficient_clone and not sparse_dirs :
675
+ # Only checkout if we didn't use --no-checkout initially
676
+ args = ["git" , "checkout" , base_ref ]
677
+ retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
655
678
656
679
# When commits are force-pushed (like on a testing branch), base_rev doesn't
657
680
# exist on base_ref. Fetching it allows taskgraph to compute differences
@@ -660,7 +683,11 @@ def git_checkout(
660
683
# Unlike base_ref just above, there is no need to checkout the revision:
661
684
# it's immediately available after the fetch.
662
685
if base_rev and base_rev != NULL_REVISION :
663
- args = ["git" , "fetch" , "origin" , base_rev ]
686
+ args = ["git" , "fetch" ]
687
+ if efficient_clone :
688
+ # For shallow clones, we need to deepen to fetch more history
689
+ args .extend (["--depth=100" ])
690
+ args .extend (["origin" , base_rev ])
664
691
665
692
retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
666
693
@@ -671,28 +698,44 @@ def git_checkout(
671
698
# in not having a tag, or worse: having an outdated version of one.
672
699
# `--force` is needed to be able to update an existing tag.
673
700
if ref and base_repo == head_repo :
674
- args = [
675
- "git" ,
676
- "fetch" ,
677
- "--tags" ,
678
- "--force" ,
679
- base_repo ,
680
- ref ,
681
- ]
701
+ args = ["git" , "fetch" ]
702
+ if efficient_clone :
703
+ # For shallow clones, we need to deepen to fetch more history
704
+ args .extend (["--depth=100" ])
705
+ args .extend (["--tags" , "--force" , base_repo , ref ])
682
706
683
707
retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
684
708
685
709
# If a ref isn't provided, we fetch all refs from head_repo, which may be slow
686
- args = [
687
- "git" ,
688
- "fetch" ,
689
- "--no-tags" ,
690
- head_repo ,
691
- ref if ref else "+refs/heads/*:refs/remotes/work/*" ,
692
- ]
710
+ args = ["git" , "fetch" ]
711
+ if efficient_clone :
712
+ # For shallow clones, we need to deepen to fetch more history
713
+ args . extend ([ "--depth=100" ])
714
+ # With blobless clones, we only fetch the blobs we need
715
+ args . extend ([ "--filter=blob:none" ])
716
+ args . extend ([ "--no-tags" , head_repo , ref if ref else "+refs/heads/*:refs/remotes/work/*" ])
693
717
694
718
retry_required_command (b"vcs" , args , cwd = destination_path , extra_env = env )
695
719
720
+ if sparse_dirs :
721
+ # When sparse directories/files are specified, set up sparse checkout
722
+ # The sparse_dirs should be a colon-separated list of directories or files
723
+ #
724
+ # Note: Git's sparse-checkout behavior in cone mode (default since Git 2.37):
725
+ # - Root-level files: Always checked out (cone mode includes every top-level file)
726
+ # - Files in subdirectories: Entire parent directory is included
727
+ # - Directories: All contents included
728
+
729
+ # Enable sparse checkout (cone mode is default since Git 2.37)
730
+ args = ["git" , "sparse-checkout" , "init" ]
731
+ run_required_command (b"vcs" , args , cwd = destination_path )
732
+
733
+ # Set the sparse entries
734
+ entries = sparse_dirs .split (":" )
735
+ args = ["git" , "sparse-checkout" , "set" ] + entries
736
+ run_required_command (b"vcs" , args , cwd = destination_path )
737
+
738
+ # Now do the actual checkout
696
739
args = [
697
740
"git" ,
698
741
"checkout" ,
@@ -879,11 +922,17 @@ def add_vcs_arguments(parser, project, name):
879
922
"--%s-sparse-profile" % project ,
880
923
help = "Path to sparse profile for %s checkout" % name ,
881
924
)
925
+ parser .add_argument (
926
+ "--%s-efficient-clone" % project ,
927
+ action = "store_true" ,
928
+ help = "Use efficient cloning strategies (blobless, shallow, no-checkout) for %s" % name ,
929
+ )
882
930
883
931
884
932
def collect_vcs_options (args , project , name ):
885
933
checkout = getattr (args , "%s_checkout" % project )
886
934
sparse_profile = getattr (args , "%s_sparse_profile" % project )
935
+ efficient_clone = getattr (args , "%s_efficient_clone" % project )
887
936
888
937
env_prefix = project .upper ()
889
938
@@ -896,6 +945,7 @@ def collect_vcs_options(args, project, name):
896
945
ref = os .environ .get ("%s_HEAD_REF" % env_prefix )
897
946
pip_requirements = os .environ .get ("%s_PIP_REQUIREMENTS" % env_prefix )
898
947
private_key_secret = os .environ .get ("%s_SSH_SECRET_NAME" % env_prefix )
948
+ sparse_dirs = os .environ .get ("%s_SPARSE_DIRS" % env_prefix )
899
949
900
950
store_path = os .environ .get ("HG_STORE_PATH" )
901
951
@@ -930,6 +980,8 @@ def collect_vcs_options(args, project, name):
930
980
"repo-type" : repo_type ,
931
981
"ssh-secret-name" : private_key_secret ,
932
982
"pip-requirements" : pip_requirements ,
983
+ "efficient-clone" : efficient_clone ,
984
+ "sparse-dirs" : sparse_dirs ,
933
985
}
934
986
935
987
@@ -978,6 +1030,8 @@ def vcs_checkout_from_args(options):
978
1030
revision ,
979
1031
ssh_key_file ,
980
1032
ssh_known_hosts_file ,
1033
+ options .get ("efficient-clone" , False ),
1034
+ options .get ("sparse-dirs" ),
981
1035
)
982
1036
elif options ["repo-type" ] == "hg" :
983
1037
if not revision and not ref :
0 commit comments