From fac07a05e15d53abfbf3c992958f5759e654d8e9 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Tue, 17 Mar 2026 11:46:24 +0000 Subject: [PATCH 1/3] Use partial fetch (blob:none) for metadata session sync MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip downloading blobs when fetching the remote metadata branch during push recovery. The merge only needs the tree structure to combine entries — blobs are already local or fetched on demand by git. Co-Authored-By: Claude Opus 4.6 (1M context) Entire-Checkpoint: 56094f38982e --- cmd/entire/cli/strategy/push_common.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/cmd/entire/cli/strategy/push_common.go b/cmd/entire/cli/strategy/push_common.go index a76ec3c9a..f370b6dc0 100644 --- a/cmd/entire/cli/strategy/push_common.go +++ b/cmd/entire/cli/strategy/push_common.go @@ -155,8 +155,11 @@ func fetchAndRebaseSessionsCommon(ctx context.Context, target, branchName string fetchedRefName = plumbing.NewRemoteReferenceName(target, branchName) } - // Use git CLI for fetch (go-git's fetch can be tricky with auth) - fetchCmd := CheckpointGitCommand(ctx, target, "fetch", target, refSpec) + // Use git CLI for fetch (go-git's fetch can be tricky with auth). + // Use --filter=blob:none for a partial fetch that downloads only commits + // and trees, skipping blobs. The merge only needs the tree structure to + // combine entries; blobs are already local or fetched on demand. + fetchCmd := CheckpointGitCommand(ctx, target, "fetch", "--filter=blob:none", target, refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { return fmt.Errorf("fetch failed: %s", output) } From 4c4dd465b4803adeb30e2dd179e4e05e5bd61bc5 Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 9 Apr 2026 11:18:16 +0100 Subject: [PATCH 2/3] Use partial clone for all checkpoint fetches Signed-off-by: Paulo Gomes Entire-Checkpoint: d8c9d9506694 --- cmd/entire/cli/checkpoint/v2_read.go | 2 +- cmd/entire/cli/git_operations.go | 18 +++++++++++------- cmd/entire/cli/integration_test/testenv.go | 2 +- cmd/entire/cli/resume.go | 2 +- cmd/entire/cli/strategy/checkpoint_remote.go | 4 ++-- .../cli/strategy/checkpoint_token_test.go | 2 +- cmd/entire/cli/strategy/push_common.go | 2 +- cmd/entire/cli/strategy/push_v2.go | 7 +++++-- 8 files changed, 23 insertions(+), 16 deletions(-) diff --git a/cmd/entire/cli/checkpoint/v2_read.go b/cmd/entire/cli/checkpoint/v2_read.go index b8e0c4e59..e9f315a62 100644 --- a/cmd/entire/cli/checkpoint/v2_read.go +++ b/cmd/entire/cli/checkpoint/v2_read.go @@ -222,7 +222,7 @@ func (s *V2GitStore) fetchRemoteFullRefs(ctx context.Context) error { return nil } - args := append([]string{"fetch", s.FetchRemote}, refSpecs...) + args := append([]string{"fetch", "--no-tags", s.FetchRemote}, refSpecs...) fetchCmd := exec.CommandContext(ctx, "git", args...) if fetchOutput, fetchErr := fetchCmd.CombinedOutput(); fetchErr != nil { return fmt.Errorf("fetch failed: %s", fetchOutput) diff --git a/cmd/entire/cli/git_operations.go b/cmd/entire/cli/git_operations.go index 3be6a85f7..36a86d9d4 100644 --- a/cmd/entire/cli/git_operations.go +++ b/cmd/entire/cli/git_operations.go @@ -372,6 +372,8 @@ func FetchAndCheckoutRemoteBranch(ctx context.Context, branchName string) error // FetchMetadataBranch fetches the entire/checkpoints/v1 branch from origin and creates/updates the local branch. // This is used when the metadata branch exists on remote but not locally. +// The fetch is treeless (--filter=blob:none) because checkpoint metadata reads +// support on-demand blob retrieval. // Uses git CLI instead of go-git for fetch because go-git doesn't use credential helpers, // which breaks HTTPS URLs that require authentication. func FetchMetadataBranch(ctx context.Context) error { @@ -383,7 +385,7 @@ func FetchMetadataBranch(ctx context.Context) error { refSpec := fmt.Sprintf("+refs/heads/%s:refs/remotes/origin/%s", branchName, branchName) - fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "origin", refSpec) + fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--no-tags", "--filter=blob:none", "origin", refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { if ctx.Err() == context.DeadlineExceeded { return errors.New("fetch timed out after 2 minutes") @@ -425,7 +427,7 @@ func FetchMetadataTreeOnly(ctx context.Context) error { refSpec := fmt.Sprintf("+refs/heads/%s:refs/remotes/origin/%s", branchName, branchName) - fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--depth=1", "--filter=blob:none", "origin", refSpec) + fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--no-tags", "--depth=1", "--filter=blob:none", "origin", refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { if ctx.Err() == context.DeadlineExceeded { return errors.New("treeless fetch timed out after 2 minutes") @@ -463,7 +465,7 @@ func FetchV2MainTreeOnly(ctx context.Context) error { refSpec := fmt.Sprintf("+%s:%s", paths.V2MainRefName, paths.V2MainRefName) - fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--depth=1", "--filter=blob:none", "origin", refSpec) + fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--no-tags", "--depth=1", "--filter=blob:none", "origin", refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { if ctx.Err() == context.DeadlineExceeded { return errors.New("v2 treeless fetch timed out after 2 minutes") @@ -474,7 +476,9 @@ func FetchV2MainTreeOnly(ctx context.Context) error { return nil } -// FetchV2MainRef fetches the v2 /main ref from origin (full fetch, including blobs). +// FetchV2MainRef fetches the v2 /main ref from origin. +// The fetch is treeless (--filter=blob:none) because /main is metadata-only and +// v2 checkpoint reads handle transcript retrieval separately. // Uses explicit refspec since v2 refs are under refs/entire/, not refs/heads/. func FetchV2MainRef(ctx context.Context) error { ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) @@ -482,7 +486,7 @@ func FetchV2MainRef(ctx context.Context) error { refSpec := fmt.Sprintf("+%s:%s", paths.V2MainRefName, paths.V2MainRefName) - fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "origin", refSpec) + fetchCmd := strategy.CheckpointGitCommand(ctx, "origin", "fetch", "--no-tags", "--filter=blob:none", "origin", refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { if ctx.Err() == context.DeadlineExceeded { return errors.New("v2 fetch timed out after 2 minutes") @@ -543,9 +547,9 @@ func FetchBlobsByHash(ctx context.Context, hashes []plumbing.Hash) error { ctx, cancel := context.WithTimeout(ctx, 2*time.Minute) defer cancel() - // Build fetch args: "git fetch origin ..." + // Build fetch args: "git fetch --no-tags origin ..." // This uses the normal transport + credential helpers, unlike fetch-pack. - args := []string{"fetch", "--no-write-fetch-head", "origin"} + args := []string{"fetch", "--no-tags", "--no-write-fetch-head", "origin"} for _, h := range hashes { args = append(args, h.String()) } diff --git a/cmd/entire/cli/integration_test/testenv.go b/cmd/entire/cli/integration_test/testenv.go index fc5e7b248..4f7b22170 100644 --- a/cmd/entire/cli/integration_test/testenv.go +++ b/cmd/entire/cli/integration_test/testenv.go @@ -1901,7 +1901,7 @@ func (env *TestEnv) FetchMetadataBranch(remoteURL string) { branchName := paths.MetadataBranchName refSpec := "+refs/heads/" + branchName + ":refs/heads/" + branchName - cmd := exec.CommandContext(env.T.Context(), "git", "fetch", "--no-tags", remoteURL, refSpec) + cmd := exec.CommandContext(env.T.Context(), "git", "fetch", "--no-tags", "--filter=blob:none", remoteURL, refSpec) cmd.Dir = env.RepoDir cmd.Env = testutil.GitIsolatedEnv() diff --git a/cmd/entire/cli/resume.go b/cmd/entire/cli/resume.go index 78bca26bf..1bba0a264 100644 --- a/cmd/entire/cli/resume.go +++ b/cmd/entire/cli/resume.go @@ -769,7 +769,7 @@ func checkRemoteMetadata(ctx context.Context, w, errW io.Writer, checkpointID id } else { fmt.Fprintf(errW, "Checkpoint '%s' found in commit but the entire/checkpoints/v1 branch is not available locally or on the remote.\n", checkpointID) fmt.Fprintf(errW, "This can happen if the metadata branch was not pushed. Try:\n") - fmt.Fprintf(errW, " git fetch origin entire/checkpoints/v1:entire/checkpoints/v1\n") + fmt.Fprintf(errW, " git fetch --no-tags --filter=blob:none origin entire/checkpoints/v1:entire/checkpoints/v1\n") } return nil } diff --git a/cmd/entire/cli/strategy/checkpoint_remote.go b/cmd/entire/cli/strategy/checkpoint_remote.go index 15f113da8..6b108da71 100644 --- a/cmd/entire/cli/strategy/checkpoint_remote.go +++ b/cmd/entire/cli/strategy/checkpoint_remote.go @@ -368,7 +368,7 @@ func FetchMetadataBranch(ctx context.Context, remoteURL string) error { tmpRef := "refs/entire-fetch-tmp/" + branchName refSpec := fmt.Sprintf("+refs/heads/%s:%s", branchName, tmpRef) - fetchCmd := CheckpointGitCommand(fetchCtx, remoteURL, "fetch", "--no-tags", remoteURL, refSpec) + fetchCmd := CheckpointGitCommand(fetchCtx, remoteURL, "fetch", "--no-tags", "--filter=blob:none", remoteURL, refSpec) // Merge GIT_TERMINAL_PROMPT=0 into whatever env CheckpointGitCommand set. // If the token was injected, cmd.Env is already populated; otherwise use os.Environ(). if fetchCmd.Env == nil { @@ -414,7 +414,7 @@ func FetchV2MainFromURL(ctx context.Context, remoteURL string) error { defer cancel() refSpec := fmt.Sprintf("+%s:%s", paths.V2MainRefName, paths.V2MainRefName) - fetchCmd := CheckpointGitCommand(fetchCtx, remoteURL, "fetch", "--no-tags", remoteURL, refSpec) + fetchCmd := CheckpointGitCommand(fetchCtx, remoteURL, "fetch", "--no-tags", "--filter=blob:none", remoteURL, refSpec) if fetchCmd.Env == nil { fetchCmd.Env = os.Environ() } diff --git a/cmd/entire/cli/strategy/checkpoint_token_test.go b/cmd/entire/cli/strategy/checkpoint_token_test.go index 2fbfcfb8f..cb544d9dc 100644 --- a/cmd/entire/cli/strategy/checkpoint_token_test.go +++ b/cmd/entire/cli/strategy/checkpoint_token_test.go @@ -359,7 +359,7 @@ func TestCheckpointToken_GIT_TERMINAL_PROMPT_Coexistence(t *testing.T) { t.Setenv(CheckpointTokenEnvVar, "coexist-token") cmd := CheckpointGitCommand(context.Background(), "https://github.com/org/repo.git", - "fetch", "--no-tags", "https://github.com/org/repo.git", "refs/heads/main") + "fetch", "--no-tags", "--filter=blob:none", "https://github.com/org/repo.git", "refs/heads/main") require.NotNil(t, cmd.Env) // Simulate what fetchMetadataBranchIfMissing does: append GIT_TERMINAL_PROMPT diff --git a/cmd/entire/cli/strategy/push_common.go b/cmd/entire/cli/strategy/push_common.go index f370b6dc0..c304ad09e 100644 --- a/cmd/entire/cli/strategy/push_common.go +++ b/cmd/entire/cli/strategy/push_common.go @@ -159,7 +159,7 @@ func fetchAndRebaseSessionsCommon(ctx context.Context, target, branchName string // Use --filter=blob:none for a partial fetch that downloads only commits // and trees, skipping blobs. The merge only needs the tree structure to // combine entries; blobs are already local or fetched on demand. - fetchCmd := CheckpointGitCommand(ctx, target, "fetch", "--filter=blob:none", target, refSpec) + fetchCmd := CheckpointGitCommand(ctx, target, "fetch", "--no-tags", "--filter=blob:none", target, refSpec) if output, err := fetchCmd.CombinedOutput(); err != nil { return fmt.Errorf("fetch failed: %s", output) } diff --git a/cmd/entire/cli/strategy/push_v2.go b/cmd/entire/cli/strategy/push_v2.go index 5805d6306..3f5460b0b 100644 --- a/cmd/entire/cli/strategy/push_v2.go +++ b/cmd/entire/cli/strategy/push_v2.go @@ -117,7 +117,10 @@ func fetchAndMergeRef(ctx context.Context, target string, refName plumbing.Refer tmpRefName := plumbing.ReferenceName("refs/entire-fetch-tmp/" + tmpRefSuffix) refSpec := fmt.Sprintf("+%s:%s", refName, tmpRefName) - fetchCmd := CheckpointGitCommand(ctx, target, "fetch", target, refSpec) + // Use --filter=blob:none for a partial fetch that downloads only commits + // and trees, skipping blobs. The merge only needs the tree structure to + // combine entries; blobs are already local or fetched on demand. + fetchCmd := CheckpointGitCommand(ctx, target, "fetch", "--no-tags", "--filter=blob:none", target, refSpec) fetchCmd.Env = append(fetchCmd.Env, "GIT_TERMINAL_PROMPT=0") if output, err := fetchCmd.CombinedOutput(); err != nil { return fmt.Errorf("fetch failed: %s", output) @@ -244,7 +247,7 @@ func handleRotationConflict(ctx context.Context, target string, repo *git.Reposi // Fetch the latest archived generation archiveTmpRef := plumbing.ReferenceName("refs/entire-fetch-tmp/archive-" + latestArchive) archiveRefSpec := fmt.Sprintf("+%s:%s", archiveRefName, archiveTmpRef) - fetchCmd := CheckpointGitCommand(ctx, target, "fetch", target, archiveRefSpec) + fetchCmd := CheckpointGitCommand(ctx, target, "fetch", "--no-tags", "--filter=blob:none", target, archiveRefSpec) fetchCmd.Env = append(fetchCmd.Env, "GIT_TERMINAL_PROMPT=0") if output, fetchErr := fetchCmd.CombinedOutput(); fetchErr != nil { return fmt.Errorf("fetch archived generation failed: %s", output) From 73bea7be857cfcf0363d14060d8c801f37eecfce Mon Sep 17 00:00:00 2001 From: Paulo Gomes Date: Thu, 9 Apr 2026 11:35:23 +0100 Subject: [PATCH 3/3] Remove partial clone optimisation from user message Signed-off-by: Paulo Gomes --- cmd/entire/cli/resume.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/entire/cli/resume.go b/cmd/entire/cli/resume.go index 1bba0a264..78bca26bf 100644 --- a/cmd/entire/cli/resume.go +++ b/cmd/entire/cli/resume.go @@ -769,7 +769,7 @@ func checkRemoteMetadata(ctx context.Context, w, errW io.Writer, checkpointID id } else { fmt.Fprintf(errW, "Checkpoint '%s' found in commit but the entire/checkpoints/v1 branch is not available locally or on the remote.\n", checkpointID) fmt.Fprintf(errW, "This can happen if the metadata branch was not pushed. Try:\n") - fmt.Fprintf(errW, " git fetch --no-tags --filter=blob:none origin entire/checkpoints/v1:entire/checkpoints/v1\n") + fmt.Fprintf(errW, " git fetch origin entire/checkpoints/v1:entire/checkpoints/v1\n") } return nil }