Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,5 @@ docs/plans
docs/reviews
docs/superpowers
tmp/
.tmp/
.superpowers/
28 changes: 20 additions & 8 deletions cmd/entire/cli/checkpoint/checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ package checkpoint

import (
"context"
"encoding/json"
"errors"
"time"

Expand Down Expand Up @@ -282,6 +283,12 @@ type WriteCommittedOptions struct {
// comparing checkpoint tree (agent work) to committed tree (may include human edits)
InitialAttribution *InitialAttribution

// PromptAttributionsJSON is the raw PromptAttributions data, JSON-encoded.
// Persisted for diagnostic purposes — shows exactly which prompt recorded
// which "user" lines, enabling root cause analysis of attribution bugs.
// Uses json.RawMessage to avoid importing session package.
PromptAttributionsJSON json.RawMessage

// Summary is an optional AI-generated summary for this checkpoint.
// This field may be nil when:
// - summarization is disabled in settings
Expand Down Expand Up @@ -411,6 +418,10 @@ type CommittedMetadata struct {

// InitialAttribution is line-level attribution calculated at commit time
InitialAttribution *InitialAttribution `json:"initial_attribution,omitempty"`

// PromptAttributions is the raw per-prompt attribution data used to compute InitialAttribution.
// Diagnostic field — shows which prompt recorded which "user" lines.
PromptAttributions json.RawMessage `json:"prompt_attributions,omitempty"`
}

// GetTranscriptStart returns the transcript line offset at which this checkpoint's data begins.
Expand Down Expand Up @@ -452,14 +463,15 @@ type SessionFilePaths struct {
//
//nolint:revive // Named CheckpointSummary to avoid conflict with existing Summary struct
type CheckpointSummary struct {
CLIVersion string `json:"cli_version,omitempty"`
CheckpointID id.CheckpointID `json:"checkpoint_id"`
Strategy string `json:"strategy"`
Branch string `json:"branch,omitempty"`
CheckpointsCount int `json:"checkpoints_count"`
FilesTouched []string `json:"files_touched"`
Sessions []SessionFilePaths `json:"sessions"`
TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"`
CLIVersion string `json:"cli_version,omitempty"`
CheckpointID id.CheckpointID `json:"checkpoint_id"`
Strategy string `json:"strategy"`
Branch string `json:"branch,omitempty"`
CheckpointsCount int `json:"checkpoints_count"`
FilesTouched []string `json:"files_touched"`
Sessions []SessionFilePaths `json:"sessions"`
TokenUsage *agent.TokenUsage `json:"token_usage,omitempty"`
CombinedAttribution *InitialAttribution `json:"combined_attribution,omitempty"`
}

// SessionMetrics contains hook-provided session metrics from agents that report
Expand Down
135 changes: 127 additions & 8 deletions cmd/entire/cli/checkpoint/committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ func (s *GitStore) writeSessionToSubdirectory(ctx context.Context, opts WriteCom
TokenUsage: opts.TokenUsage,
SessionMetrics: opts.SessionMetrics,
InitialAttribution: opts.InitialAttribution,
PromptAttributions: opts.PromptAttributionsJSON,
Summary: redactSummary(opts.Summary),
CLIVersion: versioninfo.Version,
}
Expand Down Expand Up @@ -414,15 +415,25 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s
return fmt.Errorf("failed to aggregate session stats: %w", err)
}

var combinedAttribution *InitialAttribution
rootMetadataPath := basePath + paths.MetadataFileName
if entry, exists := entries[rootMetadataPath]; exists {
existingSummary, readErr := s.readSummaryFromBlob(entry.Hash)
if readErr == nil {
combinedAttribution = existingSummary.CombinedAttribution
}
}

summary := CheckpointSummary{
CheckpointID: opts.CheckpointID,
CLIVersion: versioninfo.Version,
Strategy: opts.Strategy,
Branch: opts.Branch,
CheckpointsCount: checkpointsCount,
FilesTouched: filesTouched,
Sessions: sessions,
TokenUsage: tokenUsage,
CheckpointID: opts.CheckpointID,
CLIVersion: versioninfo.Version,
Strategy: opts.Strategy,
Branch: opts.Branch,
CheckpointsCount: checkpointsCount,
FilesTouched: filesTouched,
Sessions: sessions,
TokenUsage: tokenUsage,
CombinedAttribution: combinedAttribution,
}

metadataJSON, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
Expand All @@ -441,6 +452,76 @@ func (s *GitStore) writeCheckpointSummary(opts WriteCommittedOptions, basePath s
return nil
}

// UpdateCheckpointSummary rewrites the root-level metadata file of an existing
// checkpoint so that its CombinedAttribution field holds combinedAttribution,
// then records the change as a new commit on the metadata branch.
//
// It returns the context's error when ctx is already done, ErrCheckpointNotFound
// when the checkpoint has no root metadata entry, and otherwise whatever error
// occurred while reading, re-encoding, or committing the summary.
func (s *GitStore) UpdateCheckpointSummary(ctx context.Context, checkpointID id.CheckpointID, combinedAttribution *InitialAttribution) error {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return ctxErr //nolint:wrapcheck // Propagating context cancellation
	}

	if ensureErr := s.ensureSessionsBranch(); ensureErr != nil {
		return fmt.Errorf("failed to ensure sessions branch: %w", ensureErr)
	}

	headCommit, headTree, err := s.getSessionsBranchRef()
	if err != nil {
		return err
	}

	// Gather every entry currently stored under this checkpoint's directory.
	checkpointDir := checkpointID.Path()
	dirPrefix := checkpointDir + "/"
	treeEntries, err := s.flattenCheckpointEntries(headTree, checkpointDir)
	if err != nil {
		return err
	}

	// The root metadata file must already exist; this method only updates it.
	summaryPath := dirPrefix + paths.MetadataFileName
	current, found := treeEntries[summaryPath]
	if !found {
		return ErrCheckpointNotFound
	}

	summary, err := s.readSummaryFromBlob(current.Hash)
	if err != nil {
		return fmt.Errorf("failed to read checkpoint summary: %w", err)
	}
	summary.CombinedAttribution = combinedAttribution

	encoded, err := jsonutil.MarshalIndentWithNewline(summary, "", " ")
	if err != nil {
		return fmt.Errorf("failed to marshal checkpoint summary: %w", err)
	}
	blobHash, err := CreateBlobFromContent(s.repo, encoded)
	if err != nil {
		return fmt.Errorf("failed to create checkpoint summary blob: %w", err)
	}

	// Swap in the re-encoded summary; all other checkpoint entries are kept as-is.
	treeEntries[summaryPath] = object.TreeEntry{
		Name: summaryPath,
		Mode: filemode.Regular,
		Hash: blobHash,
	}

	updatedTree, err := s.spliceCheckpointSubtree(headTree, checkpointID, dirPrefix, treeEntries)
	if err != nil {
		return err
	}

	name, email := GetGitAuthorFromRepo(s.repo)
	message := fmt.Sprintf("Update checkpoint summary for %s", checkpointID)
	commitHash, err := s.createCommit(updatedTree, headCommit, message, name, email)
	if err != nil {
		return err
	}

	// Point the metadata branch at the freshly created commit.
	branchRef := plumbing.NewHashReference(
		plumbing.NewBranchReferenceName(paths.MetadataBranchName),
		commitHash,
	)
	if err := s.repo.Storer.SetReference(branchRef); err != nil {
		return fmt.Errorf("failed to set branch reference: %w", err)
	}

	return nil
}

// findSessionIndex returns the index of an existing session with the given ID,
// or the next available index if not found. This prevents duplicate session entries.
func (s *GitStore) findSessionIndex(ctx context.Context, basePath string, existingSummary *CheckpointSummary, entries map[string]object.TreeEntry, sessionID string) int {
Expand Down Expand Up @@ -773,6 +854,44 @@ func (s *GitStore) ReadCommitted(ctx context.Context, checkpointID id.Checkpoint
return &summary, nil
}

// ReadSessionMetadata loads and decodes the metadata file of a single session
// stored under a checkpoint. Only the metadata file inside the session's
// subtree is read here; no other session files are opened by this method.
// sessionIndex is 0-based.
//
// It returns the context's error when ctx is already done, ErrCheckpointNotFound
// when the fetching tree cannot be obtained, and a wrapped error when the
// session subtree or its metadata file is missing, unreadable, or not valid JSON.
func (s *GitStore) ReadSessionMetadata(ctx context.Context, checkpointID id.CheckpointID, sessionIndex int) (*CommittedMetadata, error) {
	if ctxErr := ctx.Err(); ctxErr != nil {
		return nil, ctxErr //nolint:wrapcheck // Propagating context cancellation
	}

	root, err := s.getFetchingTree(ctx)
	if err != nil {
		return nil, ErrCheckpointNotFound
	}

	// Sessions live in numbered subdirectories of the checkpoint path.
	sessionDir := fmt.Sprintf("%s/%d", checkpointID.Path(), sessionIndex)
	sessionTree, err := root.Tree(sessionDir)
	if err != nil {
		return nil, fmt.Errorf("session %d not found: %w", sessionIndex, err)
	}

	file, err := sessionTree.File(paths.MetadataFileName)
	if err != nil {
		return nil, fmt.Errorf("metadata.json not found for session %d: %w", sessionIndex, err)
	}

	raw, err := file.Contents()
	if err != nil {
		return nil, fmt.Errorf("failed to read session metadata: %w", err)
	}

	parsed := new(CommittedMetadata)
	if err := json.Unmarshal([]byte(raw), parsed); err != nil {
		return nil, fmt.Errorf("failed to parse session metadata: %w", err)
	}

	return parsed, nil
}

// ReadSessionContent reads the actual content for a specific session within a checkpoint.
// sessionIndex is 0-based (0 for first session, 1 for second, etc.).
// Returns the session's metadata, transcript, prompts, and context.
Expand Down
1 change: 1 addition & 0 deletions cmd/entire/cli/checkpoint/v2_committed.go
Original file line number Diff line number Diff line change
Expand Up @@ -385,6 +385,7 @@ func (s *V2GitStore) writeMainSessionToSubdirectory(opts WriteCommittedOptions,
TokenUsage: opts.TokenUsage,
SessionMetrics: opts.SessionMetrics,
InitialAttribution: opts.InitialAttribution,
PromptAttributions: opts.PromptAttributionsJSON,
Summary: redactSummary(opts.Summary),
CLIVersion: versioninfo.Version,
}
Expand Down
61 changes: 57 additions & 4 deletions cmd/entire/cli/strategy/manual_commit_attribution.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/entireio/cli/cmd/entire/cli/checkpoint"
"github.com/entireio/cli/cmd/entire/cli/gitops"
"github.com/entireio/cli/cmd/entire/cli/logging"
"github.com/entireio/cli/cmd/entire/cli/paths"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/sergi/go-diff/diffmatchpatch"
)
Expand Down Expand Up @@ -179,6 +180,7 @@ type AttributionParams struct {
ParentCommitHash string // HEAD's first parent hash (preferred diff base for non-agent files)
AttributionBaseCommit string // Session base commit hash (fallback for non-agent file detection)
HeadCommitHash string // HEAD commit hash for git diff-tree
AllAgentFiles map[string]struct{} // Files touched by ALL agent sessions (cross-session exclusion)
}

// CalculateAttributionWithAccumulated computes final attribution using accumulated prompt data.
Expand Down Expand Up @@ -221,11 +223,19 @@ func CalculateAttributionWithAccumulated(ctx context.Context, p AttributionParam
// Phase 4: Classify accumulated edits as agent vs non-agent
classified := classifyAccumulatedEdits(accum, p.FilesTouched, nonAgent.committedNonAgentSet)

// Phase 4b: Compute baseline (PA1) contributions to subtract from human counts.
// PA1 captures pre-session worktree dirt — edits that existed before the agent started.
// These should not count as human contributions during the session.
baselineClassified := classifyBaselineEdits(accum.baselineUserAddedPerFile, p.FilesTouched, nonAgent.committedNonAgentSet)

// Phase 5: Compute derived metrics
totalAgentAdded := max(0, agentDiffs.totalAgentAndUserWorkAdded-classified.toAgentFiles)
postToNonAgentFiles := max(0, nonAgent.userEditsToNonAgentFiles-classified.toCommittedNonAgentFiles)

relevantAccumulatedUser := classified.toAgentFiles + classified.toCommittedNonAgentFiles
// Subtract baseline (PA1) from accumulated user edits to get session-only contributions
sessionAccumulatedToAgentFiles := max(0, classified.toAgentFiles-baselineClassified.toAgentFiles)
sessionAccumulatedToNonAgent := max(0, classified.toCommittedNonAgentFiles-baselineClassified.toCommittedNonAgentFiles)
relevantAccumulatedUser := sessionAccumulatedToAgentFiles + sessionAccumulatedToNonAgent
totalUserAdded := relevantAccumulatedUser + agentDiffs.postCheckpointUserAdded + postToNonAgentFiles
// Use per-file filtered removals (symmetric with totalUserAdded) to avoid
// double-counting non-agent removals that also appear in nonAgent.userRemovedFromNonAgentFiles.
Expand Down Expand Up @@ -277,13 +287,20 @@ type accumulatedEdits struct {
userRemoved int
addedPerFile map[string]int
removedPerFile map[string]int
// baseline tracks PA1 (CheckpointNumber <= 1) edits separately.
// PA1 captures pre-session worktree dirt that existed before the agent started,
// so it should be excluded from human contribution counts.
baselineUserRemoved int
baselineUserAddedPerFile map[string]int
}

// accumulatePromptEdits sums user additions and removals from all prompt attributions.
// It also tracks baseline (PA1) edits separately for later exclusion.
func accumulatePromptEdits(promptAttributions []PromptAttribution) accumulatedEdits {
result := accumulatedEdits{
addedPerFile: make(map[string]int),
removedPerFile: make(map[string]int),
addedPerFile: make(map[string]int),
removedPerFile: make(map[string]int),
baselineUserAddedPerFile: make(map[string]int),
}
for _, pa := range promptAttributions {
result.userAdded += pa.UserLinesAdded
Expand All @@ -294,6 +311,13 @@ func accumulatePromptEdits(promptAttributions []PromptAttribution) accumulatedEd
for filePath, removed := range pa.UserRemovedPerFile {
result.removedPerFile[filePath] += removed
}
// Track baseline (PA1) separately: pre-session dirt to exclude
if pa.CheckpointNumber <= 1 {
result.baselineUserRemoved += pa.UserLinesRemoved
for filePath, added := range pa.UserAddedPerFile {
result.baselineUserAddedPerFile[filePath] += added
}
}
}
return result
}
Expand Down Expand Up @@ -343,6 +367,7 @@ type nonAgentFileDiffs struct {
// diffNonAgentFiles enumerates files changed in the commit that weren't touched by the agent,
// and computes their user additions and removals.
// Prefers parentCommitHash→headCommitHash so only THIS commit's files count.
// Uses isAgentOrMetadataFile to skip files from other agent sessions.
func diffNonAgentFiles(ctx context.Context, p AttributionParams) (nonAgentFileDiffs, error) {
diffBaseCommit := p.ParentCommitHash
if diffBaseCommit == "" {
Expand All @@ -369,7 +394,7 @@ func diffNonAgentFiles(ctx context.Context, p AttributionParams) (nonAgentFileDi
committedNonAgentSet: make(map[string]struct{}, len(allChangedFiles)),
}
for _, filePath := range allChangedFiles {
if slices.Contains(p.FilesTouched, filePath) {
if isAgentOrMetadataFile(filePath, p.FilesTouched, p.AllAgentFiles) {
continue
}
result.committedNonAgentSet[filePath] = struct{}{}
Expand Down Expand Up @@ -412,6 +437,20 @@ func classifyAccumulatedEdits(accum accumulatedEdits, filesTouched []string, com
return result
}

// classifyBaselineEdits sums the baseline (PA1) per-file user additions into two
// buckets: lines added to files listed in filesTouched, and lines added to files
// present in committedNonAgentSet. A file found in both is counted only in the
// agent-file bucket; a file found in neither is ignored. The totals are meant to
// be subtracted from human contribution counts as pre-session dirt.
func classifyBaselineEdits(baselineAddedPerFile map[string]int, filesTouched []string, committedNonAgentSet map[string]struct{}) classifiedEdits {
	var totals classifiedEdits
	for path, lines := range baselineAddedPerFile {
		_, committedNonAgent := committedNonAgentSet[path]
		switch {
		case slices.Contains(filesTouched, path):
			// Agent-touched files take precedence over the non-agent set.
			totals.toAgentFiles += lines
		case committedNonAgent:
			totals.toCommittedNonAgentFiles += lines
		}
	}
	return totals
}

// computeAgentDeletions calculates agent-removed lines that actually remain deleted in the commit.
// Per-file: takes min(base→shadow removed, base→head removed) to avoid over-reporting when
// the user re-adds lines the agent deleted. Subtracts accumulated user removals to agent files.
Expand Down Expand Up @@ -517,3 +556,17 @@ func CalculatePromptAttribution(

return result
}

// isAgentOrMetadataFile reports whether filePath should be left out of
// attribution. It is true when the path is listed in filesTouched, is a key of
// allAgentFiles, or starts with ".entire/" or with paths.EntireMetadataDir
// followed by "/". allAgentFiles may be nil.
func isAgentOrMetadataFile(filePath string, filesTouched []string, allAgentFiles map[string]struct{}) bool {
	// Reading from a nil map is safe and reports the key as absent.
	_, inAnySession := allAgentFiles[filePath]
	switch {
	case slices.Contains(filesTouched, filePath), inAnySession:
		return true
	case strings.HasPrefix(filePath, ".entire/"):
		return true
	default:
		return strings.HasPrefix(filePath, paths.EntireMetadataDir+"/")
	}
}
Loading
Loading