diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index a680cc4..ebce281 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "github", - "version": "1.3.1", - "description": "GitHub CI/CD automation plugin for auto-detecting, analyzing, and fixing CI/CD failures on any branch", + "version": "1.4.0", + "description": "GitHub CI/CD automation plugin with autonomous fix loops, PR workflows, and code review", "author": { "name": "Ladislav Martincik", "url": "https://github.com/iamladi" @@ -24,7 +24,9 @@ "review-comments", "autonomous", "ai-review", - "confidence-scoring" + "confidence-scoring", + "ci-loop", + "auto-fix" ], "commands": { "fix-ci": { @@ -46,6 +48,10 @@ }, "agents": [ "./agents/ci-log-analyzer.md", - "./agents/ci-error-fixer.md" + "./agents/ci-error-fixer.md", + "./agents/ci-monitor.md" + ], + "skills": [ + "./skills/ci-fix-loop/SKILL.md" ] } diff --git a/CHANGELOG.md b/CHANGELOG.md index a5276b3..c88287e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,44 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [1.4.0] - 2025-12-13 + +### Added + +- **Autonomous CI Fix Loop** (`/fix-ci --loop` or `/fix-ci --auto`) + - Runs up to 10 fix-commit-push-wait cycles automatically + - Fully autonomous operation with no user prompts + - Background CI monitoring (polls every 60 seconds) + - Detailed history report on completion + - Early abort if same errors appear twice consecutively + +- **New `ci-monitor` agent** + - Lightweight haiku-based CI status poller + - Token-efficient (~50 tokens per poll vs ~500 for sonnet) + - Runs in background to free terminal + - Returns: SUCCESS, FAILURE, CANCELLED, or TIMEOUT + +- **New `ci-fix-loop` skill** + - Orchestrates the full autonomous loop + - State tracking: attempt count, error history, run IDs + - Smart progress detection to avoid infinite loops + - Comprehensive final reporting + +### Safety Features + +- Blocks on main/master branches (must use feature branch) +- Stashes uncommitted changes before starting +- Maximum 30 minute wait per CI run +- Maximum 10 retry attempts +- Detailed commit history for easy revert + +### Token Efficiency + +- ~6,000 tokens per iteration (vs ~15,000 without optimization) +- Haiku model for CI polling saves ~90% on wait cycles +- No context accumulation between iterations +- Background execution frees terminal for other work + ## [1.3.1] - 2024-11-16 ### Fixed diff --git a/agents/ci-monitor.md b/agents/ci-monitor.md new file mode 100644 index 0000000..68f358c --- /dev/null +++ b/agents/ci-monitor.md @@ -0,0 +1,94 @@ +--- +name: ci-monitor +description: Lightweight CI status poller for background monitoring. Uses haiku model for token efficiency. +tools: Bash +model: haiku +--- + +# CI Monitor Agent + +Minimal CI status checker. Poll GitHub Actions and report status. + +## Input + +- `branch`: Branch name to monitor +- `run_id`: Optional specific run ID (if known) + +## Behavior + +1. 
Check status: +```bash +gh run list --branch "${BRANCH}" --limit 1 --json databaseId,status,conclusion,createdAt +``` + +2. Classify the latest run as ONE of (only the last four are terminal and ever returned): + - `QUEUED` - Run queued, not started + - `IN_PROGRESS` - Run executing + - `SUCCESS` - Completed successfully + - `FAILURE` - Completed with failures + - `CANCELLED` - Run cancelled + - `TIMEOUT` - Exceeded max wait (30 min) + +3. If `QUEUED` or `IN_PROGRESS`: wait 60 seconds, poll again +4. If terminal state: return immediately with result + +## Polling Loop + +```bash +MAX_WAIT=1800 # 30 minutes +START=$(date +%s) +POLL_INTERVAL=60 + +while true; do + RESULT=$(gh run list --branch "$BRANCH" --limit 1 --json databaseId,status,conclusion 2>/dev/null) + + STATUS=$(echo "$RESULT" | jq -r '.[0].status // "unknown"') + CONCLUSION=$(echo "$RESULT" | jq -r '.[0].conclusion // "null"') + RUN_ID=$(echo "$RESULT" | jq -r '.[0].databaseId // "unknown"') + + case "$STATUS" in + completed) + case "$CONCLUSION" in + success) echo "SUCCESS|$RUN_ID"; exit 0 ;; + failure) echo "FAILURE|$RUN_ID"; exit 1 ;; + cancelled) echo "CANCELLED|$RUN_ID"; exit 2 ;; + *) echo "FAILURE|$RUN_ID"; exit 1 ;; + esac + ;; + queued|waiting|pending) + # Still waiting to start + ;; + in_progress) + # Running + ;; + *) + # Unknown status, keep polling + ;; + esac + + ELAPSED=$(($(date +%s) - START)) + if [ $ELAPSED -gt $MAX_WAIT ]; then + echo "TIMEOUT|$RUN_ID" + exit 3 + fi + + sleep $POLL_INTERVAL +done +``` + +## Output Format + +Single line: `STATUS|RUN_ID` + +Examples: +- `SUCCESS|12345678` +- `FAILURE|12345678` +- `CANCELLED|12345678` +- `TIMEOUT|12345678` + +## Important + +- Be extremely concise +- No explanations - just return status +- Max poll time: 30 minutes +- Poll interval: 60 seconds diff --git a/commands/fix-ci.md b/commands/fix-ci.md index 9a68c96..79a707e 100644 --- a/commands/fix-ci.md +++ b/commands/fix-ci.md @@ -9,13 +9,56 @@ Auto-detect, analyze, and fix CI/CD failures on any branch using GitHub CLI and ## Usage ```bash -/fix-ci # 
Current branch -/fix-ci 123 # PR number -/fix-ci https://... # PR URL +/fix-ci # Current branch (single fix) +/fix-ci 123 # PR number (single fix) +/fix-ci https://... # PR URL (single fix) +/fix-ci --loop # Autonomous loop mode (up to 10 retries) +/fix-ci --auto # Alias for --loop +/fix-ci 123 --loop # Loop mode for specific PR ``` User provided: `$ARGUMENTS` +## Autonomous Loop Mode + +When `--loop` or `--auto` flag is present, this command runs in autonomous mode using the `ci-fix-loop` skill. + +**Detection:** +```bash +ARGS="$ARGUMENTS" +if [[ "$ARGS" == *"--loop"* ]] || [[ "$ARGS" == *"--auto"* ]]; then + # Extract PR number if provided (e.g., "123 --loop" → "123") + PR_NUM=$(echo "$ARGS" | grep -oE '^[0-9]+' || echo "") + + # Invoke ci-fix-loop skill + # The skill will handle the full autonomous loop: + # 1. Analyze CI errors + # 2. Apply fixes + # 3. Commit and push + # 4. Monitor CI in background (polling every 60s) + # 5. If CI fails, repeat (up to 10 times) + # 6. Report final status + + # IMPORTANT: Do not proceed with single-fix workflow below +fi +``` + +**Behavior:** +- Runs up to 10 fix-commit-push-wait cycles +- Fully autonomous (no user prompts) +- Background CI monitoring between iterations +- Reports detailed history when complete +- Aborts if same errors appear twice consecutively + +**Safety:** +- Will not run on main/master branch +- Stashes uncommitted changes before starting +- Maximum 30 minute wait per CI run + +--- + +When NOT in loop mode, proceed with single-fix workflow below. + ## Workflow ### Step 1: Detect Context diff --git a/skills/ci-fix-loop/SKILL.md b/skills/ci-fix-loop/SKILL.md new file mode 100644 index 0000000..b349ac8 --- /dev/null +++ b/skills/ci-fix-loop/SKILL.md @@ -0,0 +1,333 @@ +--- +name: ci-fix-loop +description: Autonomous CI fix loop with background monitoring and retry logic. Runs up to 10 fix-commit-push-wait cycles until CI passes or max retries reached. 
+--- + +# CI Fix Loop Skill + +Orchestrates autonomous CI repair: analyze → fix → commit → push → monitor → repeat until success. + +## When to Use + +This skill is invoked when: +- User runs `/fix-ci --loop` or `/fix-ci --auto` +- Multiple CI fix iterations are needed +- User wants hands-off CI repair + +## Configuration + +| Setting | Value | Description | +|---------|-------|-------------| +| max_attempts | 10 | Maximum fix iterations | +| poll_interval | 60 | Seconds between CI status checks | +| ci_start_timeout | 120 | Seconds to wait for CI run to start | +| ci_run_timeout | 1800 | Max seconds to wait for CI completion (30 min) | + +## Workflow + +### Phase 1: Initialize + +Get context and validate: + +```bash +BRANCH=$(git branch --show-current) +REPO=$(gh repo view --json nameWithOwner -q .nameWithOwner 2>/dev/null || echo "unknown") +``` + +**Safety checks:** + +1. Block on protected branches: +```bash +if [[ "$BRANCH" == "main" || "$BRANCH" == "master" ]]; then + echo "Cannot run autonomous fixes on $BRANCH" + echo "Create a feature branch: git checkout -b fix/ci-errors" + # STOP - do not proceed +fi +``` + +2. Handle uncommitted changes: +```bash +if [[ -n $(git status --porcelain) ]]; then + echo "Stashing uncommitted changes..." 
+ git stash push -m "pre-ci-fix-loop-$(date +%Y%m%d_%H%M%S)" +fi +``` + +Initialize state: +``` +attempt = 1 +max_attempts = 10 +last_errors = [] +history = [] +started_at = now +``` + +### Phase 2: Fix Loop + +For each attempt from 1 to 10: + +#### Step 2.1: Display Progress + +``` +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +CI Fix Loop - Attempt ${attempt}/${max_attempts} +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +Branch: ${branch} +Repository: ${repo} +``` + +#### Step 2.2: Fetch CI Logs + +Get most recent failed run: +```bash +RUN_ID=$(gh run list --branch "$BRANCH" --limit 5 --json databaseId,conclusion \ + --jq '[.[] | select(.conclusion == "failure")][0].databaseId') + +if [ -z "$RUN_ID" ]; then + echo "No failed runs found - checking if CI is passing..." + # May already be fixed, verify +fi +``` + +Fetch logs for failed jobs: +```bash +FAILED_JOBS=$(gh run view $RUN_ID --json jobs --jq '.jobs[] | select(.conclusion == "failure") | .databaseId') + +for JOB_ID in $FAILED_JOBS; do + gh api repos/${REPO}/actions/jobs/${JOB_ID}/logs > /tmp/ci-logs-${JOB_ID}.txt 2>/dev/null || true +done +``` + +#### Step 2.3: Analyze Errors + +Invoke the `ci-log-analyzer` agent: +- Parse CI logs from /tmp/ci-logs-*.txt (only the files fetched in Step 2.2 for this attempt; logs left over from earlier attempts must be ignored) +- Extract structured error list with type, file, line, message +- Returns JSON with errors categorized by type (lint/test/type/build) + +#### Step 2.4: Check for Progress + +Compare current errors with previous attempt (`consecutive_same_errors` starts at 0 and is reset to 0 whenever the error set changes): + +``` +if current_errors == last_errors AND attempt > 1: + # Same errors after fix attempt = likely unfixable + consecutive_same_errors += 1 + + if consecutive_same_errors >= 2: + echo "Same errors detected after 2 fix attempts - aborting" + echo "These errors may require manual intervention" + # STOP - exit loop with failure report +fi + +if current_errors is empty: + # No errors found - CI might be passing + # Skip to monitoring phase +``` + +#### Step 2.5: Apply Fixes + +Invoke the `ci-error-fixer` agent with error list: +- Applies targeted 
fixes based on error type +- Shows diffs for each change +- Reports fixed vs flagged-for-manual-review counts + +Track results: +``` +errors_fixed = count of successfully fixed errors +errors_flagged = count of errors needing manual review +``` + +#### Step 2.6: Commit & Push + +Stage and commit changes: +```bash +git add . + +# Create descriptive commit message +git commit -m "fix(ci): automated fix attempt ${attempt} + +Errors addressed: +- ${error_summary_list} + +Attempt ${attempt} of ${max_attempts} (ci-fix-loop)" +``` + +Push to trigger CI: +```bash +PUSH_TIME=$(date -u +%Y-%m-%dT%H:%M:%SZ) +git push origin ${BRANCH} +``` + +#### Step 2.7: Wait for CI Run to Start + +Poll until new run appears (max 2 minutes): +```bash +TIMEOUT=120 +START=$(date +%s) + +while true; do + RUN_JSON=$(gh run list --branch "$BRANCH" --limit 1 --json databaseId,status,createdAt) + CREATED=$(echo "$RUN_JSON" | jq -r '.[0].createdAt') + + # Check if this run was created after our push + if [[ "$CREATED" > "$PUSH_TIME" ]]; then + NEW_RUN_ID=$(echo "$RUN_JSON" | jq -r '.[0].databaseId') + echo "CI run started: $NEW_RUN_ID" + break + fi + + ELAPSED=$(($(date +%s) - START)) + if [ $ELAPSED -gt $TIMEOUT ]; then + echo "Warning: No CI run started after ${TIMEOUT}s" + echo "Check if workflows are enabled for this branch" + break + fi + + sleep 5 +done +``` + +#### Step 2.8: Monitor CI (Background) + +Spawn the `ci-monitor` agent with `run_in_background: true`: + +The monitor will: +- Poll `gh run list` every 60 seconds +- Return when CI reaches terminal state +- Output: `SUCCESS|RUN_ID`, `FAILURE|RUN_ID`, `CANCELLED|RUN_ID`, or `TIMEOUT|RUN_ID` + +Wait for monitor result using `TaskOutput` tool. + +#### Step 2.9: Handle Result + +Parse monitor output: +``` +case "$RESULT" in + SUCCESS*) + # CI passed! 
Exit loop with success + ;; + FAILURE*) + # CI still failing - continue to next attempt + ;; + CANCELLED*) + # Run was cancelled - warn and exit + echo "CI run was cancelled externally" + # EXIT with warning + ;; + TIMEOUT*) + # Exceeded 30 min wait + echo "CI run timed out after 30 minutes" + # Autonomous mode: do not prompt - abort the loop, report, and suggest `gh run watch` + ;; +esac +``` + +#### Step 2.10: Record History + +``` +history.append({ + attempt: attempt, + errors_found: len(current_errors), + errors_fixed: errors_fixed, + errors_flagged: errors_flagged, + run_id: run_id, + result: conclusion, + duration: attempt_duration +}) + +last_errors = current_errors +attempt += 1 +``` + +### Phase 3: Final Report + +``` +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ +CI Fix Loop Complete +━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ + +Result: [SUCCESS|FAILURE] after ${attempts} attempt(s) + +Summary: + Total time: ${total_duration} + Commits created: ${commit_count} + Errors fixed: ${total_errors_fixed} + +History: +``` + +For each entry in history: +``` + Attempt ${n}: Found ${errors_found} errors, fixed ${fixed} → ${result} +``` + +If FAILURE: +``` +Remaining Issues (require manual intervention): + - ${file}:${line} - ${message} + Type: ${type} + +Suggested next steps: + 1. Review errors above + 2. Check CI logs: gh run view ${last_run_id} --log-failed + 3. Fix manually and push +``` + +If SUCCESS: +``` +CI is now passing! + +Next steps: + 1. Review automated commits: git log --oneline -${commit_count} + 2. Squash if desired: git rebase -i HEAD~${commit_count} + 3. 
Create PR: /github:create-pr +``` + +## Error Handling + +### Network/API Failures +- Retry `gh` commands 3 times with 5s backoff +- If persistent, abort and report + +### Git Conflicts +- If push fails due to upstream changes: +``` +echo "Upstream changes detected" +echo "Pull and retry: git pull --rebase && /fix-ci --loop" +``` +- Abort loop + +### Unfixable Errors +- Track errors persisting across 2+ attempts +- Mark as "unfixable" in final report +- Continue attempting other errors (the loop aborts only when the entire error set is unchanged between attempts) + +### Timeout +- CI run timeout (30 min): report and suggest `gh run watch` +- CI start timeout (2 min): check workflow configuration + +## Safety Mechanisms + +1. **Branch protection**: Never run on main/master +2. **Max attempts**: Hard limit of 10 iterations +3. **Stash protection**: Uncommitted changes are stashed before the loop starts; they are not restored automatically (run `git stash pop` after the loop finishes) +4. **Progress detection**: Abort if same errors repeat twice +5. **Timeout limits**: 30 min max CI wait per attempt +6. **Commit tracking**: Report all commits for easy revert + +## Token Efficiency + +Estimated per iteration: +- Analysis (sonnet): ~2000 tokens +- Fix application (sonnet): ~3000 tokens +- CI monitoring (haiku): ~500 tokens +- State/reporting: ~500 tokens +- **Total: ~6000 tokens/iteration** +- **10 iterations max: ~60,000 tokens** + +Key optimizations: +- Haiku model for CI polling (10x cheaper than sonnet) +- No context accumulation between iterations +- Minimal state tracking +- Background execution frees terminal