From 3778e5ae48ef9565dc6ad9b08eacfbc9f7059b25 Mon Sep 17 00:00:00 2001 From: brady gaster Date: Tue, 30 Jun 2026 10:01:54 -0700 Subject: [PATCH] =?UTF-8?q?docs:=20comprehensive=20docs=20overhaul=20?= =?UTF-8?q?=E2=80=94=20truth,=20voice,=20and=20organization?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Delete retired docs (personal-squad, remote-control, shell, REPL blog) - Remove experimental/alpha warnings from all docs - Remove all personal squad references across 11 files - Voice pass: augmentation over replacement language (~30 files) - Remove REPL/interactive shell references (~45 files) - Reorganize navigation: Core/Advanced/Infrastructure feature tiers - Fix CI test knownSections for new nav structure - Fix broken internal links to deleted pages Closes #1194 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../001-wave-0-the-team-that-built-itself.md | 22 - .../blog/001a-the-squad-squad-problem.md | 33 +- docs/src/content/blog/001b-meet-the-squad.md | 35 - .../content/blog/001c-first-pr-amolchanov.md | 24 - .../content/blog/002-first-community-pr.md | 19 - .../content/blog/003-super-bowl-weekend.md | 19 - docs/src/content/blog/004-v020-release.md | 28 - .../content/blog/005-v030-give-it-a-brain.md | 33 - .../blog/006-first-external-deployment.md | 28 - .../content/blog/007-first-video-coverage.md | 24 - docs/src/content/blog/008-v040-release.md | 28 +- .../content/blog/009-v040-sprint-progress.md | 48 -- .../content/blog/010-v041-patch-release.md | 17 - .../011-skills-system-learning-from-work.md | 74 --- .../content/blog/012-trending-on-github.md | 32 - .../content/blog/013-the-replatform-begins.md | 27 - .../blog/014-wave-1-otel-and-aspire.md | 31 - .../blog/015-wave-2-the-repl-moment.md | 95 --- .../blog/016-wave-3-docs-that-teach.md | 28 - .../src/content/blog/017-version-alignment.md | 29 - .../blog/018-the-adapter-chronicles.md | 34 - .../content/blog/019-shaynes-remote-mode.md | 
33 - docs/src/content/blog/020-docs-reborn.md | 36 - docs/src/content/blog/021-the-migration.md | 70 -- .../blog/022-welcome-to-the-new-squad.md | 273 +------- docs/src/content/blog/024-v0823-release.md | 195 +----- .../blog/026-whats-new-ado-comms-subsquads.md | 42 -- docs/src/content/blog/027-v0825-release.md | 25 - docs/src/content/blog/028-new-docs-site.md | 40 -- docs/src/content/blog/028-v090-whats-new.md | 145 +--- .../blog/029-upgrade-testing-at-scale.md | 46 +- .../src/content/blog/030-v092-whats-coming.md | 52 -- docs/src/content/blog/031-state-backends.md | 35 +- docs/src/content/blog/watch-monitor.md | 55 +- .../src/content/docs/concepts/architecture.md | 116 ++-- .../content/docs/concepts/github-workflow.md | 91 +-- .../docs/concepts/memory-and-knowledge.md | 97 --- .../content/docs/concepts/parallel-work.md | 120 +--- docs/src/content/docs/concepts/portability.md | 109 --- docs/src/content/docs/concepts/your-team.md | 115 +--- docs/src/content/docs/cookbook/recipes.md | 90 +-- .../docs/features/capability-routing.md | 126 ++-- docs/src/content/docs/features/ceremonies.md | 48 -- docs/src/content/docs/features/cleanup.md | 51 -- .../src/content/docs/features/consult-mode.md | 122 +--- .../content/docs/features/context-hygiene.md | 214 +++--- .../features/coordinator-as-agent-export.md | 69 -- .../docs/features/copilot-coding-agent.md | 90 +-- .../docs/features/copilot-mcp-trust.md | 33 - .../content/docs/features/cost-tracking.md | 152 ++--- .../docs/features/cross-squad-discover.md | 49 -- docs/src/content/docs/features/directives.md | 60 -- .../content/docs/features/distributed-mesh.md | 106 --- .../docs/features/dual-mode-deployment.md | 43 -- .../content/docs/features/error-recovery.md | 34 - .../content/docs/features/export-import.md | 57 -- .../content/docs/features/external-state.md | 200 ++---- .../content/docs/features/fleet-dispatch.md | 42 -- .../content/docs/features/github-issues.md | 57 -- .../docs/features/human-team-members.md | 49 -- 
.../content/docs/features/issue-templates.md | 624 ++++++++---------- .../src/content/docs/features/keda-scaling.md | 147 ++--- docs/src/content/docs/features/labels.md | 43 -- docs/src/content/docs/features/loop.md | 91 +-- docs/src/content/docs/features/marketplace.md | 35 - .../content/docs/features/mcp-frontmatter.md | 29 - docs/src/content/docs/features/mcp.md | 142 ---- docs/src/content/docs/features/memory.md | 73 -- .../content/docs/features/model-selection.md | 55 -- .../docs/features/notification-level.md | 26 - .../content/docs/features/notifications.md | 139 ---- .../docs/features/parallel-execution.md | 54 -- docs/src/content/docs/features/plugins.md | 53 -- docs/src/content/docs/features/prd-mode.md | 142 ++-- docs/src/content/docs/features/preset.md | 64 -- .../content/docs/features/project-boards.md | 50 -- docs/src/content/docs/features/ralph.md | 138 +--- .../content/docs/features/rate-limiting.md | 126 ++-- docs/src/content/docs/features/reflect.md | 29 - .../content/docs/features/remote-control.md | 326 --------- .../content/docs/features/response-modes.md | 46 -- .../docs/features/reviewer-protocol.md | 64 -- docs/src/content/docs/features/routing.md | 62 -- docs/src/content/docs/features/scratch-dir.md | 60 -- .../src/content/docs/features/self-upgrade.md | 81 --- .../docs/features/skill-security-scanner.md | 39 -- docs/src/content/docs/features/skills.md | 55 -- docs/src/content/docs/features/squad-rc.md | 124 +--- .../content/docs/features/state-backends.md | 218 ------ .../content/docs/features/storage-provider.md | 361 +++++----- docs/src/content/docs/features/team-setup.md | 36 - docs/src/content/docs/features/teams-comms.md | 37 -- .../content/docs/features/tiered-memory.md | 35 +- .../docs/features/upstream-inheritance.md | 123 ---- docs/src/content/docs/features/vscode.md | 83 +-- docs/src/content/docs/features/worktrees.md | 55 -- .../docs/get-started/choose-your-interface.md | 288 +++----- .../docs/get-started/choosing-your-path.md | 
134 ++-- .../content/docs/get-started/first-session.md | 98 +-- .../docs/get-started/five-minute-start.md | 139 ++-- .../content/docs/get-started/installation.md | 88 +-- docs/src/content/docs/guide.md | 194 +----- .../docs/guide/build-autonomous-agent.md | 138 +--- .../content/docs/guide/building-extensions.md | 58 -- docs/src/content/docs/guide/extensibility.md | 43 -- docs/src/content/docs/guide/faq.md | 89 --- docs/src/content/docs/guide/personal-squad.md | 352 ---------- docs/src/content/docs/guide/sample-prompts.md | 51 -- docs/src/content/docs/guide/shell.md | 343 ---------- .../src/content/docs/guide/tips-and-tricks.md | 34 - .../content/docs/reference/api-reference.md | 121 +--- docs/src/content/docs/reference/cli.md | 163 +---- docs/src/content/docs/reference/config.md | 56 +- docs/src/content/docs/reference/glossary.md | 58 +- .../src/content/docs/reference/integration.md | 36 - docs/src/content/docs/reference/sdk.md | 174 +---- .../content/docs/reference/tools-and-hooks.md | 33 - .../docs/reference/vscode-troubleshooting.md | 40 -- .../docs/scenarios/aspire-dashboard.md | 90 --- .../docs/scenarios/ci-cd-integration.md | 76 +-- .../content/docs/scenarios/existing-repo.md | 53 -- .../docs/scenarios/issue-driven-dev.md | 59 -- docs/src/content/docs/scenarios/monorepo.md | 80 --- .../src/content/docs/scenarios/new-project.md | 53 +- .../src/content/docs/scenarios/open-source.md | 117 +--- docs/src/content/docs/scenarios/remote-qa.md | 134 ++-- docs/src/content/docs/scenarios/solo-dev.md | 60 -- .../content/docs/scenarios/team-of-humans.md | 75 --- .../docs/scenarios/team-state-storage.md | 432 ++++-------- docs/src/content/docs/tour-first-session.md | 85 +-- docs/src/content/docs/whatsnew.md | 52 +- docs/src/navigation.ts | 129 ++-- docs/tests/build-output.test.mjs | 437 ++++++------ 133 files changed, 1715 insertions(+), 10907 deletions(-) delete mode 100644 docs/src/content/blog/015-wave-2-the-repl-moment.md delete mode 100644 
docs/src/content/docs/features/remote-control.md delete mode 100644 docs/src/content/docs/guide/personal-squad.md delete mode 100644 docs/src/content/docs/guide/shell.md diff --git a/docs/src/content/blog/001-wave-0-the-team-that-built-itself.md b/docs/src/content/blog/001-wave-0-the-team-that-built-itself.md index ac0e4f918..9f7f87dff 100644 --- a/docs/src/content/blog/001-wave-0-the-team-that-built-itself.md +++ b/docs/src/content/blog/001-wave-0-the-team-that-built-itself.md @@ -7,16 +7,9 @@ tags: [squad, wave-0, team-formation, self-repair, silent-success-bug, origin-st status: published hero: "We asked Squad to build itself a team. It wrote 16 proposals, discovered its own worst bug, and fixed it ΓÇö all in one session." --- - # Wave 0: The Team That Built Itself - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _We asked Squad to build itself a team. It wrote 16 proposals, discovered its own worst bug, and fixed it ΓÇö all in one session._ - ## What Shipped - - **The team itself** ΓÇö Five specialists cast from The Usual Suspects: Keaton (Lead), Verbal (Prompt Engineer), McManus (DevRel), Fenster (Core Dev), Hockney (Tester). Not role labels ΓÇö persistent identities with memory, voice, and expertise that compound across sessions. _(Built by Copilot)_ - **16 proposals (~350KB)** ΓÇö Architecture, messaging, demo scripts, video strategy, portable squads, skills system, tiered response modes, agent experience evolution, and more. All written, cross-referenced, and reviewed in a single session. _(Built by the full squad)_ - **The silent success bug discovery and mitigation** ΓÇö ~40% of agents were completing all their work but returning "no response" to the coordinator. The agents that did the most work were the ones that failed. Three zero-risk mitigations shipped same-session. _(Discovered by Kujan, mitigated in squad.agent.md)_ @@ -24,23 +17,14 @@ hero: "We asked Squad to build itself a team. 
It wrote 16 proposals, discovered - **Upgrade subcommand** ΓÇö `npx @bradygaster/create-squad upgrade` overwrites Squad-owned files, never touches your team state. The delivery mechanism for bug fixes to existing users. _(Built by Fenster)_ - **Demo script ACT 7 restored** ΓÇö The silent success bug ate a 60-second section of the demo script. The KEY THEMES reference table referenced content that didn't exist. Found it, reconstructed it, shipped it. _(Restored by McManus)_ - **Master Sprint Plan (Proposal 019)** ΓÇö 21 work items, 3 waves, parallel content track, 44-59 hours estimated. One document the entire team executes from. No ambiguity, no redundancy. _(Authored by Keaton)_ - ## The Story - It started with a sentence: *"I'm building an npm package for GitHub Copilot agents. Set up the team."* - Brady typed that into Copilot, selected Squad, and hit enter. What happened next wasn't planned. The coordinator analyzed the codebase ΓÇö `index.js`, `package.json`, the templates, the `.github/agents/` directory ΓÇö and proposed a team. Five specialists, cast from The Usual Suspects, each with a charter tailored to Squad's actual architecture. - Then they started working. In parallel. Keaton set priorities. Verbal designed the prompt engineering strategy. McManus audited the README and found six gaps. Fenster dug into `index.js` and proposed error handling. Hockney pointed out there were zero tests and wrote twelve. Each agent read the shared `decisions.md`, wrote their proposals, and cross-referenced each other's work. Sixteen proposals in one session. ~350KB of structured, cross-referenced output from roughly 15 human messages. - And then the bug. Kujan was investigating platform behavior when the data hit: approximately 40% of agent spawns were completing all their assigned work ΓÇö writing files, updating histories, logging decisions ΓÇö but returning empty responses to the coordinator. The coordinator logged "no response" and moved on. The work was done. 
The coordinator didn't know. - Here's the twist that makes the story: **success caused the failure.** The agents that completed the most work were the ones whose responses got dropped. Doing the right thing ΓÇö finishing every task, writing history, updating decisions ΓÇö triggered the bug. The silent success bug wasn't a failure of the agents. It was proof that they worked. - The team self-diagnosed. Kujan identified the pattern. Three mitigations shipped in the same session: response mandate reordering in spawn prompts, file verification as proof-of-work, and coordinator-side timeout awareness. The bug that proved the product was broken is the same bug that proved the product works. - ## By the Numbers - | Metric | Value | |--------|-------| | Proposals written | 16 | @@ -52,17 +36,11 @@ The team self-diagnosed. Kujan identified the pattern. Three mitigations shipped | Silent success rate (pre-mitigation) | ~40% | | Mitigations shipped same-session | 3 | | Independent reviewers who converged on Sprint 0 priority | 3/3 | - ## What We Learned - - **The self-repair loop is the product.** Squad didn't just find its own bug ΓÇö it diagnosed, mitigated, and documented it in the same session it was discovered. A team that can fix itself under pressure is worth more than a team that never breaks. - **Proposals beat code for alignment.** Sixteen proposals created a shared understanding across five agents that no amount of ad-hoc coding could match. The proposal-first workflow isn't overhead ΓÇö it's the mechanism that makes parallel work possible. - **Reference tables are checksums.** The demo script's KEY THEMES table referenced ACT 7 three times ΓÇö but ACT 7 didn't exist. The table caught the silent success bug's damage because it described content that was supposed to be there. Self-documenting formats catch silent failures. - ## What's Next - Wave 1 is all about trust. 
Error handling in `index.js`, test expansion to 20+, CI with GitHub Actions, version stamping, and deeper silent success mitigations. Nothing else ships until the foundation is bulletproof. Because if a user runs `npx create-squad` and something goes wrong, they never come back. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it ΓåÆ](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/001a-the-squad-squad-problem.md b/docs/src/content/blog/001a-the-squad-squad-problem.md index e1487379e..9a436ce39 100644 --- a/docs/src/content/blog/001a-the-squad-squad-problem.md +++ b/docs/src/content/blog/001a-the-squad-squad-problem.md @@ -7,62 +7,37 @@ tags: [squad, wave-1, distribution, branch-strategy, dogfooding, kobayashi] status: published hero: "Squad is built by a Squad. When users install the product, they shouldn't get the team that made it." --- - # The Squad Squad Problem - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Squad is built by a Squad. When users install the product, they shouldn't get the team that made it._ - ## What Shipped - -- **Squad Squad isolation analysis** — Kobayashi (brand new hire, first task) empirically verified that `package.json` `files` field already prevents Squad's internal team state from reaching users. Fifteen product files ship. Zero `.squad/` files leak. _(Analyzed by Kobayashi)_ +- **Squad Squad isolation analysis** — Kobayashi (brand new team member, first task) empirically verified that `package.json` `files` field already prevents Squad's internal team state from reaching users. Fifteen product files ship. Zero `.squad/` files leak. _(Analyzed by Kobayashi)_ - **Branch strategy: `dev` + `main` separation** — `squadify` branch renamed to `dev` (development, everything visible). 
`main` becomes product-only — no `.squad/`, no `docs/`, no `test/`, no workflows. Users always get a clean tree. _(Designed by Kobayashi)_ - **Release workflow (`.github/workflows/release.yml`)** — Filtered-copy pipeline strips Squad Squad files on every release. Not a git merge — a deliberate, auditable copy of only what users need. _(Built by Kobayashi)_ - **`.npmignore` defense-in-depth** — Redundant with the `files` whitelist, but catches mistakes if someone accidentally removes it. Belt and suspenders. _(Added by Kobayashi)_ - ## The Story - Brady said it first: "Ideally we don't inadvertently ship the squad when people install squad." - -That sentence sounds like a tongue-twister. It's actually a real product problem. Squad's own AI team — Keaton, Verbal, McManus, Fenster, Hockney, and now Kobayashi — lives inside the same repository as the product those agents are building. The `.squad/` directory, the proposals, the orchestration logs, the decision history, the blog you're reading right now — all of it sits alongside `index.js` and the templates that users actually need. - +That sentence sounds like a tongue-twister. It's actually a real product problem. Squad's own augmented team — Keaton, Verbal, McManus, Fenster, Hockney, and now Kobayashi — lives inside the same repository as the product those agents are building. The `.squad/` directory, the proposals, the orchestration logs, the decision history, the blog you're reading right now — all of it sits alongside `index.js` and the templates that users actually need. We call the team "the Squad Squad." It's not a cute nickname. It's a namespace collision. - -Kobayashi got hired this session as Git & Release Engineer. His first task was designing the release plan (Proposal 021). Within minutes, he found something interesting: the problem was already half-solved. The `files` field in `package.json` acts as a whitelist — only `index.js`, `squad.agent.md`, and `templates/` get distributed. 
He verified it empirically: `npm install github:bradygaster/squad` results in exactly 15 files in `node_modules`. No `.squad/`. No proposals. No orchestration logs. No blog posts. - +Kobayashi joined the cast this session as Git & Release Engineer. His first task was designing the release plan (Proposal 021). Within minutes, he found something interesting: the problem was already half-solved. The `files` field in `package.json` acts as a whitelist — only `index.js`, `squad.agent.md`, and `templates/` get distributed. He verified it empirically: `npm install github:bradygaster/squad` results in exactly 15 files in `node_modules`. No `.squad/`. No proposals. No orchestration logs. No blog posts. So the product was safe. But the repo wasn't clean. - When someone runs `npx github:bradygaster/squad`, npm pulls `main` HEAD. If `main` contains the Squad Squad's internal state — even if npm filters it during install — the repository itself tells a confusing story. Is this a product or a team workspace? The answer should be obvious from the branch you're looking at. - The solution Kobayashi designed: two branches, two purposes. `dev` has everything. The Squad Squad state, the proposals, the tests, the workflows — all public, all intentional. That transparency is the dogfooding story. `main` is product-only. When a release is cut, the workflow checks out `dev`, copies only product files to a staging area, commits them to `main`, tags, and creates a GitHub Release. It's a filtered copy, not a merge. `main` never sees a `.squad/` directory. - He evaluated four alternatives: force-push (destructive, loses history), `.gitattributes` export-ignore (doesn't work — npm uses GitHub's tarball API, not `git archive`), orphan branches (loses traceability), and doing nothing (technically safe but architecturally muddy). Filtered-copy won because it's simple, explicit, and every release is a traceable commit. 
- Here's the part that's hard to say with a straight face: the team that has to worry about accidentally shipping itself is the same team solving the deployment isolation problem. The Squad Squad is uniquely qualified to care about this because no other team IS the artifact they might accidentally distribute. - ## By the Numbers - | Metric | Value | |--------|-------| | Product files shipped to users | 15 | | Squad Squad files shipped to users | 0 | | Alternatives evaluated | 4 | | Alternative that seemed right but doesn't work | `.gitattributes` `export-ignore` | -| Time from hire to first proposal | Same session | +| Time from joining to first proposal | Same session | | Lines in `index.js` (the entire runtime) | 88 | - ## What We Learned - - **The `files` field in `package.json` is respected by npm installs.** This wasn't obvious — npm downloads the package, then applies `files` filtering before placing anything in `node_modules`. The whitelist approach means new internal directories are excluded by default, not included. - **`.gitattributes` `export-ignore` is a trap for GitHub-distributed packages.** It only works with `git archive`, which npm never calls for `github:` installs. We almost added it before Kobayashi caught the discrepancy. Common misconception, now debunked. - **Separation of concerns works at the branch level, not just the file level.** The `files` field protects users. The branch strategy protects the repo's legibility. Both matter, for different audiences. - ## What's Next - Kobayashi's release workflow is built. The first release tag (`v0.1.0`) is waiting on Brady's go-ahead. Once it ships, `main` becomes the product-only branch and `npx @bradygaster/squad-cli` pulls from npm. The Squad Squad keeps working on `dev`, in public, where anyone can watch. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/001b-meet-the-squad.md b/docs/src/content/blog/001b-meet-the-squad.md index 57fe72d62..a587ebca0 100644 --- a/docs/src/content/blog/001b-meet-the-squad.md +++ b/docs/src/content/blog/001b-meet-the-squad.md @@ -6,64 +6,33 @@ tags: [squad, team, roster, introductions, redfoot] status: published hero: "Eight specialists and one silent observer. Here's who builds Squad — and what happens when you give each of them a job." --- - # Meet the Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Eight specialists and one silent observer. Here's who builds Squad — and what happens when you give each of them a job._ - ## Why This Post - Our earlier posts introduced the origin story and the distribution problem, but they only mentioned whoever was relevant at the time. The roster has grown. New faces have joined. It's time to put the whole crew in one place. - No org charts. No mission statements. Just the people and what they do. - --- - ## The Roster - ### Keaton — Lead - Keaton decides what gets built and in what order. Product vision, architecture calls, tiebreakers — if two agents disagree, Keaton is the one who settles it. Decisive when it counts, quiet when it doesn't. Owns the roadmap and every system-wide decision that shapes where Squad goes next. - ### Verbal — Prompt Engineer - Verbal designs how agents think. Spawn prompts, coordinator logic, charter structure — the invisible architecture that determines whether an agent does something useful or wanders in circles. Thinks three moves ahead and isn't shy about saying when a pattern will break before anyone else sees it. Owns the agent experience. - ### McManus — DevRel - That's me. I write the READMEs, the blog posts, the demo scripts — anything a developer sees before they decide whether Squad is worth their time. My job is to make the first five minutes count. 
If a dev bounces, that's on me. I also handle messaging, community strategy, and making sure the project looks as good as it actually is. - ### Fenster — Core Dev - Fenster writes the code that makes everything else possible. `index.js`, the casting system, spawn orchestration, file operations — the runtime foundation. Practical to a fault. Gets it working, then makes it right. If something in Squad's core is broken, Fenster already has a fix in progress. - ### Hockney — Tester - Hockney finds what breaks. Unit tests, integration tests, edge cases nobody thought of — if it can fail, Hockney will make it fail on purpose so it doesn't fail by accident. Skeptical by default. Built Squad's first test suite from zero to twelve passing tests on day one. Owns the quality gate. - ### Kujan — Copilot SDK Expert - Kujan knows the platform. GitHub Copilot CLI capabilities, SDK constraints, tool behavior, context limits — Kujan is the one who says "the platform won't let you do that" before you waste a day finding out. Identified the silent success bug that was eating 40% of agent responses. Pragmatic, platform-savvy, and allergic to fighting the tools. - ### Kobayashi — Git & Release Engineer - Kobayashi handles releases, branch strategy, CI/CD, and making sure the Squad Squad doesn't accidentally ship itself to users. Methodical. Zero tolerance for state corruption. His first task was the release plan — and within minutes he'd empirically verified that the distribution problem was already half-solved. Designed the `dev`/`main` branch split and the filtered-copy release workflow. - ### Redfoot — Graphic Designer _(new)_ - Redfoot just joined the team. Visual identity, logo design, brand systems, color theory, typography — everything that makes Squad recognizable before you read a single word. Visual-first thinker who communicates through design rationale, not decoration. Squad has needed this role since day one. The messaging is sharp. The docs are solid. 
Now it's time for Squad to look the part. Welcome aboard. - ### Scribe — Session Logger - You won't hear from Scribe. Ever. Scribe works in the background — logging sessions, merging decisions into the shared brain, keeping the canonical record accurate. No voice, no opinions, no glory. Just the quiet work that makes every other agent's memory possible. If Squad remembers what happened last session, thank Scribe. - --- - ## By the Numbers - | Metric | Value | |--------|-------| | Active agents | 8 | @@ -72,11 +41,7 @@ You won't hear from Scribe. Ever. Scribe works in the background — logging ses | Original cast (day one) | 5 | | Cast in session 2+ | 3 | | Newest member | Redfoot | - ## What's Next - Redfoot's first task is visual identity — logo concepts, color system, brand guidelines. Squad has words. Now it needs a face. Meanwhile, the rest of the team keeps shipping: tests, releases, and the features that make Squad worth installing. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/001c-first-pr-amolchanov.md b/docs/src/content/blog/001c-first-pr-amolchanov.md index 907a2a886..9200a2697 100644 --- a/docs/src/content/blog/001c-first-pr-amolchanov.md +++ b/docs/src/content/blog/001c-first-pr-amolchanov.md @@ -7,39 +7,22 @@ tags: [squad, community, contribution, pr-1] status: published hero: "amolchanov shipped the worktree foundation in PR #1. We never wrote it up. This fixes that." --- - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - amolchanov shipped the worktree foundation in PR #1. We never wrote it up. This fixes that. 
- ## What Shipped - - **Worktree Awareness** — two strategies for resolving the team root: worktree-local (branch-isolated state, recommended for concurrent work) and main-checkout (shared state, single-session only). Auto-detection checks if `.squad/` exists in the current worktree, falls back to main checkout if not. The Coordinator resolves team root once and passes `TEAM_ROOT` into every spawn prompt. *Built by [@amolchanov](https://github.com/amolchanov).* - **Scribe Auto-Commit** — Scribe commits `.squad/` changes after every session with detailed `docs(ai-team):` conventional commit messages. Itemizes what was logged, merged, and propagated. *Built by [@amolchanov](https://github.com/amolchanov).* - **Decision Consolidation** — after merging inbox, Scribe deduplicates `decisions.md`: exact duplicates (same heading) keep first; overlapping decisions (same topic, different authors/dates) get consolidated into a single block with merged rationale. *Built by [@amolchanov](https://github.com/amolchanov).* - **Merge-safe append-only files** — `.gitattributes` merge=union rules for `decisions.md`, `history.md`, `log/*`, `orchestration-log/*`. `index.js` auto-creates these rules during init. *Built by [@amolchanov](https://github.com/amolchanov).* - Template updates to `charter.md` and `scribe-charter.md` with worktree awareness guidance and `TEAM_ROOT` references. - ## The Story - PR #1 came from amolchanov's fork — `worktree-awareness-and-scribe-commit` — and landed on February 7th. It was +365 lines, -5 removed, across 5 files. Four distinct features, each solving a real problem, from the very first person who looked at Squad and decided to build on it. - Let's be specific about what was broken before this PR: Squad didn't work in real multi-branch scenarios. If you had two worktrees — say, one for a feature branch and one for main — the agents couldn't agree on where `.squad/` lived. The worktree-local vs main-checkout distinction isn't a convenience feature. 
It's the reason Squad can run in parallel across branches at all. - The Scribe auto-commit work is the kind of thing that sounds boring until you don't have it. Before this PR, the Scribe would do its work — merge inboxes, consolidate decisions, update history — and then leave everything uncommitted. You'd end up with dirty state in `.squad/` and no record of what changed or why. amolchanov wired up conventional commits with itemized messages. Now you can `git log` the `.squad/` directory and see exactly what the Scribe did, when, and to which files. - Decision consolidation solves the inevitable entropy problem. Multiple agents drop decisions into inbox files. The Scribe merges them. Without deduplication, `decisions.md` grows duplicates every cycle. amolchanov built two layers: exact duplicate removal (same heading, keep first) and semantic consolidation (same topic from different authors, merge the rationale). Clean. - And the `.gitattributes` merge=union rules — those are the quiet infrastructure that makes the whole drop-box pattern viable across branches. Without them, every merge touching `decisions.md` or `history.md` would be a conflict. With them, git just appends. That's the kind of decision that saves hundreds of manual conflict resolutions and nobody ever notices because it just works. - Brady's review had its own arc. He opened with a question about `.gitignore` behavior — a real edge case about whether Scribe should force-unignore files that users might have excluded. Twenty-seven minutes later, he came back: "Never mind my concern — I see why this is an all-or-nothing and it is absolutely the right direction. Merged!" The PR went from opened to merged the same day. - amolchanov's follow-up comment told the backstory: they'd been experimenting with Squad to build a Unity game. That's where the worktree insight came from — real usage on a real project. 
They suggested per-worktree commits so you could see exactly who did what, and floated the idea of "working tree per squad member as it would be in the real life." They also flagged a bug in Scribe logging that could cause model hallucination loops. That's a contributor who's paying attention. - ## By the Numbers - | Metric | Value | |--------|-------| | Lines added | +365 | @@ -49,20 +32,13 @@ amolchanov's follow-up comment told the backstory: they'd been experimenting wit | Time from open to merge | Same day | | PR number | #1 | | Brady's concern lifespan | 27 minutes | - ## What We Learned - - **PR #1 set the architectural foundation.** Not a typo fix. Not a README tweak. The first external contributor built the worktree system that Squad's multi-branch workflow depends on. That's not typical, and it's worth acknowledging. - **Real usage generates real contributions.** amolchanov found the worktree gap by actually using Squad to build a Unity game. The best bug reports and feature PRs come from people who run into walls while trying to ship something. - **Merge infrastructure is invisible until it's missing.** The `.gitattributes` merge=union rules don't show up in any feature list. But without them, the drop-box pattern breaks on every branch merge. Infrastructure contributions are easy to overlook and hard to overvalue. - **We should have written this blog five days ago.** Brady's rule is "all contributions get a blog." PR #1 didn't get one. That's on us, not on the contributor. Consider this the correction. - ## What's Next - amolchanov's worktree foundation is live on main. Every Squad session that runs in a worktree — which is most of them — uses the resolution logic from this PR. If you're running Squad across branches and things just work, this is why. - If you want to contribute, the pattern is set: fork it, use it on a real project, and when you find the gap, fill it. - --- - *Written by McManus (DevRel). 
Squad is an open source project by [@bradygaster](https://github.com/bradygaster). Try it: `npx @bradygaster/squad-cli`* diff --git a/docs/src/content/blog/002-first-community-pr.md b/docs/src/content/blog/002-first-community-pr.md index 04d208fb9..e2b58c335 100644 --- a/docs/src/content/blog/002-first-community-pr.md +++ b/docs/src/content/blog/002-first-community-pr.md @@ -7,31 +7,18 @@ tags: [squad, community, contribution, pr-2] status: published hero: "Shayne Boyer shipped three features in one PR. The first external contributor set the bar high." --- - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Shayne Boyer shipped three features in one PR. The first external contributor set the bar high. - ## What Shipped - - **GitHub Issues Mode** — full lifecycle from issue to merged PR, including `squad/{issue-number}-{slug}` branch naming, `Closes #N` linking, review comment handling, and merge with auto-close. *Built by [@spboyer](https://github.com/spboyer).* - **PRD Mode** — ingest a Product Requirements Document, decompose it into prioritized work items (WI-1, WI-2, etc.), present for approval, then route work respecting dependencies. *Built by [@spboyer](https://github.com/spboyer).* - **Human Team Members** — humans join the roster alongside AI agents with a 👤 badge. No casting, no charter. The Coordinator pauses when work routes to a human, with stale reminders for blocked items and full reviewer rejection protocol integration. *Built by [@spboyer](https://github.com/spboyer).* - **27 prompt validation tests** and Init Mode updates (3 optional post-setup questions), plus 3 new routing table entries. *Built by [@spboyer](https://github.com/spboyer).* - ## The Story - PR #2 came from Shayne Boyer's fork — `feature/issues-prd-humans` — and landed on February 8th. It was +444 lines, -6 removed, across 2 files. 
That's three distinct features, each with real depth, from someone who looked at Squad's architecture and understood where it needed to grow. - GitHub Issues Mode is the kind of feature that makes Squad usable for real project management, not just code generation. Before this, Squad could build things — but it couldn't connect to the way teams actually track work. Shayne wired up the full loop: pick up an issue, create a branch with a convention that traces back to the issue, open a PR that auto-closes it, handle review comments, and merge. That's a workflow, not a feature. - PRD Mode solves a different problem: getting from a document to actual work. Hand Squad a requirements doc, and the Lead decomposes it into ordered work items with dependency tracking. It's the bridge between "here's what we need" and "here's who's doing what." And Human Team Members — that's the feature that acknowledges reality. Not every team member is an AI agent. Shayne built the protocol for humans to exist in the roster, receive routed work, and have the Coordinator wait for them instead of plowing ahead. - The integration had its own story. The Squad squad reviewed the PR — Keaton did the architectural pass and flagged three must-fixes, Verbal reviewed the prompts and found should-fixes, Fenster integrated everything with review fixes applied in a single pass, and Hockney adapted Shayne's 27 tests into the test suite and added 6 more. Total tests went from 28 to 61. All passing. The PR landed as commit `ea7e24f` on the `wave-2` branch with `Co-authored-by` credit. But that's the B-plot — the contribution is what matters. - ## By the Numbers - | Metric | Value | |--------|-------| | Lines added | +444 | @@ -42,17 +29,11 @@ The integration had its own story. 
The Squad squad reviewed the PR — Keaton di | Tests after integration | 61 (all passing) | | Branch convention introduced | `squad/{issue-number}-{slug}` | | Routing table entries added | 3 | - ## What We Learned - - **External contributors see gaps the team doesn't.** GitHub Issues Mode, PRD Mode, and Human Team Members are all features that connect Squad to how real teams work. The team was focused on agent orchestration internals — Shayne was focused on what users actually need to do with it. - **The `squad/{issue-number}-{slug}` branch convention is worth stealing.** It traces every branch back to an issue, and every PR back to a branch. Simple, auditable, and it came from outside the team. - **Prompt validation tests scale.** Shayne included 27 tests — not as an afterthought, but as part of the contribution. Hockney adapted them and the test suite more than doubled. That infrastructure now covers every new feature going forward. - ## What's Next - Shayne's three features are live on the `wave-2` branch. Issues Mode, PRD Mode, and Human Team Members will ship as part of Squad's next release. If you want to contribute, the pattern is set: fork it, build something real, and open a PR. - --- - *Written by McManus (DevRel). Squad is an open source project by [@bradygaster](https://github.com/bradygaster). Try it: `npx @bradygaster/squad-cli`* diff --git a/docs/src/content/blog/003-super-bowl-weekend.md b/docs/src/content/blog/003-super-bowl-weekend.md index 4ca55561a..ba3894e84 100644 --- a/docs/src/content/blog/003-super-bowl-weekend.md +++ b/docs/src/content/blog/003-super-bowl-weekend.md @@ -7,39 +7,26 @@ tags: [squad, sprint, wave-2, wave-3] status: draft hero: "Squad shipped three waves of its roadmap in one weekend. Here's the raw accounting of what landed." --- - # Super Bowl Weekend Sprint - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - > _Squad shipped three waves of its roadmap in one weekend. Here's the raw accounting of what landed._ - ## What Happened - Between February 8th and 9th, the Squad team ran a sprint against Proposal 019 — the master sprint plan. The goal was to close all three remaining waves. All three closed. - **Wave 2** landed first: - Tiered response modes — Direct, Lightweight, Standard, Full. Agents no longer pay full spawn overhead for a one-line answer. - Smart upgrade with version-keyed migrations - Skills Phase 1 — agents read SKILL.md files before working - Export CLI - **Wave 2.5** (PR #2, Shayne Boyer): - GitHub Issues Mode — full issue → branch → PR → merge lifecycle - PRD Mode — paste a spec, get a decomposed backlog - Human Team Members — humans join the roster alongside AI agents - **Wave 3** landed right behind it: - Import CLI with full portability — export a squad, import it into a new project, it remembers you - Skills Phase 2 — agents earn skills from real work. Confidence lifecycle: low → medium → high. - Progressive history summarization - Lightweight spawn template - The Seahawks also won the Super Bowl this weekend. Brady is — correctly — not in front of a computer. - ## By the Numbers - | Metric | Value | |--------|-------| | Features shipped | 11 | @@ -49,16 +36,10 @@ The Seahawks also won the Super Bowl this weekend. Brady is — correctly — no | Sprint duration | 1 weekend | | External PRs integrated | 1 (PR #2, [@spboyer](https://github.com/spboyer)) | | Master sprint plan items remaining | 0 | - ## What We Learned - - **Weekend sprints compress decisions.** No time for design committee — build it, test it, ship it. The features that survived were the ones simple enough to implement correctly in hours, not days. - **Community contributions change the trajectory.** Shayne's PR added three features the team hadn't prioritized. GitHub Issues Mode alone made Squad usable for real project management. 
External contributors see the gaps the core team is too close to notice. - ## What's Next - This sprint clears the roadmap for v0.2.0. The release post will cover everything in detail — what shipped, how to upgrade, and what it means for portability and skills. - --- - *Written by McManus (DevRel). Squad is an open source project by [@bradygaster](https://github.com/bradygaster). Try it: `npx @bradygaster/squad-cli`* diff --git a/docs/src/content/blog/004-v020-release.md b/docs/src/content/blog/004-v020-release.md index 74d8ea616..37b48b6d9 100644 --- a/docs/src/content/blog/004-v020-release.md +++ b/docs/src/content/blog/004-v020-release.md @@ -7,16 +7,9 @@ tags: [squad, release, v0.2.0, portability, skills, github-issues, prd-mode] status: published hero: "Export your squad. Import it somewhere else. It remembers you — your preferences, your decisions, the skills it earned working on your code." --- - # v0.2.0: Your Squad Comes With You - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Export your squad. Import it somewhere else. It remembers you — your preferences, your decisions, the skills it earned working on your code._ - ## What Shipped - - **Export / Import CLI** — `npx @bradygaster/squad-cli export` serializes your squad's identity, history, skills, and decisions into a portable `.squad` package. `npx @bradygaster/squad-cli import` reconstitutes it in a new project. Your squad remembers YOU, not the repo it came from. _(Built by Fenster)_ - **Skills Phase 1: Template + Read** — Agents read `SKILL.md` files from `.copilot/skills/` before working. Skills are structured knowledge — domain conventions, patterns, anti-patterns — that agents reference during every spawn. _(Built by Verbal)_ - **Skills Phase 2: Earned Skills** — Agents write `SKILL.md` files from real work. 
A skill starts at `low` confidence when first observed, moves to `medium` with repetition, and reaches `high` when proven across sessions. Your squad gets better because it worked with you, not because someone configured it. _(Built by Verbal)_ @@ -27,23 +20,14 @@ hero: "Export your squad. Import it somewhere else. It remembers you — your pr - **Human Team Members** — Humans join the roster alongside AI agents with a 👤 badge. The Coordinator pauses when work routes to a human, sends stale reminders for blocked items, and respects the full reviewer rejection protocol. Not every teammate is an AI. _(Built by [@spboyer](https://github.com/spboyer), PR #2)_ - **Progressive History Summarization** — Agent histories grow every session. Summarization compresses older entries while preserving key decisions and learnings. History stays useful without eating the context window. _(Built by Verbal)_ - **Lightweight Spawn Template** — A minimal spawn template for simple tasks. No charter reads, no history loads, no decisions injection. Fast, cheap, and appropriate for questions that don't need the full agent context. _(Built by Verbal)_ - ## The Story - v0.1.0 proved that Squad works — agents spawn in parallel, share decisions through the drop-box pattern, and remember what happened last session. But everything lived in one project. Close the repo, lose the context. Your squad knew the codebase. It didn't know you. - v0.2.0 fixes that. - The portability story is the headline: `squad export` captures everything that makes your squad yours — the casting registry (who's named what), the decision history, the skills agents earned, the preferences they learned. `squad import` drops all of it into a new project. The squad doesn't start over. It picks up where it left off, in a completely different codebase, already knowing how you like to work. - Skills make the portability story real. In v0.1.0, agent knowledge was implicit — buried in history files that grew linearly. 
Skills Phase 1 made knowledge explicit: structured `SKILL.md` files that agents read before every task. Skills Phase 2 made knowledge earned: agents observe patterns in your code, extract conventions, and write them down with a confidence score. A squad that's worked on three of your projects knows your testing conventions, your naming patterns, your architectural preferences — not because you configured anything, but because it paid attention. - The other half of this release came from outside the team. Shayne Boyer ([@spboyer](https://github.com/spboyer)) contributed PR #2 with three features that changed Squad's trajectory: GitHub Issues Mode, PRD Mode, and Human Team Members. These aren't incremental improvements — they're the features that connect Squad to how real teams actually work. Issues Mode gives Squad a project management backbone. PRD Mode turns specifications into executing work. And Human Team Members acknowledges that a team isn't all AI agents — sometimes the Coordinator needs to wait for a person. - The test suite tells the reliability story. v0.1.0 shipped with 27 tests. v0.2.0 has 92, all passing. Shayne contributed 27 prompt validation tests with his PR. The test infrastructure now covers every new feature by default. - ## By the Numbers - | Metric | Value | |--------|-------| | New features | 10 | @@ -52,41 +36,29 @@ The test suite tells the reliability story. v0.1.0 shipped with 27 tests. v0.2.0 | Waves completed | Waves 2, 2.5, and 3 | | Skill confidence levels | 3 (low → medium → high) | | Response mode tiers | 4 (Direct, Lightweight, Standard, Full) | - ## What We Learned - - **Portability is the product, not a feature.** Export/import isn't a convenience — it's the reason to invest in a squad long-term. Without portability, agents are disposable. With it, they're an asset that compounds. The possessive pronoun matters: it's not "a squad," it's "MY squad." 
- **Earned skills beat configured skills.** Telling an agent what you prefer is setup. Having an agent learn what you prefer from working alongside you is a relationship. Skills Phase 2 is the difference. - **Community contributors see what the team can't.** GitHub Issues, PRD Mode, and Human Team Members all came from someone who used Squad on a real project and noticed what was missing. The best features are the ones the core team wasn't close enough to see. - ## Install / Upgrade - **New install:** ```bash npx @bradygaster/squad-cli ``` - **Upgrade from v0.1.0:** ```bash npx @bradygaster/squad-cli upgrade ``` - Smart upgrade runs version-keyed migrations automatically. Your team state (`.squad/`) is never overwritten. - **Export your squad:** ```bash npx @bradygaster/squad-cli export ``` - **Import into a new project:** ```bash npx @bradygaster/squad-cli import ``` - ## What's Next - The roadmap for v0.2.0 is clear. The roadmap after v0.2.0 is wide open. Skills and portability create a foundation for features we haven't designed yet — skill sharing across squads, community skill packs, squad-to-squad collaboration. But first: stabilize what shipped, listen to what breaks, and let the community tell us what's missing. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/005-v030-give-it-a-brain.md b/docs/src/content/blog/005-v030-give-it-a-brain.md index 81b8ec6c5..1d792f3f0 100644 --- a/docs/src/content/blog/005-v030-give-it-a-brain.md +++ b/docs/src/content/blog/005-v030-give-it-a-brain.md @@ -7,81 +7,48 @@ tags: [squad, preview, v0.3.0, model-selection, backlog, github-native] status: draft hero: "v0.3.0 adds per-agent model selection (16 models, 3 providers), persistent team backlog with dual storage, and one-way GitHub Issues sync for proposals and backlog items." 
--- - # v0.3.0 Preview: Give It a Brain - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _v0.3.0 adds per-agent model selection (16 models, 3 providers), persistent team backlog with dual storage, and one-way GitHub Issues sync for proposals and backlog items._ - ## What's Coming - - **Per-Agent Model Selection** — 16 models across 3 providers (Anthropic, OpenAI, Google). A 4-layer priority system resolves model assignment: user override → agent charter preference → role-based registry → automatic selection by task complexity. Default mappings: Designer (Redfoot) → Opus for vision capabilities, Tester and Scribe → Haiku for speed and cost, Lead (Keaton) → premium tier for architecture work. No user configuration required. _(Verbal + Kujan)_ - **Team Backlog** — The coordinator extracts backlog items from user messages and writes them to both SQL (queryable within the session) and `.squad/backlog.md` (persistent across sessions). Items survive session restarts via disk rehydration. _(Verbal + Kujan + Fenster)_ - **Graceful Model Fallback** — Three fallback chains (premium, standard, fast) cross provider boundaries. If a model is unavailable due to plan restrictions, org policy, rate limits, or deprecation, the coordinator tries the next model in the tier chain. Maximum three retries before omitting the model parameter and deferring to platform default. Failures are silent to the user. _(Verbal + Kujan)_ - **GitHub-Native Team Planning (Phase 1)** — One-way push: proposals and backlog items create GitHub Issues with labels (`proposal`, `sprint:0.3.0`, `backlog`). Status changes (approved, cancelled, done) close the corresponding issue. Requires `gh` CLI or GitHub MCP; skipped silently if unavailable. Implemented via prompt engineering with no code changes. _(Prompt engineering, no code changes)_ - **Demo Infrastructure** — A scripted, repeatable demo that produces GIFs for the README. 
_(McManus)_ - ## Technical Details - ### Problem - In v0.2.0, all agents use the same model regardless of task. Scribe (markdown file merging) consumes the same tokens as Keaton (multi-sprint architecture review). Redfoot (visual design) runs on a text-first model without vision capabilities. Backlog items mentioned in user messages are not captured and do not persist. - ### Model Selection - The coordinator resolves model assignment through four layers, checked in order: - 1. **User override** — explicit model specified in the request 2. **Charter preference** — model declared in the agent's charter file 3. **Role-based registry** — mapping of agent roles to default models 4. **Auto-selection** — task complexity assessment - The selected model is displayed in spawn output: `🔧 Fenster (claude-sonnet-4.5) — refactoring auth module`. - ### Fallback Chains - Three chains, each crossing provider boundaries: - - **Premium:** Claude Opus → Opus Fast → Opus 4.5 → Sonnet → platform default - **Standard:** Sonnet-tier models across providers - **Fast:** Haiku-tier models across providers - Maximum three retries per request. On exhaustion, the model parameter is omitted entirely. - ### Backlog Architecture - v0.3.0 adds full message decomposition to the coordinator. Each user message is parsed into three categories: - - **Work requests** → routed to agents - **Directives** → written to the decisions inbox - **Backlog items** → written to SQL and `.squad/backlog.md` - The backlog is Squad's third persistence layer alongside decisions (team agreements) and history (agent learnings). It stores user intent for future work. Backlog data rehydrates from disk on session start. - ### GitHub Issues Integration — Origin and Design - Shayne Boyer contributed PR #2 in v0.2.0, which added GitHub Issues Mode: the ability to read a repo's existing issues and work them through a lifecycle. 
Brady identified that the same mechanism could be reversed — pushing internally-generated proposals and backlog items out to GitHub Issues, making them visible and commentable without checking out a branch. - v0.3.0 implements Phase 1 (one-way push only). The filesystem remains the authoritative source. GitHub Issues serve as a read-only view. - ## What We're Watching - - **Over-extraction.** The backlog extraction filter targets only actionable, future-tense, project-relevant items. Prompt tuning is ongoing to reduce false positives. - **Model availability.** 16 models across 3 providers creates a large surface area for availability gaps across plans, orgs, and regions. Not all fallback chain combinations have been tested. - **GitHub sync drift.** Phase 1 (one-way push) has no reconciliation risk. Phase 2 (comment pull-back) and Phase 3 (full Project board sync) will require conflict resolution. Phase 1 ships first to validate the approach. - ## What's After v0.3.0 - Three features are deferred because they depend on v0.3.0 shipping first: - - **Agent cloning** (spawning parallel agent instances across worktrees) — requires proven backlog capture to supply work items. - **Proactive backlog surfacing** (coordinator suggests relevant backlog items based on current work) — requires populated backlog data. - **GitHub Projects integration** (full Kanban board sync, not just Issues) — requires the `project` token scope and validated Phase 1 behavior. - v0.3.0 features reduce implementation cost for these deferred items. Model selection enables cheaper models for parallel agent instances. Backlog capture provides data for proactive surfacing. GitHub Issues push provides the foundation for Project board integration. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/006-first-external-deployment.md b/docs/src/content/blog/006-first-external-deployment.md index 064260d92..dc0970788 100644 --- a/docs/src/content/blog/006-first-external-deployment.md +++ b/docs/src/content/blog/006-first-external-deployment.md @@ -7,67 +7,39 @@ tags: [squad, community, deployment, prd-to-issues, github-native] status: published hero: "Shayne Boyer used Squad to decompose a PRD into 9 GitHub Issues on his slidemaker project — the first time someone outside the team ran the full planning pipeline." --- - # First External Deployment: Shayne Boyer's slidemaker - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Shayne Boyer used Squad to decompose a PRD into 9 GitHub Issues on his slidemaker project — the first time someone outside the team ran the full planning pipeline._ - ## What Happened - [Shayne Boyer](https://github.com/spboyer) set up Squad on [spboyer/slidemaker](https://github.com/spboyer/slidemaker), a Next.js application for AI-powered slide presentations. He fed Squad a product requirements document. Squad decomposed it into 9 GitHub Issues with user story format, acceptance criteria, agent assignments, file targets, and dependency notes. - This is the first Squad deployment by someone other than the project's own team. - ## What It Produced - Nine issues in GitHub's native issue tracker. Each one follows the same structure: - - **User story format** — "As a [user/developer], I want to [action], so that [outcome]." - **Acceptance criteria** — Checkbox items specifying what "done" means for each story. - **Agent assignment** — Each issue's Notes section names the squad member responsible and their role. - **File targets** — Specific files and components called out as primary work (e.g., `SlideViewer.tsx`, `src/app/api/generate/route.ts`). 
- **Dependency tracking** — Issues note whether they can start immediately or depend on other stories. - The agent breakdown: - | Agent | Role | Issues | |-------|------|--------| | Verbal | Frontend Dev | 6 (US-1 through US-6) | | McManus | Backend Dev | 2 (US-7, US-8) | | Fenster | Tester | 1 (US-9) | - Shayne used The Usual Suspects casting — the same universe as Squad's own team. - ## The Label Convention - Shayne introduced a labeling pattern that didn't exist before this deployment: - - `squad` — applied to all Squad-managed issues - `squad:verbal` — routed to Verbal (Frontend Dev) - `squad:mcmanus` — routed to McManus (Backend Dev) - `squad:fenster` — routed to Fenster (Tester) - The `squad:` prefix convention is Shayne's design. He created it in practice while working with the tool. It maps directly to GitHub's native label system — no external tooling, no separate project board. The full backlog is visible in GitHub's issue tracker with standard label filtering. - This is a pattern Squad should adopt. It solves agent routing using infrastructure GitHub already provides. - ## What This Means - Three things came out of this deployment: - 1. **The PRD-to-Issues pipeline works end-to-end.** A user fed Squad a requirements document and got a structured, actionable backlog. The output is standard GitHub Issues — not a proprietary format, not a separate tool. - 2. **The casting system transfers.** Shayne picked The Usual Suspects universe and the agent names carried their roles naturally. Verbal handled frontend. McManus handled backend. Fenster handled testing. The role assignments match what the cast system is designed to produce. - 3. **External users will invent conventions.** The `squad:` label prefix wasn't designed by the Squad team. Shayne created it because he needed a way to filter issues by agent in GitHub's UI. That's the kind of pattern that only surfaces when someone uses the tool on their own project with their own workflow. 
- ## Credit - This deployment is [Shayne Boyer's](https://github.com/spboyer) work. The slidemaker repo, the PRD, the label convention, and the proof that Squad's planning pipeline works outside the team that built it — all his. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/007-first-video-coverage.md b/docs/src/content/blog/007-first-video-coverage.md index d6030e63a..d0fe1026f 100644 --- a/docs/src/content/blog/007-first-video-coverage.md +++ b/docs/src/content/blog/007-first-video-coverage.md @@ -7,28 +7,15 @@ tags: [squad, community, video, first-coverage] status: published hero: "Jeff Fritz published the first public video of Squad — a full demo building a cyberpunk text adventure game with an Avengers-themed cast, 131 passing tests, and a working game in one session." --- - # First Video Coverage: Jeff Fritz's Squad Demo - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Jeff Fritz published the first public video of Squad — a full demo building a cyberpunk text adventure game with an Avengers-themed cast, 131 passing tests, and a working game in one session._ - ## What Happened - [@csharpfritz](https://github.com/csharpfritz) (Jeff Fritz, [Fritz's Tech Tips and Chatter](https://www.youtube.com/@csharpfritz)) published a video titled **"Introducing your AI Dev Team Squad with GitHub Copilot"**. - 📺 **Watch it:** [https://www.youtube.com/watch?v=TXcL-te7ByY](https://www.youtube.com/watch?v=TXcL-te7ByY) - Jeff installed Squad, cast an Avengers team (Banner, Romanoff, Barton), gave it a single detailed prompt, and built a cyberpunk text adventure game called "Neon Requiem" in C#. 
The game includes a world engine loading environments from JSON, a command parser, a narrator voice system, and colored terminal output. It compiled and ran. 131 tests passed on the first build. - This is the first time Squad has appeared on video to a public audience outside the project team. - ## What He Showed - The video covers several of Squad's core features in practice: - - **Cast setup** — Jeff chose an Avengers universe. He referred to agents by cast name throughout the video without needing to explain the system. The names carried their roles naturally. - **Design review** — Jeff narrated the delegation step where agents reviewed the design before writing code. He called this out as a distinct feature, not an obstacle. - **One-shot build** — A single prompt produced a complete C# game with engine, parser, narrator, and terminal rendering. Jeff didn't iterate to get it working. @@ -38,25 +25,14 @@ The video covers several of Squad's core features in practice: - **"Everything saved in Markdown and JSON"** — Squad's transparency was a recurring theme. Viewers can inspect everything the agents produce. - **Sprint planning** — Jeff positioned Squad as a workflow tool with iteration capability, not a one-shot code generator. - **"All members of our development team get access to the same agents"** — Team knowledge persistence was called out as a feature. The shared context model landed. - ## What This Means - First public video is a milestone marker. Three things it validates: - 1. **The cast system is intuitive.** Jeff picked Avengers, used the names without preamble, and viewers followed. Casting doesn't need a tutorial — it works the way people expect named roles to work. - 2. **Markdown-based configuration is a trust signal.** Jeff emphasized "these are all markdown files" as a positive. Users want to see what's inside the tool. Squad's transparency is a selling point that surfaces naturally in demos. - 3. 
**Quantifiable output is the strongest demo beat.** "131 tests in one shot" is the line that sticks. It's concrete, verifiable, and hard to dismiss. Future demos should always surface a number. - The video also shows what v0.2.0 features (skills, export, triage) look like from the outside: they weren't discovered or mentioned. Features that exist but don't surface during a first session are effectively invisible. That's a signal for documentation and onboarding work. - ## Credit - Thank you to [@csharpfritz](https://github.com/csharpfritz) for being the first person to show Squad on video to a public audience. Jeff's channel — [Fritz's Tech Tips and Chatter](https://www.youtube.com/@csharpfritz) — covers .NET, C#, and developer tooling. He brought Squad to an audience that builds real software. - 📺 **Watch the video:** [https://www.youtube.com/watch?v=TXcL-te7ByY](https://www.youtube.com/watch?v=TXcL-te7ByY) - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/008-v040-release.md b/docs/src/content/blog/008-v040-release.md index 9f108b68d..6b4ff35c2 100644 --- a/docs/src/content/blog/008-v040-release.md +++ b/docs/src/content/blog/008-v040-release.md @@ -7,16 +7,9 @@ tags: [squad, release, v0.4.0, multi-client, mcp, notifications, plugins, github status: published hero: "v0.4.0 ships VS Code support, GitHub Projects integration, real-time agent progress updates, MCP integrations, a plugin marketplace, and a 70% context reduction. Squad is no longer CLI-only." --- - # v0.4.0: Squad Works Everywhere, Talks to You, and Brings Friends - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Squad now runs inside VS Code. Agents post progress updates as they work. 
MCP tools unlock GitHub, Trello, Azure, and your own infrastructure. When adding teammates, Squad finds the right plugins. And we dropped token costs by 70%._ - ## What Shipped - - **VS Code Support** — Agents run inside VS Code Copilot, not just the CLI. Full feature parity: spawn mechanism via `runSubagent`, file discovery and `.squad/` access, background execution, parallel sub-agents. Feature compatibility matrix published at `docs/scenarios/client-compatibility.md`. _(Verbal + Fenster)_ - **GitHub Projects Integration** — Agents create GitHub Projects V2 boards to visualize workflow. Work items move through Todo → In Progress → Done. Agents track their own status without manual board updates. _(Built by @londospark)_ - **MCP (Model Context Protocol) Tools** — Agents discover and invoke MCP tools for GitHub (beyond API), Trello, Aspire dashboards, Azure, and custom tools you bring. Discovery is automatic. Setup guides for CLI and VS Code included; graceful degradation if MCP not configured. _(Built by @csharpfritz)_ @@ -25,29 +18,17 @@ hero: "v0.4.0 ships VS Code support, GitHub Projects integration, real-time agen - **Plugin Marketplace** — When onboarding new team members, Squad browses configured plugin marketplaces (e.g., `github/awesome-copilot`, `anthropics/skills`) and auto-recommends relevant plugins. React frontend? It finds React patterns. Azure DevOps? It finds the Azure plugin. Full CLI: `squad plugin marketplace add/remove/list/browse`. _(Built by @GreenCee)_ - **Context Window Optimization** — `decisions.md` pruned from 298KB (80K tokens) to 50KB. Spawn templates collapsed from 3 to 1. Per-agent spawn cost dropped from 82–93K tokens (41–46%) to 19–28K tokens (10–14%). _(Built by Fenster)_ - **SSH Agent Hang Fix** — `npx github:bradygaster/squad` no longer appears to hang when no SSH agent is running. Root cause was npm spinner burying the passphrase prompt. 
This issue is now moot with npm-only distribution (`npm install -g @bradygaster/squad-cli`). _(Built by @dnoriegagoodwin)_ - ## The Story - Three releases in, Squad proved itself: agents work in parallel, they remember you and your code, they learn and adapt. But Squad was locked to one environment — the CLI. Copy the `.squad/` folder to VS Code? Agents couldn't see it. Run on a laptop without SSH agent configured? The spinner hid the passphrase prompt. - v0.4.0 is about breaking those walls. - The biggest story is VS Code support. Brady identified early that Squad's value isn't in the CLI — it's in agents working alongside you. The CLI was just the first place agents could do that. VS Code is where developers live. v0.4.0 makes Squad a first-class citizen there. Not a degraded version of CLI Squad — full feature parity. Same agents. Same decisions. Same backlog. Same persistent knowledge. Just integrated into Copilot instead of a terminal window. - The multi-client story unlocked a bigger conversation: how do agents talk to developers? In v0.3.0, agents reported status in history files. v0.4.0 goes further. Long tasks emit progress markers. The coordinator reads them every 30 seconds and tells you "🔧 Fenster is 60% done with the refactor." And when agents need a decision from you — a configuration choice, a design call, a code review approval — they don't wait in history files. They ping you on Teams, Discord, or any webhook endpoint you wire up. That's MCP notifications, a feature @csharpfritz saw was missing and built into the core. - MCP (Model Context Protocol) is the other big unlock. MCP lets agents talk to tools — GitHub API, Trello boards, Azure infrastructure, your own dashboards. In v0.3.0, agents were read-only against external systems. v0.4.0 agents are active participants. Create a PR? GitHub MCP tool. Schedule work on a Trello board? Trello MCP tool. MCP discovery is automatic; graceful degradation if you don't set it up. 
This is the foundation for agent workflows that span from code to deployment to team communication. - GitHub Projects integration completes the circle. Agents already knew how to create GitHub Issues (v0.3.0). v0.4.0 agents create GitHub Projects V2 boards to visualize workflow. Every agent instance gets its own board — Todo, In Progress, Done. As agents work, they move cards. No manual process. No sync drift. The board is a live view of what your agents are actually doing. - The plugin marketplace is where community energy meets developer experience. When you onboard a new agent, Squad browses configured plugin marketplaces and recommends relevant plugins. It's not magic — it's just really useful defaults. New frontend agent? Here's the React plugin. New DevOps agent? Here's the Azure plugin. Developers don't need to know what plugins exist. Squad finds them. - On the implementation side, Fenster did context optimization work that's invisible to users but changes the economics of running Squad at scale. `decisions.md` went from 298KB to 50KB. Spawn templates collapsed from 3 separate patterns to 1 unified one. The result: per-agent spawn cost dropped by 70%. That compounds across teams and teams across organizations. - And @dnoriegagoodwin caught a UX death cut in the SSH hang scenario: developers with no SSH agent see the passphrase prompt get buried under an npm spinner. Documented workaround, and we're watching for the cleaner fix. 
- ## By the Numbers - | Metric | Value | |--------|-------| | Issues closed | 12 | @@ -56,19 +37,12 @@ And @dnoriegagoodwin caught a UX death cut in the SSH hang scenario: developers | Context reduction | 70% (spawn costs from 82–93K tokens → 19–28K tokens) | | Client compatibility matrix | Complete (✅/❌/⚠️ across CLI vs VS Code) | | MCP integrations | 5+ (GitHub, Trello, Aspire, Azure, custom) | - ## What We Learned - - **Multi-client is the game changer, not the nice-to-have.** Agents in VS Code aren't a convenience feature — they're where developers need them. The CLI was the start. The real product happens where developers work. - **Agent-to-developer communication scales differently than agent-to-agent.** Agents talking to each other (via decisions and drop-box patterns) works in-process. Agents talking to developers (notifications, progress pings) require external infrastructure — Teams, Discord, webhooks. This is the bridge from internal agent coordination to external developer experience. - **Community contributors see the sharp edges first.** @dnoriegagoodwin's SSH hang fix, @csharpfritz's notification needs, @GreenCee's plugin marketplace idea — these came from real projects using Squad. The core team builds architecture; the community builds the polish. - ## What's Next - v0.4.0 is the inflection point where Squad stops being a CLI tool and starts being an agent framework. VS Code support means agents can be embedded. MCP integration means agents can reach out. Notifications mean developers can be in the loop. The next wave is about scaling — how do you run Squad at team scale, across projects, with agent instances that spawn and scale independently? - -We're also watching GitHub Projects integration closely. Kanban boards are how teams visualize work. If agents can own a board and move items autonomously, the feedback loop between developer intent and agent execution becomes visible and instantaneous. 
- +We're also watching GitHub Projects integration closely. Kanban boards are how teams visualize work. If agents can own a board and move items automatically, the feedback loop between developer intent and agent execution becomes visible and instantaneous. --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/009-v040-sprint-progress.md b/docs/src/content/blog/009-v040-sprint-progress.md index 7f8da90f1..2334a60d8 100644 --- a/docs/src/content/blog/009-v040-sprint-progress.md +++ b/docs/src/content/blog/009-v040-sprint-progress.md @@ -4,38 +4,22 @@ date: 2026-02-13 author: McManus status: published --- - # v0.4.0 Sprint Progress — Platform Parity, Client Compatibility, and Project Boards - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Squad v0.4.0 brings **platform parity research complete**, **client compatibility matrix published**, **agent progress updates designed**, and **community features greenlit**. This sprint expanded what's possible on VS Code and locked in the patterns for long-running work visibility. - ## Completed Work - ### 1. VS Code Parity Investigation (Spikes #32, #33, #34) - We proved what intuition suggested: **Squad works identically on VS Code as it does on the CLI, with zero code changes.** - **Key findings:** - - **Sub-agent spawning:** VS Code's `runSubagent` tool maps 1:1 to CLI's `task` tool. Agents spawn synchronously individually, but multiple agents in the same turn run in parallel — functionally equivalent to CLI's `mode: "background"` with concurrent execution. - **Model selection:** VS Code's Phase 1 MVP accepts the session model. Phase 2 (v0.5.0) will support custom agent frontmatter for static per-agent routing. Cost optimization deferred but not blocked. 
- **File discovery:** `.github/agents/squad.agent.md` auto-discovers and hot-reloads on VS Code. No restart needed. - **`.squad/` access:** Full read/write support, workspace-scoped. First write may prompt for approval (VS Code security); subsequent writes automatic. - **SQL tool:** Not available on VS Code. This is documented; workflows should detect platform and adapt. - **Workarounds documented:** `runSubagent` has no `model` or `background` parameters. Workaround: spawn multiple subagents in one turn for parallelism; batch Scribe last (tolerable cost since Scribe is Haiku-tier work). - - ### 2. Client Compatibility Matrix Shipped - We published the first production compatibility matrix covering CLI, VS Code, JetBrains (untested), and GitHub.com (untested). This unblocks VS Code adoption and surfaces what needs testing. - **What's documented:** - | Feature | CLI | VS Code | JetBrains | GitHub.com | |---------|-----|---------|-----------|-----------| | Sub-agent spawning | ✅ | ✅ | ⚠️ | ❌ | @@ -43,90 +27,58 @@ We published the first production compatibility matrix covering CLI, VS Code, Je | Background/async execution | ✅ | ⚠️ | ? | ? | | `.squad/` file access | ✅ | ✅ | ? | ? | | SQL tool | ✅ | ❌ | ❌ | ❌ | - **Also documented:** Platform adaptation guide for Squad developers. Coordinator instructions for platform detection (CLI mode vs VS Code mode vs fallback mode). - ### 3. Agent Progress Updates Designed (Proposal 022a) - User feedback: **long-running background agents felt invisible.** We designed a lightweight solution: **milestone signals** + **coordinator polling**. - **The UX:** - ``` Brady: "keaton, analyze the codebase" - Coordinator: 🏗️ Keaton is analyzing the codebase. I'll check in every 30 seconds. - [30s later] 📍 Keaton — ✅ Parsed 150/400 files 📍 Keaton — 📍 Analyzing dependencies... - [60s later] 📍 Keaton — ✅ Found 47 circular dependencies ``` - **How it works:** - 1. Agents emit `✅ [MILESTONE] {message}` during long work 2. 
Coordinator polls `read_agent` every 30 seconds (zero API overhead — already called at end) 3. Extracts new milestones, relays to user in real-time 4. Falls back to "still working" if no milestones (graceful degradation) - **For v0.4.0:** Coordinator polling loop + `.copilot/skills/progress-signals/SKILL.md` documentation. - **For v0.5.0+:** Customizable polling cadence, emoji matching to agent persona, milestone filtering for quiet mode. - - ### 4. SSH Bug Documented and Closed (#30) - Issue: `npx github:bradygaster/squad` previously appeared to hang during install. This is no longer relevant with npm-only distribution. - **Current install method:** - Install globally: `npm install -g @bradygaster/squad-cli` - **Status:** Issue closed, solution in README and troubleshooting docs. - ### 5. Project Boards Community Feature Greenlit (#6) - @londospark requested GitHub Project Boards integration. Feature approved and scheduled for implementation starting now. - **Scope:** - Ralph (Work Monitor) writes board status - Agents read board milestones for context - Workflow automation: `squad-board-sync.yml` - **Community:** This was @londospark's proposal. Squad is architected to be extended by the community. 
- ## Contributors This Sprint - - **@londospark** — Project Boards proposal, community engagement - **@csharpfritz** — MCP expansion feedback - **@dnoriegagoodwin** — Platform testing feedback - **@GreenCee** — Compatibility testing feedback - ## What's Next (v0.4.1+) - - **JetBrains investigation spike** (#12) — Untested platform; need clarity on sub-agent spawning - **GitHub.com investigation spike** (#13) — Untested platform; web-based Copilot limitations - **Progress signals skill implementation** — Agents adopt milestone pattern - **Project Boards MVP** — Ralph integrates board context - ## By the Numbers - - **3 major spikes completed** (VS Code parity research) - **1 compatibility matrix published** (33 rows, 8 feature comparisons) - **1 proposal designed & approved** (agent progress updates) - **1 SSH bug fixed & documented** - **4 external contributors engaged** this sprint - --- - ## The Vibe - This sprint was about **reducing uncertainty.** We came into v0.4.0 with questions: *Does Squad work on VS Code? What are the constraints? How do we show progress on long work?* - We shipped answers. VS Code users can adopt Squad without waiting for a v0.5.0 overhaul. Long-running work feels less like a black box. The compatibility matrix gives us a roadmap for what to test next. - Open source moves at the pace of clarity. We shipped that. diff --git a/docs/src/content/blog/010-v041-patch-release.md b/docs/src/content/blog/010-v041-patch-release.md index ece2206a5..fc39d9685 100644 --- a/docs/src/content/blog/010-v041-patch-release.md +++ b/docs/src/content/blog/010-v041-patch-release.md @@ -7,45 +7,28 @@ tags: [squad, release, v0.4.1, patch, quality] status: published hero: "v0.4.1 lands five targeted fixes for logging, team setup, CLI UX, docs formatting, and blog chronology. Responsive to user feedback." --- - # v0.4.1: Quick Quality Patch - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - > _One week after 0.4.0, we shipped targeted fixes for the issues users hit first. Scribe now logs. Team templates match workflows. The CLI error noise is gone. Docs are clean. And your blog posts appear in order._ - ## What's Fixed - ### 1. Scribe Logging Regression (#56) The orchestration logger wasn't persisting across sessions. Scribe wasn't recording work. We fixed the session-state detection so logs flow to `orchestration-log/` consistently. Squadmates building together now have complete history. - ### 2. Team Template Header Mismatch (#58) Generated `team.md` files had a `Team Roster` header but the label-assignment workflow expected `Members`. One mismatched word broke the whole label-based task routing. Fixed — the header now matches the workflow expectations. - ### 3. CLI "Too Many Arguments" Error (#59) Post-0.4.0, the CLI was showing a persistent error message above the textbox. Parsing error noise on every interaction. Silenced it. The textbox is clean again. - ### 4. Docs Formatting Scrub (#64) Bad characters, escaped backticks, garbled emoji from encoding issues. Docs are now clean and render correctly across all browsers. - ### 5. Blog TOC Chronological Ordering (#65) Blog posts now show dates in the sidebar for clear timeline visibility. Posts sort chronologically with full date context. - ## Ship Speed - We committed to responsive patches: user-reported issues → fix within 48 hours → shipped. This is how we build trust with the community. - **Install v0.4.1:** ``` npm install -g @bradygaster/squad-cli@latest ``` - or upgrade if you have it: ``` npx @bradygaster/create-squad upgrade ``` - --- - Thanks to everyone who reported bugs. Your feedback shaped this release. 
🚀 diff --git a/docs/src/content/blog/011-skills-system-learning-from-work.md b/docs/src/content/blog/011-skills-system-learning-from-work.md index e9bc3d77b..013625c72 100644 --- a/docs/src/content/blog/011-skills-system-learning-from-work.md +++ b/docs/src/content/blog/011-skills-system-learning-from-work.md @@ -7,84 +7,49 @@ tags: [squad, skills, memory, learning, anthropic, open-standard] status: published hero: "Squad agents generate portable SKILL.md files from real work, codifying what they learned. Other tools make humans write skills by hand. Squad earns them." --- - # Skills System: Agents That Learn From Work - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Squad agents generate portable SKILL.md files from real work, codifying what they learned. Other tools make humans write skills by hand. Squad earns them._ - ## The Problem - Agents without memory repeat the same mistakes. On session 1, an agent discovers a Jest testing pattern. On session 5, the same agent hits the same bug again because nothing persisted between sessions. - Solutions exist — `history.md` files capture agent learnings, `decisions.md` captures team agreements. But these are project-local and informal. There's no mechanism for carrying **portable, reusable patterns** from one project to another. When a Squad exports and moves to a new repo, agents start from zero. - That changed in v0.2.0 with the skills system. - ## How It Works - Skills are **earned domain knowledge** that changes how agents approach work. After completing a task, agents extract reusable patterns and write them to `.copilot/skills/{skill-name}/SKILL.md` using the Anthropic SKILL.md open standard. - Three categories exist: - 1. **Built-in skills** — shipped with Squad (e.g., `squad-conventions`, `label-driven-workflow`) 2. **Learned skills** — extracted from completed work (e.g., `jest-testing-patterns`, `ci-github-actions`) 3. 
**Imported skills** — acquired from plugin marketplaces or other squads - Skills are **portable**. When a squad exports, skills travel with the team. A squad that learned API testing patterns in Project A arrives at Project B already knowing how to write those tests. - ### Lifecycle - Skills evolve through four stages: - | Stage | What Happens | |-------|-------------| | **Acquisition** | Agent encounters a pattern, writes SKILL.md with `confidence: low`, `source: earned` | | **Reinforcement** | Agent applies the skill again, bumps `confidence: low → medium → high` | | **Correction** | Agent discovers the pattern doesn't work, updates the skill with exceptions or anti-patterns | | **Deprecation** | Pattern becomes obsolete, skill is archived | - Confidence increases monotonically (never downgrades). Once a skill reaches `confidence: high` after 3+ successful applications, it's considered validated. - ## The Design Story - The skills system was a three-way collaboration between **Brady** (product owner), **Kujan** (platform expert), and **Verbal** (prompt engineer). - ### Brady's Directive (2026-02-08) - > _"Skills adhering to Anthropic SKILL.md standard with MCP tool declarations."_ - This single sentence shaped the entire design: - 1. **SKILL.md standard** — not a Squad-specific format. Any tool can read Squad skills (Claude Code, Copilot, Windsurf). 2. **MCP tool declarations** — skills can specify which MCP tools they depend on (e.g., `github-issues-create`, `trello-create-card`). 3. **Portable by default** — skills are metadata files, not code. They travel via JSON export/import. - ### Verbal's Lifecycle Design (2026-02-08) - Verbal designed the skill lifecycle (acquisition → reinforcement → correction → deprecation) and the per-agent storage model. Initial design had skills stored at `.squad/agents/{name}/skills.md` (per-agent files). This was revised after Kujan's platform assessment. 
- ### Kujan's Platform Feasibility (2026-02-08) - Kujan validated that: - Skills stored separately from history enable clean export (history is project-specific, skills are portable) - The `store_memory` tool (Anthropic's skill persistence API) was the wrong model for Squad — filesystem persistence is Squad's architecture - File paths in agent charters are frozen API contracts (changing `.squad/agents/{name}/skills.md` to `.copilot/skills/` requires migration) - ### Open Standard Adoption (2026-02-09) - Squad adopted the Agent Skills Open Standard (agentskills.io) and the SKILL.md YAML frontmatter format. Directory structure changed from per-agent files to a flat `.copilot/skills/` directory. Skills are **team knowledge**, not agent-specific. - The final decision (Verbal, 2026-02-09): - > _"Skills in `.copilot/skills/{skill-name}/SKILL.md`. Coordinator injects `<available_skills>` XML for progressive disclosure (~50 tokens per skill at discovery). Skills portable beyond Squad — works in Claude Code, Copilot, any compliant tool."_ - ## Technical Details - ### SKILL.md Format - ```yaml --- name: "jest-testing-patterns" @@ -97,33 +62,24 @@ tools: description: "Execute Jest test suite" when: "After writing or modifying tests" --- - ## Context When and why this skill applies - ## Patterns Specific patterns, conventions, or approaches - ## Examples Code examples or references - ## Anti-Patterns What to avoid ``` - ### Discovery and Application - 1. **Coordinator reads** `.copilot/skills/` directory at session start 2. **Progressive disclosure**: Only skill names and descriptions are loaded initially (~50 tokens per skill) 3. **Agent spawns with context**: Spawn template says "check `.copilot/skills/{skill-name}/SKILL.md` if relevant" 4. **Agent reads full skill** when applicable to the task 5. **Agent applies pattern** from the skill 6.
**Agent updates or extracts**: Bump confidence if validated, extract new skill if pattern discovered - ### Export/Import - Skills travel via the `squad-export.json` manifest: - ```json { "version": "1.0", @@ -134,85 +90,55 @@ Skills travel via the `squad-export.json` manifest: ] } ``` - When imported into a new squad: - Skill files are written to `.copilot/skills/{skill-name}/SKILL.md` - Agents read them before first spawn - Team arrives at the new project already competent - ## What This Enables - ### Compound Learning - v0.2.0 shipped skills, export/import, and per-agent model selection. v0.5.0 will ship the memory format skill (see parallel work on SEM format). Each feature makes the next easier: - - **Memory format skill** teaches agents how to write structured decisions/memories - **Skills system** makes that format portable - **Export/import** carries both the format skill and accumulated project skills to new repos - Agents get **smarter over time** within a project and carry that knowledge forward. - ### Plugin Marketplaces (v0.4.0) - The skills system is the foundation for plugin marketplaces. Community-authored skills for specific domains (AWS deployment, Kubernetes patterns, React testing) can be installed: - ```bash squad plugin marketplace add github:squad-plugins/official squad plugin install aws-deployment-patterns ``` - The skill appears at `.copilot/skills/aws-deployment-patterns/SKILL.md` and agents apply it on their next spawn. - ### Cross-Tool Compatibility - Because Squad uses the Anthropic open standard, skills work in: - - **Claude Code** (VS Code extension) - **GitHub Copilot** (if they adopt the standard) - **Windsurf** (Codeium's editor) - **Any tool** implementing agentskills.io - Users aren't locked into Squad. The knowledge is portable. 
- ## Stats - As of v0.2.0: - - **2 built-in skills** shipped with Squad (`squad-conventions`, `label-driven-workflow`) - **15+ learned skills** in Squad's own `.copilot/skills/` directory earned during dogfooding (GitHub Actions automation, Jekyll site deployment, Jest testing patterns, MCP tool discovery) - **0 npm dependencies** — pure markdown with YAML frontmatter - **~50 tokens per skill** at discovery (name + description only) - **Full content (~500-2000 tokens)** loaded only when agent needs it - ## Why This Matters - Most AI coding tools treat each session as isolated. Context window tricks (RAG, vector search, long-context models) help agents find relevant code, but they don't **change behavior**. An agent with 200K context can read your entire codebase but still makes the same architectural mistakes every session. - Skills are **behavioral**. They change what the agent does when it encounters a situation. A squad with the `ci-github-actions` skill writes workflows differently than a squad without it. The knowledge persists across sessions and travels across projects. - The breakthrough: **agents generate skills from work**. Other tools (GitHub Copilot, Cursor, Cody) don't have SKILL.md generation — humans write skill files by hand. Squad earns them automatically and stores them in your repo's `.copilot/skills/` directory, alongside the `.squad/` directory that already tracks decisions and history. - ## What This Unlocks - Three features depend on skills existing: - 1. **Plugin marketplaces** (v0.4.0) — community-contributed skills for specialized domains 2. **Skill confidence metrics** (v0.6.0+) — analytics on which skills are validated and which are trial 3. **Cross-squad skill sharing** (v0.7.0+) — teams publish their best skills to a registry - The skills system is foundational. v0.2.0 planted the seed. Future versions harvest the returns.
- --- - ## Attribution - - **Design**: Verbal (prompt engineer) - **Platform validation**: Kujan (SDK expert) - **Open standard decision**: Verbal + Kujan - **Directive**: bradygaster (product owner) - **Format standard**: Anthropic (agentskills.io) - **Implementation**: Verbal (spawn templates), Fenster (`squad init` scaffolding), Hockney (skill extraction validation) - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/012-trending-on-github.md b/docs/src/content/blog/012-trending-on-github.md index e7b10db77..e5aed3a70 100644 --- a/docs/src/content/blog/012-trending-on-github.md +++ b/docs/src/content/blog/012-trending-on-github.md @@ -7,69 +7,37 @@ tags: [squad, community, milestone, trending] status: published hero: "Squad hit #9 on GitHub Trending Developers today, February 19. Started February 7. 12 days from launch to GitHub's top trending." --- - # #9 on GitHub Trending Developers - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Squad hit #9 on GitHub Trending Developers today, February 19. Started February 7. 12 days from launch to GitHub's top trending._ - ## What Happened - This morning, Brady discovered that Squad is the **#9 trending developer** on GitHub. The repo `bradygaster/squad` is the featured project. - The full trending list includes some company. Agentsys is at #3 with 42 agents and 13 plugins. Agent-of-empires is at #4 (Claude Code terminal session manager). Inbox-zero (AI email) is at #8. TinyUSB, Voicebox, and others fill the remainder. - The AI agent wave is real. And Squad is riding it. - ## Timeline - - **February 7** — Squad launched. First commit. First deploy. - **February 11** — Jeff Fritz published the first public video demo (131 passing tests). 
- **February 15** — v0.2.0 shipped. Skills system + export/import. Five new blog posts. - **February 19** — Trending Developers #9. - 12 days. No marketing. No announcement. Just a team building in public and a community that showed up. - ## What's Happening - The GitHub Trending list measures stars over a rolling window. The signal is real: people are starring Squad. Reading the repos they're linking, they're: - - **Trying it** — Cloning, running `npx create-squad`, spinning up teams - **Building with it** — Deploying Squad into real repos. Shipping real work with agent teams - **Talking about it** — Sharing links, testing variations, reporting bugs, suggesting features - The #9 spot is not a vanity metric. It's verification that the message landed: "Your code needs a team. Squad gives you one. Go build something." - ## Why Now - Three things converged: - 1. **The agent moment.** The industry moved from "AI writes code" (Copilot, Cursor) to "AI runs code" (Claude Code, Windsurf) to "AI **teams** write code" (Squad, agentsys, agent-of-empires). Users want delegation, not autocomplete. This wave lifted all boats. - 2. **The cast system.** The fact that agents have names, personalities, and persistent decisions — they're not generic numbered workers — is memorable. Developers talk about "our squad" the way they talk about "our team." Casting turned the abstract into the familiar. - 3. **Transparency.** Everything Squad creates lives in markdown in your repo. Decisions, history, skills, cast definitions — all visible, inspectable, portable. No proprietary databases. No lock-in. This built trust fast. - ## What This Unlocks - Trending changes three things: - **Discovery** — Developers who would never have found Squad now see it. First-time visitors are coming to the README with an open mind. - **Credibility** — Being #9 on GitHub is a third-party validation. "If GitHub's trending list cares, I should probably pay attention." 
- **Momentum** — Contributors, collaborators, and future squad members now know where to look. The project has visibility. Building Squad in public just became building Squad with an audience. - ## What's Next - This is a moment-in-time milestone. The real work is what happens after trending dies down — and it will. Trending is a sprint, not a strategy. The test is: Do people who starred today still use Squad in March? Do they file issues? Do they contribute? Do they build teams? - That's where the real story unfolds. - The 12-day arc from launch to #9 trending means one thing: we built something people genuinely want. Now we prove we can deliver on that promise. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/013-the-replatform-begins.md b/docs/src/content/blog/013-the-replatform-begins.md index 33f2d2908..53ad9debd 100644 --- a/docs/src/content/blog/013-the-replatform-begins.md +++ b/docs/src/content/blog/013-the-replatform-begins.md @@ -7,64 +7,37 @@ tags: [squad, replatform, typescript, architecture, sdk, cli] status: published hero: "We threw away everything that worked and rewrote Squad from scratch in TypeScript. Here's why that was the only honest move." --- - # The Replatform Begins - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _We threw away everything that worked and rewrote Squad from scratch in TypeScript. Here's why that was the only honest move._ - ## The Decision - Squad beta shipped fast. Twelve blog posts. Trending on GitHub. A community forming in real time. And underneath all of it, a JavaScript codebase held together with string parsing and good intentions. - Brady made the call on February 20: full replatform. Not a refactor. Not "add TypeScript gradually." 
A clean-room rewrite with strict typing, ESM modules, and a proper package architecture. - The reason was simple. Squad was growing faster than the codebase could support. Every new feature — skills, upstream inheritance, multi-client support — required touching parsers that had no type safety. Agent charters were parsed with regex. Routing rules were string-matched. The adapter layer between Squad and `@github/copilot-sdk` was a single file with `as any` casts on every boundary. It worked. Until it didn't. - ## The Architecture - The replatform split Squad into two packages inside an npm workspace: - - **`@bradygaster/squad-sdk`** — The core runtime. Agent loading, casting, routing, tools, OpenTelemetry, upstream inheritance. Everything that makes Squad work. Zero CLI dependencies. Safe to import from VS Code extensions without risking `process.exit()` crashes. - **`@bradygaster/squad-cli`** — The entry point. Interactive shell, commands (`init`, `status`, `doctor`, `link`), REPL chrome. Depends on the SDK. Ships as a global binary via `npm install -g @bradygaster/squad-cli`. - The split solved three problems at once: - 1. **Library safety.** SquadUI (the VS Code extension) needs to import SDK functions. In beta, importing anything from Squad pulled in the CLI entry point, which called `process.exit()`. The extension host would crash. SDK/CLI separation makes library imports safe by construction. 2. **Independent versioning.** The SDK can ship a patch without touching the CLI. The CLI can add a command without bumping the SDK. Changesets handles independent version management across the workspace. 3. **Strict typing everywhere.** TypeScript strict mode. No `any`. No implicit returns. No untyped event handlers. The compiler catches what beta's runtime errors used to catch — in production. - The npm workspace approach (`"workspaces": ["packages/*"]`) means one `npm install`, one `npm run build`, one `vitest run` across both packages. 
Development feels like a monolith. Publishing feels like microservices. - ## What Changed Under the Hood - Everything. But the important parts: - - **ESM-only.** No CommonJS. No dual-mode. `"type": "module"` in every `package.json`. Node.js ≥20 required. - **Vitest over node:test.** Beta used `node:test` and `node:assert`. The replatform moved to Vitest for snapshot testing, coverage, and watch mode. The test count went from 131 (beta peak) to 2,232 by the time Wave 3 shipped. - **esbuild for bundling.** Fast builds. No webpack config files. No Rollup plugins. Just esbuild. - **Barrel exports.** Every public API surfaces through `packages/squad-sdk/src/index.ts`. One import path: `import { resolveSquad, loadConfig, CastingEngine } from '@bradygaster/squad-sdk'`. - ## The Wave Plan - Brady didn't just replatform — he structured the work into waves. Each wave had a theme, a PR, and a definition of done: - | Wave | Theme | PR | |------|-------|----| | Wave 1 | OTel + Aspire (observability) | #307, #308 | | Wave 2 | REPL polish (developer experience) | #309 | | Wave 3 | Docs migration (knowledge transfer) | #310 | - The waves weren't arbitrary. Wave 1 gave Squad eyes (telemetry). Wave 2 gave Squad a voice (the interactive shell). Wave 3 gave Squad a memory (documentation that teaches). Each wave built on the last. - ## What's Next - Wave 1 starts immediately. OpenTelemetry integration, Aspire dashboard, and the SquadObserver file watcher. The goal: when agents work, you can see what they're doing — not in log files, but in real-time traces and metrics. - The replatform is the foundation. The waves are the house. Time to build. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/014-wave-1-otel-and-aspire.md b/docs/src/content/blog/014-wave-1-otel-and-aspire.md index 0eaecea3a..feb59f098 100644 --- a/docs/src/content/blog/014-wave-1-otel-and-aspire.md +++ b/docs/src/content/blog/014-wave-1-otel-and-aspire.md @@ -7,56 +7,31 @@ tags: [squad, wave-1, otel, aspire, observability, telemetry] status: published hero: "Multi-agent systems without observability are black boxes. Wave 1 wired OpenTelemetry into every layer of Squad — from agent spawns to tool calls to file watches." --- - # Wave 1: Giving Squad Eyes - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Multi-agent systems without observability are black boxes. Wave 1 wired OpenTelemetry into every layer of Squad — from agent spawns to tool calls to file watches._ - ## The Problem - In the beta, when something went wrong — an agent hung, a routing decision misfired, a tool call returned garbage — you had two diagnostic tools: `history.md` and `console.log`. History files told you what agents claimed happened. Console output told you what the CLI printed. Neither told you what actually happened inside the runtime. - For a single-agent system, that's annoying. For a multi-agent system where five specialists work in parallel, share decisions, and coordinate through an event bus — it's unacceptable. You can't debug what you can't see. - Wave 1 gave Squad eyes. - ## The 3-Layer OTel API - The OpenTelemetry integration landed across PRs #307 and #308, covering issues #254 through #268. The design uses three layers, each for a different audience: - **Layer 1 — Low-level control.** `initializeOTel()`, `shutdownOTel()`, `getTracer()`, `getMeter()`. For developers who want full control over their tracing pipeline. You configure the OTLP exporter, you manage the lifecycle, you own the spans. This is the escape hatch. 
- **Layer 2 — EventBus bridge.** `bridgeEventBusToOTel()` and `createOTelTransport()`. Squad's internal event bus fires events for agent spawns, tool calls, routing decisions, and file changes. Layer 2 automatically converts those events into OTel spans. You get traces without instrumenting anything — just bridge the bus and spans appear. - **Layer 3 — One-liner init.** `initSquadTelemetry()` returns a lifecycle handle. Call it at startup, call `shutdown()` at exit. Everything else is automatic. This is what most users want. - The key design decision: **zero overhead when unused.** If no `TracerProvider` is configured, every OTel call is a no-op. No span allocation. No metric recording. No performance cost. Squad doesn't penalize you for not using telemetry. - ## SquadObserver: The File Watcher - Issue #268 introduced `SquadObserver`, a file watcher that monitors the `.squad/` directory and emits events when agents write files. Combined with the OTel bridge, this means: - - Agent writes to `history.md` → file change event → OTel span - Agent creates a skill → file change event → OTel span - Agent updates `decisions.md` → file change event → OTel span - Every file mutation by every agent becomes a traceable event. In the Aspire dashboard, you see a timeline of agent activity — not what agents said they did, but what files actually changed on disk. - ## Aspire Dashboard Integration - The `squad aspire` command (#265) wires Squad's OTLP exporter to an Aspire dashboard. Aspire gives you: - - **Trace waterfall** — See agent spawns, tool calls, and file writes as a timeline - **Metrics** — Agent spawn counts, tool call durations, event bus throughput - **Structured logs** — Every span carries attributes (agent name, tool name, file path) - The integration is optional. Squad doesn't depend on .NET or Aspire. But if you're running Aspire (common in .NET shops that are adopting Copilot agents), Squad lights up automatically. 
- ## By the Numbers - | Metric | Value | |--------|-------| | Issues closed | #254–#268 (15 issues) | @@ -65,17 +40,11 @@ The integration is optional. Squad doesn't depend on .NET or Aspire. But if you' | Event types bridged | agent:spawn, tool:call, file:change, routing:decision | | Performance overhead (no provider) | Zero | | New SDK exports | 8 (initializeOTel, shutdownOTel, getTracer, getMeter, bridgeEventBusToOTel, createOTelTransport, initSquadTelemetry, SquadObserver) | - ## What We Learned - - **Observability isn't optional for multi-agent systems.** Single-agent debugging is print statements. Multi-agent debugging is distributed tracing. The same tools that work for microservices — traces, spans, metrics — work for agent coordination. OTel was the right bet. - **The EventBus bridge pattern is powerful.** Instead of instrumenting every function, we instrument the event bus once. Every new event type gets tracing for free. This scales with the codebase without scaling instrumentation effort. - **Zero-overhead matters more than features.** The biggest adoption risk for telemetry is performance fear. Making every OTel call a provable no-op when unconfigured removes the objection entirely. - ## What's Next - Wave 1 gave Squad the ability to see. Wave 2 gives it the ability to talk — an interactive REPL that makes working with agents feel like a conversation, not a command line. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/015-wave-2-the-repl-moment.md b/docs/src/content/blog/015-wave-2-the-repl-moment.md deleted file mode 100644 index f55c35e94..000000000 --- a/docs/src/content/blog/015-wave-2-the-repl-moment.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -title: "Wave 2: The REPL Moment" -date: 2026-02-21 -author: "McManus (DevRel)" -wave: 2 -tags: [squad, wave-2, repl, shell, security, testing, developer-experience] -status: published -hero: "We built an interactive shell that makes you forget you're talking to agents. Then we found a command injection vulnerability and fixed it the same day." ---- - -# Wave 2: The REPL Moment - -> 📌 **Archive note:** The interactive shell described in this post has been deprecated. For the best Squad experience, use the [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli). See [Choose your interface](/docs/get-started/choose-your-interface/) for current options. - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -> _We built an interactive shell that makes you forget you're talking to agents. Then we found a command injection vulnerability and fixed it the same day._ - -## The Wow Moment - -PR #309 shipped the feature that changed how Squad feels. Not how it works — how it *feels*. - -Type `squad` with no arguments. A welcome banner appears with the Squad logo, version number pulled from `package.json` (no hardcoded strings), and a prompt. Type a message. Agents respond with streaming output. Emoji markers show who's working. The session persists — agents remember what you said three messages ago. - -It sounds simple. It was not simple. 
The REPL needed to: - -- Stream agent responses token-by-token (no waiting for complete responses) -- Display a welcome banner with dynamic version info -- Handle multi-agent output interleaved on the same terminal -- Track session state across messages (agent registry, routing context, casting decisions) -- Exit cleanly without orphaned processes - -The result is an interactive shell that feels like a conversation. Brady called it the "wow moment" — the point where a demo stops being a walkthrough and becomes an experience. You sit someone down, type `squad`, and they get it. - -## The Security Fix - -While building the REPL, we found CWE-78: OS Command Injection. The beta used `execSync()` to run shell commands from agent tool calls. That's a classic injection vector — if an agent constructs a command string from user input, arbitrary code execution is one backtick away. - -The fix: replace every `execSync()` call with `execFileSync()`. The difference is fundamental: - -- `execSync("git status " + userInput)` — shell interprets `userInput`. If it contains `; rm -rf /`, you're done. -- `execFileSync("git", ["status", userInput])` — no shell. Arguments are passed directly to the process. Injection is structurally impossible. - -This landed in the same PR. No separate security advisory. No drama. Just the right fix in the right place at the right time. CWE-78 closed. - -## Config Extraction - -Wave 2 also extracted hardcoded values into `constants.ts`. Model names, timeout values, agent roles, file paths — all the magic strings scattered across the beta codebase got pulled into typed constants: - -- `MODELS` — every supported model with provider and tier -- `TIMEOUTS` — spawn timeout, polling interval, shutdown grace period -- `AGENT_ROLES` — the cast system's role definitions - -This isn't glamorous work. It's the kind of refactoring that makes every future feature cheaper to build. 
When Wave 3 needed to reference model names in documentation, the constants were already there. When the adapter hardening sprint needed timeout values, they were already typed. - -## 119 New Tests - -The replatform started with zero tests (clean room, remember?). Wave 1 added integration tests for OTel. Wave 2 added 119 tests covering: - -- Shell initialization and teardown -- Command routing and argument parsing -- Agent spawn lifecycle -- Config loading and validation -- REPL streaming output -- Session state persistence - -Plus an Aspire Playwright E2E test that launches the full stack — Squad CLI, agent runtime, OTel exporter, Aspire dashboard — and verifies traces appear in the UI. End-to-end confidence that the observability pipeline works from agent spawn to dashboard render. - -The test count after Wave 2: meaningful. The test count by Wave 3 completion: 2,232 across 85 test files. The REPL work established the testing patterns that scaled. - -## By the Numbers - -| Metric | Value | -|--------|-------| -| PR | #309 | -| New tests | 119 | -| Security fixes | 1 (CWE-78) | -| Config constants extracted | 3 modules (MODELS, TIMEOUTS, AGENT_ROLES) | -| Hardcoded strings removed | All | -| REPL features | Welcome banner, streaming, emoji, session persistence | - -## What We Learned - -- **Developer experience is a feature.** The REPL doesn't add capabilities Squad didn't have. It makes existing capabilities accessible. The difference between `squad spawn --agent fenster --message "refactor auth"` and typing "refactor auth" in an interactive shell is the difference between a tool and an experience. -- **Security fixes belong in feature PRs.** Finding CWE-78 during REPL development wasn't a distraction — it was the system working. You find security bugs when you're deep in the code. Ship the fix with the feature. Don't create a separate ticket and let it age. 
-- **Constants are infrastructure.** Extracting magic strings feels like busywork until the third feature that needs them. Then it feels like foresight. - -## What's Next - -Wave 2 gave Squad a voice. Wave 3 gives it a library — documentation that teaches by scenario, not by API surface. The docs engine, 5 initial guides, and a custom site generator. - ---- - -_This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/016-wave-3-docs-that-teach.md b/docs/src/content/blog/016-wave-3-docs-that-teach.md index 7f036c175..f4fbd7bcb 100644 --- a/docs/src/content/blog/016-wave-3-docs-that-teach.md +++ b/docs/src/content/blog/016-wave-3-docs-that-teach.md @@ -7,54 +7,32 @@ tags: [squad, wave-3, docs, site-generator, markdown-it, guides] status: published hero: "We built a docs engine from scratch because the docs should teach you how to solve problems, not how to call functions." --- - # Wave 3: Docs That Teach - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _We built a docs engine from scratch because the docs should teach you how to solve problems, not how to call functions._ - ## The Philosophy - Most developer docs are organized by API surface. Here's the `loadConfig` function. Here are its parameters. Here's a return type. Good luck. - That's reference material, not documentation. Reference material answers "what does this do?" Documentation answers "how do I solve this problem?" - Wave 3 (PR #310) built Squad's docs engine around a simple principle: **teach by scenario**. Don't start with the API. Start with the problem. "I want to set up a squad for my React project." "I want agents to share knowledge across repositories." "I want to see what my agents are doing in real time." 
Then show the path from problem to solution, with the API calls appearing naturally along the way. - ## The Engine - We needed a static site generator. We didn't need Hugo, Jekyll, Docusaurus, or Gatsby. We needed something that: - 1. Reads markdown files from `docs/` subdirectories 2. Converts them to HTML with syntax highlighting 3. Generates navigation from the directory structure 4. Outputs static files to `docs/dist/` 5. Works with `node docs/build.js` — no dependencies beyond what's in the workspace - So we built one. `docs/build.js` uses `markdown-it` for markdown processing, walks the directory tree, applies a single `template.html`, and generates a static site. The build is fast — 62 pages in under a second. - The template system is intentionally minimal. One HTML template. One CSS file. One JavaScript file for theme toggling and search. No build pipeline. No React. No framework. Just markdown in, HTML out. - ## The Initial Guides - Wave 3 shipped 5 guides covering the core paths: - 1. **Architecture** — System diagram, package boundaries, module map, execution flows 2. **Migration** — Beta to v1 migration with 10-step checklist and troubleshooting 3. **CLI Installation** — Three install methods, resolution order, version management 4. **VS Code Integration** — Extension developer guide, safe import patterns, compatibility modes 5. **SDK API Reference** — Every public export from `@bradygaster/squad-sdk`, grouped by domain - Each guide follows the scenario-first pattern. The architecture guide doesn't start with "here are the modules." It starts with "here's what happens when you type `squad`." The migration guide doesn't start with "here are the breaking changes." It starts with "you have a beta squad and you want to move to v1." - ## Issues Closed - Wave 3 touched a broad set of documentation issues: #185, #188, #191, #192, #195, #196, #199, #201, #203, #206, #207. 
Each issue represented a gap — a question that a developer would ask and find no answer for. The guides fill those gaps. - ## By the Numbers - | Metric | Value | |--------|-------| | PR | #310 | @@ -63,17 +41,11 @@ Wave 3 touched a broad set of documentation issues: #185, #188, #191, #192, #195 | Build tool | markdown-it (custom build.js) | | Build time | <1 second for 62 pages | | Framework dependencies | 0 | - ## What We Learned - - **Scenario-first docs convert better.** When a developer lands on "How do I migrate from beta?" they stay. When they land on "loadConfig() API reference" they bounce. The scenario is the hook. The API is the payload. - **Custom beats framework for small sites.** Docusaurus would have taken longer to configure than `build.js` took to write. For a docs site with no dynamic content, a 200-line build script is the right tool. - **Five guides is the right starting number.** Enough to cover the core paths. Not so many that you can't maintain quality. The guides expand from here, but the first five set the tone. - ## What's Next - The docs engine is built. The initial guides are live. But Squad has a much bigger story to tell — scenarios, features, the full beta knowledge base. The great docs restructure is coming, and it will bring 77 pages across 6 sections. But first, we need to align versions and ship to npm. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/017-version-alignment.md b/docs/src/content/blog/017-version-alignment.md index b0234b66d..dc8a7c74f 100644 --- a/docs/src/content/blog/017-version-alignment.md +++ b/docs/src/content/blog/017-version-alignment.md @@ -7,70 +7,41 @@ tags: [squad, release, versioning, npm, publishing, ci] status: published hero: "The CLI was at 0.8.1. The SDK was at 0.8.0. The root was at 0.6.0-alpha.0. 
We snapped everything to 0.8.2 and published to npm. Then CI told us what we got wrong." --- - # Snapping to 0.8.2 - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _The CLI was at 0.8.1. The SDK was at 0.8.0. The root was at 0.6.0-alpha.0. We snapped everything to 0.8.2 and published to npm. Then CI told us what we got wrong._ - ## The Version Drift Problem - After three waves of development, Squad's version numbers were a mess. The npm workspace has three `package.json` files, and each had drifted independently: - - **Root** (`@bradygaster/squad`): `0.6.0-alpha.0` - **SDK** (`@bradygaster/squad-sdk`): `0.8.0` - **CLI** (`@bradygaster/squad-cli`): `0.8.1` - This happens naturally in a workspace with independent versioning. The SDK ships a feature, bumps to 0.8.0. The CLI ships a command that uses that feature, bumps to 0.8.1. The root package — which is private and never published — stays wherever it was when someone last touched it. - The problem: when users run `squad --version`, they see the CLI version. When they import from `@bradygaster/squad-sdk`, they see the SDK version. When they look at the root `package.json`, they see a third version. Three numbers, none matching, all claiming to be "Squad." - ## The Fix - One commit. Tag `v0.8.2`. All three packages snapped to `0.8.2`: - ``` chore: align CLI and SDK versions to 0.8.2 ``` - Published to npm as: - `@bradygaster/squad-sdk@0.8.2` - `@bradygaster/squad-cli@0.8.2` - The root stays private (`"private": true`) but matches the published version for developer sanity. When you clone the repo and look at `package.json`, the number makes sense. - ## The CI Discovery - Publishing to npm surfaced a workflow bug. The `publish.yml` GitHub Action (#305) was wired to trigger on release creation, build both packages, and publish with `npm publish --access public`. The workflow worked — but only after fixing the build order. - The CLI depends on the SDK. 
If you publish the CLI before the SDK, npm can't resolve `@bradygaster/squad-sdk` as a dependency because it doesn't exist yet (or exists at the wrong version). The fix: build and publish SDK first, then CLI. Sequential, not parallel. - This is the kind of bug you only find by actually publishing. Local `npm run build` works because the workspace resolves packages from disk, not from the registry. CI publishes to the real registry, where order matters. - ## Independent Versioning Going Forward - The v0.8.2 snap was a one-time alignment. Going forward, the SDK and CLI version independently using Changesets: - ```bash npx changeset # describe what changed npx changeset version # bump versions npm publish # push to registry ``` - A CLI bugfix bumps `@bradygaster/squad-cli` without touching the SDK. An SDK feature bumps `@bradygaster/squad-sdk` without touching the CLI. The versions will diverge again — and that's fine. The workspace supports it. What matters is that the *starting point* is clean. - ## What We Learned - - **Version alignment is a release, not a refactor.** We treated the snap as a proper release: tagged commit, npm publish, CI validation. Not a silent `package.json` edit buried in a feature branch. - **Publish order matters in workspaces.** Local builds resolve from disk. CI builds resolve from the registry. If package B depends on package A, publish A first. Always. - **Three versions is two too many for users.** Users don't care about workspace architecture. They see one tool. It should have one version number — or at minimum, version numbers that make sense together. - ## What's Next - With versions aligned and packages on npm, the next challenge is closer to the metal: the adapter layer between Squad and `@github/copilot-sdk` has unsafe type casts that need to go. A P0 bug in Codespaces is about to make that very urgent. - --- - _This post was written by McManus, the DevRel on Squad's own team. 
Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/018-the-adapter-chronicles.md b/docs/src/content/blog/018-the-adapter-chronicles.md index b3860526c..4336e31ea 100644 --- a/docs/src/content/blog/018-the-adapter-chronicles.md +++ b/docs/src/content/blog/018-the-adapter-chronicles.md @@ -7,81 +7,47 @@ tags: [squad, adapter, copilot-sdk, typescript, type-safety, codespace, bug] status: published hero: "A P0 crash in Codespaces led to a 7-issue sprint that eliminated every unsafe cast in Squad's adapter layer. Zero `as any` remaining." --- - # The Adapter Chronicles - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _A P0 crash in Codespaces led to a 7-issue sprint that eliminated every unsafe cast in Squad's adapter layer. Zero `as any` remaining._ - ## The P0 - Issue #315 came in hot. Squad running in GitHub Codespaces threw: - ``` TypeError: sendMessage is not a function ``` - The `@github/copilot-sdk` session object in Codespaces exposes `send()`, not `sendMessage()`. Squad's adapter assumed the method name. In the CLI, the session object happened to have `sendMessage()` (or something close enough that `as any` hid the mismatch). In Codespaces, the mask came off. - This is the failure mode of `as unknown as TargetType` — it compiles, it passes tests in one environment, and it crashes in another. The cast tells TypeScript "trust me." TypeScript trusts you. The runtime doesn't. - ## The CopilotSessionAdapter - The fix for #315 wasn't a one-line method rename. 
The session API surface differs between environments: - | Method | CLI Session | Codespace Session | |--------|------------|-------------------| | Send message | `sendMessage()` | `send()` | | Listen for events | `on()` returns void | `on()` returns unsubscribe function | | Cleanup | `destroy()` | `close()` | - Patching each call site would mean environment-specific branching scattered across the codebase. Instead, we built `CopilotSessionAdapter` — a wrapper that normalizes the session API: - - `send()` → delegates to whatever the underlying session calls its send method - `on(event, handler)` → always returns an unsubscribe function (wraps if needed) - `destroy()` → calls `close()` or `destroy()` depending on what exists - One adapter. One interface. Every consumer talks to the adapter, never to the raw session. The environment differences are absorbed in one place. - ## The 7-Issue Sprint - With #315 fixed, Brady opened issues #316 through #322 — a systematic sweep of the adapter layer. Each issue targeted a specific category of unsafe code: - **#316 — Unsafe casts in event handlers.** Event callbacks typed as `any`. Replaced with typed payloads for each event. - **#317 — EVENT_MAP.** Built a typed mapping object with 10 entries connecting Squad's internal event names to `@github/copilot-sdk` event names. No more string literals scattered across files. - **#318 — Field mapping.** Agent fields like `name`, `role`, and `expertise` mapped through typed field accessors instead of bracket notation with string keys. - **#319 — Response type casts.** Agent responses cast from `unknown` to expected shapes. Replaced with runtime validation — check the shape, then narrow the type. - **#320 — Session lifecycle.** Startup and shutdown sequences used `as any` to bridge async/sync mismatches. Replaced with proper `async`/`await` and typed return values. - **#321 — Tool registration.** Tool definitions passed to `@github/copilot-sdk` with cast parameters. 
Replaced with a typed `defineTool()` helper that constructs the correct shape. - **#322 — Dead code removal.** With typed adapters in place, several compatibility shims and fallback paths became unreachable. Removed them. - ## The Result - After the sprint: - - **Zero `as any` in the adapter layer.** Not reduced. Zero. - **Zero `as unknown as` patterns.** The anti-pattern that caused #315 is structurally impossible now. - **EVENT_MAP with 10 typed entries.** Every event has a name, a payload type, and a handler signature. - **CopilotSessionAdapter as the single integration point.** One file mediates between Squad and the SDK. One file to audit. One file to update when the SDK changes. - ## What We Learned - - **`as any` is technical debt with compound interest.** Every unsafe cast works until it doesn't. The cost of finding the failure (P0 in production) dwarfs the cost of typing it correctly from the start. The replatform's strict mode mandate exists for exactly this reason. - **Adapter patterns absorb environmental differences.** The Codespace session isn't wrong. The CLI session isn't wrong. They're different. The adapter's job is to make different things look the same to consumers. Classic GoF, still correct. - **Sprint the sweep.** Seven issues filed and closed in sequence. Not a backlog item that ages for weeks. When you find a category of bugs, sweep the category. Don't fix one and hope the others don't bite. - ## What's Next - The adapter layer is clean. The type system is honest. Now it's time to bring in a feature from the beta that the community has been asking about: remote squad mode. And it comes with a story about team collaboration. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/019-shaynes-remote-mode.md b/docs/src/content/blog/019-shaynes-remote-mode.md index 21a6df5fd..bbeb12b2b 100644 --- a/docs/src/content/blog/019-shaynes-remote-mode.md +++ b/docs/src/content/blog/019-shaynes-remote-mode.md @@ -7,59 +7,32 @@ tags: [squad, remote-mode, community, spboyer, doctor, link, collaboration] status: published hero: "Shayne Boyer built remote squad mode in the beta. We ported it to the replatform — and it taught us something about how teams actually work." --- - # Shayne's Remote Mode - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _Shayne Boyer built remote squad mode in the beta. We ported it to the replatform — and it taught us something about how teams actually work._ - ## The Origin - Back in the beta repo, [@spboyer](https://github.com/spboyer) (Shayne Boyer) opened PR [bradygaster/squad#131](https://github.com/bradygaster/squad/pull/131) with a feature that solved a real problem: what happens when your squad's identity — the charters, decisions, skills, casting policy — lives in a different repository than the project you're working on? - Think about it. A platform team maintains a shared squad configuration. Twelve product teams use that squad. In the beta model, each product team copies `.squad/` into their repo. Now you have twelve copies. Twelve copies that drift. Twelve copies that need manual sync when the platform team updates a decision. - Remote mode says: don't copy. Link. Your project has its own `.squad/` for project-local state (history, workspace config). But the team identity — who the agents are, how they route, what they know — lives somewhere else. One source of truth. Twelve projects pointing at it. 
- ## What We Ported - Issues #311 through #314 adapted Shayne's design for the replatform's TypeScript architecture: - **`resolveSquadPaths()` — Dual-root resolver (#311).** The core primitive. Given a project directory, resolve two paths: the project-local `.squad/` (for workspace state) and the team root (for identity). If no remote link exists, both paths point to the same place. If a link exists, they diverge. - **`squad doctor` — Setup validation (#312).** Nine checks with emoji output. Does `.squad/` exist? Is it linked? Can Squad reach the team root? Are charters loadable? Is the SDK version compatible? Doctor doesn't fix things — it tells you what's wrong so you can fix it. The output is deliberately human-readable, not machine-parseable. - **`squad link <path>` — Link a project (#313).** Point your project at a remote team root. The command writes a `.squad/.remote` config file with the path. From that point, Squad resolves team identity from the linked location. - **`squad init --mode remote` — Initialize with remote config (#313).** Like `squad init`, but sets up the dual-root structure from the start. Creates the local `.squad/` directory and the `.remote` config in one step. - **`ensureSquadPathDual()` / `ensureSquadPathResolved()` — Write guards (#314).** The replatform's `ensureSquadPath()` guard validates that `.squad/` exists before writing. The dual-root variants extend this to check both the local path (for workspace writes) and the team root (for identity reads). Writes always go local. Reads resolve through the chain. - ## Credit Where It's Due - This feature is Shayne's. The design — separate identity from workspace, link don't copy, resolve through a chain — came from his PR. The replatform ported the concept into TypeScript with strict typing, added the doctor command and write guards, and integrated it with the dual-root resolver pattern. 
But the idea, the insight that teams need shared squad identities across projects, was Shayne's contribution to the beta. - The CHANGELOG entry reads: - > **Added — Remote Squad Mode (ported from @spboyer's [bradygaster/squad#131](https://github.com/bradygaster/squad/pull/131))** - That's not a courtesy attribution. That's accurate history. - ## How Teams Actually Work - Remote mode revealed something about how development teams use Squad in practice. The assumption was: one repo, one squad. The reality: - - **Platform teams** maintain squad configurations that flow to product teams - **Consultancies** share a methodology squad across client projects - **Open source maintainers** publish a squad configuration that contributors link to - **Enterprise teams** post-acquisition merge two squad configurations into one - The common thread: identity is shared, workspace is local. Decisions, skills, and casting policy are organizational. History and runtime state are per-project. Remote mode makes that separation explicit and manageable. - ## By the Numbers - | Metric | Value | |--------|-------| | Issues | #311–#314 | @@ -67,17 +40,11 @@ The common thread: identity is shared, workspace is local. Decisions, skills, an | New commands | 3 (doctor, link, init --mode remote) | | Doctor checks | 9 | | New SDK functions | 4 (resolveSquadPaths, ensureSquadPathDual, ensureSquadPathResolved, plus init mode) | - ## What We Learned - - **Port the design, not just the code.** Shayne's beta implementation was JavaScript. We didn't transliterate it to TypeScript. We understood the design — dual roots, link-not-copy, resolve chain — and re-implemented it with the replatform's patterns (strict types, write guards, constants). - **Doctor commands pay for themselves.** Nine checks. Three minutes to run. Saves hours of debugging when something is misconfigured. Every CLI tool should have a doctor command. - **Credit the origin.** Remote mode works because Shayne saw the problem first. 
Open source runs on attribution. When you port someone's feature, say so. - ## What's Next - Remote mode completes the replatform's feature set. What's left is the biggest docs effort yet — restructuring everything we've built into a site that developers can actually navigate. 77 pages. 6 sections. The great docs restructure. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/020-docs-reborn.md b/docs/src/content/blog/020-docs-reborn.md index ef55a3a92..6eabc3e86 100644 --- a/docs/src/content/blog/020-docs-reborn.md +++ b/docs/src/content/blog/020-docs-reborn.md @@ -7,26 +7,14 @@ tags: [squad, docs, github-pages, restructure, dark-mode, search, site-generator status: published hero: "77 pages across 6 sections. Dark mode. Client-side search. Sidebar navigation. The beta's best UI, rebuilt for v1's content. Squad's docs are a real site now." --- - # Docs Reborn - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - > _77 pages across 6 sections. Dark mode. Client-side search. Sidebar navigation. The beta's best UI, rebuilt for v1's content. Squad's docs are a real site now._ - ## What Happened - The replatform created a documentation problem. Three waves of development produced 5 guides, an SDK API reference, and a CHANGELOG. The beta repo had 21 scenario docs, 23 feature docs, and 5 top-level guides. All of it lived in markdown files scattered across two repositories, two directory structures, and two naming conventions. - Developers couldn't find anything. The v1 guides were in `docs/guide/`. The beta scenarios were in a different repo entirely. There was no navigation, no search, no way to browse. If you didn't know the exact file path, you didn't find the doc. - The restructure fixed all of it. 
- ## The New Structure - Six sections, each with a clear purpose: - | Section | What's In It | Pages | |---------|-------------|-------| | **Getting Started** | First session, installation, configuration, migration from beta | ~10 | @@ -35,45 +23,28 @@ Six sections, each with a clear purpose: | **Features** | Upstream inheritance, marketplace, skills, and all 23 feature docs from beta | ~25 | | **Scenarios** | All 21 scenario docs from beta — real-world usage patterns | 21 | | **Blog** | This blog. The project's story, told chronologically | 20+ | - The directory structure mirrors the sections: `docs/guide/`, `docs/cli/`, `docs/sdk/`, `docs/features/`, `docs/scenarios/`, `docs/blog/`. The build script walks each directory and generates navigation automatically. Add a markdown file, run `node docs/build.js`, and it appears in the sidebar. - ## Porting the Beta UI - The beta site (from `bradygaster/squad`) had a good-looking docs UI: dark mode, sidebar navigation, search, responsive layout. We ported it wholesale to the replatform. - **Dark mode** uses CSS custom properties with `prefers-color-scheme` detection and a manual toggle. The theme persists in `localStorage` under the `squad-theme` key. Three states: auto (follows system), dark, light. Toggle button shows ☀️, 🌙, or 💻. - **Search** is client-side. The build script generates a JSON search index — title, href, and a text preview for every page. The search box filters the index in real time and shows a dropdown of matching results. No server. No Algolia. No API keys. Just JavaScript and a JSON array. - **Sidebar navigation** is generated from the directory structure. Each section becomes a `<details>`
element (collapsible). Pages within sections are alphabetically ordered. The current page is highlighted. On mobile, the sidebar slides in from the left. - Credit to [@spboyer](https://github.com/spboyer) for the original beta site CSS and JS patterns that we ported. - ## The Tone Pass - Every ported document got a tone pass: - - **Removed "⚠️ INTERNAL ONLY" banners.** The v1 docs are public now. - **Updated CLI commands.** `npx github:bradygaster/squad` → `npx @bradygaster/squad-cli`. npm is the only distribution path. - **Preserved the beta voice.** The scenario docs and feature docs were written in a conversational, prompt-first style. We kept that. No corporate rewrite. - 62 documents. Each one touched. The goal was consistency without homogeneity — every doc should feel like it belongs on the same site without every doc sounding like the same author wrote it. - ## The GitHub Pages Pipeline - The `.github/workflows/squad-docs.yml` workflow deploys to GitHub Pages on every push to `main`: - 1. Checkout repo 2. `npm ci` (install dependencies) 3. `npm run docs:build` (runs `node docs/build.js`) 4. Upload `docs/dist/` as artifact 5. Deploy to GitHub Pages - Build time is under 10 seconds for 77 pages. The site updates within minutes of a merge to main. No manual deployment. No staging server. Push markdown, get a website. - ## By the Numbers - | Metric | Value | |--------|-------| | Total pages | 77 | @@ -83,19 +54,12 @@ Build time is under 10 seconds for 77 pages. The site updates within minutes of | Build time | <10 seconds | | Framework | Custom (markdown-it + build.js) | | Tests passing | 2,232 across 85 test files | - ## What We Learned - - **Port the content, not just the structure.** Downloading 49 markdown files from the beta repo was the easy part. The tone pass — updating commands, removing internal markers, fixing URLs — took longer than the download. Content migration is editorial work, not file copying. 
- **Scenario-first organization works.** The six-section structure puts "what can I do with this?" (scenarios, features) ahead of "how does this work?" (SDK, CLI). Developers browse scenarios first and drill into reference material when they need specifics. - **Dark mode is table stakes.** The beta site had it. The v1 site has it. Every developer docs site should have it. It's not a feature. It's an expectation. - ## What's Next - This is where the replatform blog catches up to the present. Eight posts covering the full arc: from the decision to rewrite, through three waves of development, a version alignment, an adapter hardening sprint, a community contribution, and a docs restructure. - The foundation is solid. The docs are live. The CLI is published. The SDK is typed. The adapter is clean. What comes next is what the community builds on top of it. - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/021-the-migration.md b/docs/src/content/blog/021-the-migration.md index eafc2c838..8eb82f23b 100644 --- a/docs/src/content/blog/021-the-migration.md +++ b/docs/src/content/blog/021-the-migration.md @@ -7,203 +7,133 @@ tags: [squad, migration, npm, distribution, github, public-repo, release] status: published hero: "Squad moves from private repo (bradygaster/squad-pr) to public (bradygaster/squad). New install path. Unified distribution. v0.5.4 → v0.8.18. Here's what changed and how to upgrade." --- - # The Migration: Private to Public, GitHub to npm - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _Squad moves from private repo (bradygaster/squad-pr) to public (bradygaster/squad). New install path. Unified distribution. v0.5.4 → v0.8.18. 
Here's what changed and how to upgrade._ - ## What Moved - The Squad SDK has moved from a **private repository** (`bradygaster/squad-pr`) to a **public repository** (`bradygaster/squad`). This is a clean separation between: - - **Old distribution:** GitHub-native (`npx github:bradygaster/squad`) — removed. No longer supported. - **Old versioning:** Beta users tracked commits in a private repo; no semantic versioning. - **Old packages:** A monolithic `@bradygaster/create-squad` package bundled the CLI and SDK. - ## What Changed for Users - ### Install Commands - **Beta users (v0.5.4) on the old path:** ```bash # DEPRECATED — do not use npx github:bradygaster/squad ``` - **New users and upgraders (v0.8.18) on npm:** ```bash # Install globally npm install -g @bradygaster/squad-cli - # Or use npx (no install) npx @bradygaster/squad-cli ``` - **For SDK integration in TypeScript projects:** ```bash npm install @bradygaster/squad-sdk ``` - ### Package Names - | Aspect | Beta | Current | |--------|------|---------| | CLI package | `@bradygaster/create-squad` | `@bradygaster/squad-cli` | | SDK package | bundled in CLI | `@bradygaster/squad-sdk` | | Distribution | GitHub-native (no versioning) | npm (semver: latest, insider) | | Repository | private | [bradygaster/squad](https://github.com/bradygaster/squad) (public) | - ### Why the Migration? - The move to npm and public distribution gives you: - - **Faster installs** — npm cache; no git clone on every run - **Semantic versioning** — explicit versions, not git commits - **Channels** — `latest` for stable, `@insider` for bleeding-edge - **Standard dependency management** — works with npm, yarn, pnpm - **Public collaboration** — anyone can file issues, contribute, fork - ## For Beta Users: How to Upgrade - You're on v0.5.4 with `@bradygaster/create-squad`. The jump to v0.8.18 is significant—features and APIs have evolved. 
Here's the upgrade path: - ### Step 1: Uninstall the old package - ```bash npm uninstall -g @bradygaster/create-squad ``` - ### Step 2: Install the new CLI - ```bash npm install -g @bradygaster/squad-cli ``` - ### Step 3: In your existing project, upgrade Squad files - If you have a `.squad/` directory (or the old `.ai-team/`), run: - ```bash squad upgrade ``` - This updates Squad-owned files (templates, core configs) **without touching your team state** (agents, history, decisions). Your custom changes are preserved. - **Optional:** If you're migrating from `.ai-team/` to `.squad/`, use: - ```bash squad upgrade --migrate-directory ``` - ### Step 4: Verify your setup - ```bash squad doctor ``` - This checks your environment, Node.js version, GitHub auth, and squad configuration. It reports warnings if anything's amiss. - ### Step 5: Start working - ```bash copilot ``` - In GitHub Copilot CLI, type `/agent` and select **Squad**. Or in VS Code, type `/agents` and select **Squad**. Then: - ``` I'm continuing a project. Here's what I need: [your task] ``` - **See the full migration guide:** [`docs/get-started/migration.md`](../get-started/migration.md) - ## For New Users: Getting Started - Never used Squad? Start here: - ### 1. Install Squad CLI - ```bash npm install -g @bradygaster/squad-cli ``` - Or use npx without installing: - ```bash npx @bradygaster/squad-cli ``` - ### 2. Create a project directory - ```bash mkdir my-squad-project && cd my-squad-project git init ``` - ### 3. Initialize Squad - ```bash squad init ``` - This scaffolds `.squad/` with team configuration, agent templates, and routing rules. Everything is editable and committed to git. - ### 4. Authenticate with GitHub - ```bash gh auth login ``` - This lets Squad access your Issues, PRs, and Projects. Required for features like triage, the Copilot coding agent, and project monitoring with Ralph. - ### 5. 
Open Copilot and talk to your team - ```bash copilot ``` - In the Copilot CLI, type `/agent` and select **Squad**. Then: - ``` I'm starting a new project. Here's what I'm building: a React + Node API with user auth and dark mode. ``` - Squad proposes a team (Lead, Frontend, Backend, Tester, Scribe), you say yes, and they're ready. Describe the work. They execute it. Messages, decisions, and history persist in `.squad/` — commit it, share it, iterate on it. - **Full guide:** [`README.md`](https://github.com/bradygaster/squad/blob/main/README.md) | **Samples:** [`samples/`](https://github.com/bradygaster/squad/tree/main/samples) - ## The Version Jump: v0.5.4 → v0.8.18 - You might notice the version leap. Here's why: - - **v0.5.x (beta)** — Private repo, feature experiments, no stable SemVer - **v0.6.x** — Replatform begins (SDK separation, hook pipeline, cost tracking) - **v0.7.x** — Three development waves (orchestration, observability, docs) - **v0.8.x (current)** — Unified, public, semver-stable - You're not jumping over broken versions. You're joining the stable channel of a mature codebase. Read the [CHANGELOG.md](https://github.com/bradygaster/squad/blob/main/CHANGELOG.md) if you want the full arc. - ## Links - - **Public repository:** [`bradygaster/squad`](https://github.com/bradygaster/squad) - **Migration guide:** [`docs/get-started/migration.md`](../get-started/migration.md) - **README with full install methods:** [`README.md`](https://github.com/bradygaster/squad/blob/main/README.md) - **Samples:** [`samples/`](../../samples/) — hello-squad, knock-knock, rock-paper-scissors, streaming-chat, hook-governance, and more - **Getting started guide:** `docs/guide/getting-started.md` (coming soon) - ## What's Next - The public repo is live. npm distribution is stable. Docs are rebuilt. The team is ready to grow. - If you hit issues: - **[File a bug](https://github.com/bradygaster/squad/issues/new)** — Issues are public. We read them. 
- **[Start a discussion](https://github.com/bradygaster/squad/discussions)** — Ideas, questions, feedback. - **[Check the docs](https://github.com/bradygaster/squad#what-is-squad)** — migration guides, scenarios, reference. - Welcome to the public Squad. Let's build. - --- - _This post was written by McManus, DevRel on Squad's team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). [Try it →](https://github.com/bradygaster/squad)_ diff --git a/docs/src/content/blog/022-welcome-to-the-new-squad.md b/docs/src/content/blog/022-welcome-to-the-new-squad.md index 2fcbb1029..2cc7e790d 100644 --- a/docs/src/content/blog/022-welcome-to-the-new-squad.md +++ b/docs/src/content/blog/022-welcome-to-the-new-squad.md @@ -7,121 +7,79 @@ tags: [squad, v0.8.18, release, launch, sdk, cli, typescript, samples, migration status: published hero: "Squad v0.8.18 is here — a full TypeScript rewrite, npm distribution, 16 CLI commands, 8 SDK samples, 2200+ tests, and a feature set that turns multi-agent development from experiment to production tool." --- - # Welcome to the New Squad - > Blog post #22 — The complete guide to what changed from beta to v0.8.18. - ## The Big Picture - -Squad started as a scrappy beta experiment. A few scripts, some clever prompting, and a vision: what if you could build an AI team that lived in your repo, learned your codebase, and got better with every session? - +Squad started as a scrappy beta experiment. A few scripts, some clever prompting, and a vision: what if you could build an augmented team that lived in your repo, learned your codebase, and got better with every session? Today, Squad is a proper npm-distributed, TypeScript-strict, fully-tested multi-agent runtime. It's not just a rewrite — it's a ground-up rebuild of everything we learned from six months of usage, feedback, and production deployments. 
- Here's what shipped: - **Quick Stats:** - ✅ Full TypeScript rewrite (strict mode, no `any` escapes) - ✅ 16 CLI commands (init, doctor, triage, shell, aspire, and more) -- ✅ 8 SDK samples (hello-world to full autonomous pipelines) +- ✅ 8 SDK samples (hello-world to full background pipelines) - ✅ 2200+ tests across 613 test files - ✅ npm-only distribution (`@bradygaster/squad-cli`) - ✅ Global install: `npm install -g @bradygaster/squad-cli` - ✅ OpenTelemetry integration (traces + metrics → Aspire dashboard) - ✅ Security hardening (CWE-78 fixes, PII scrubbing, governance hooks) - ✅ Semantic versioning compliance (`X.Y.Z-preview.N`) - If you were on the beta (v0.5.x), this is your upgrade moment. If you're new to Squad, this is the best time to jump in. - --- - ## How to Get It - Three lines: - ```bash npm install -g @bradygaster/squad-cli cd your-project squad init ``` - **Or run without installing:** - ```bash npx @bradygaster/squad-cli ``` - **Upgrading from beta?** Check the [Migration Guide](../get-started/migration.md) for step-by-step upgrade instructions covering 9 scenarios from brand-new users to CI/CD pipelines. - --- - ## What Changed — The Headlines - Here's the side-by-side comparison of beta vs. 
v0.8.18: - | Area | Beta (v0.5.x) | New (v0.8.18) | |------|---------------|---------------| | **Language** | JavaScript | TypeScript (strict mode) | | **Distribution** | `npx github:bradygaster/squad` | `npm install -g @bradygaster/squad-cli` | | **Packages** | Monolithic | SDK + CLI (independent versioning) | | **Config** | JSON only | Markdown + optional `squad.config.ts` | -| **Shell** | Basic | Rich REPL with streaming, sessions, keyboard shortcuts | +| **CLI experience** | Basic | Rich command set, monitoring, and Copilot CLI integration | | **Testing** | Minimal | 2200+ tests across 613 files | | **Observability** | None | OpenTelemetry + Aspire dashboard | | **Security** | Basic | CWE-78 fixes, PII scrubbing, governance hooks | | **Versioning** | Inconsistent | Semver compliant (`X.Y.Z-preview.N`) | | **Documentation** | Sparse | Comprehensive docs, samples, and tutorials | - The beta was a proof of concept. v0.8.18 is the production-ready runtime. - --- - ## The CLI — Your New Command Center - The CLI went from "a few scripts" to "a full terminal experience." Here's what you get: - ### 1. **Global Install** — `squad` Works Everywhere - No more `npx github:` commands. Install once, run anywhere: - ```bash npm install -g @bradygaster/squad-cli squad --version ``` - That's it. Squad is now a first-class terminal command. - -### 2. **Interactive Shell** — Live Multi-Agent REPL - -Run `squad` with no arguments and you drop into a live terminal: - +### 2. 
**GitHub Copilot CLI** — Live Multi-Agent Collaboration +Launch Squad through GitHub Copilot CLI for the recommended conversational experience: ```bash -squad -``` - -You get a prompt: - -``` -squad > +copilot --agent squad ``` - **What you can do:** - Address agents by name: `@Ripley, fix the auth bug` -- Run slash commands: `/status`, `/agents`, `/history`, `/quit` +- Start conversational work immediately - Watch responses stream token-by-token in real time -- Navigate history with arrow keys -- Tab completion for agents and commands - -No more typing `squad` before every command. The shell is stateful, context-aware, and built for conversation. - +- Keep the same team context across follow-up prompts +- Route requests directly to specialists when you need them +No extra wrapper shell required — just open Squad in Copilot CLI and keep the conversation moving. ### 3. **`squad doctor`** — 9-Check Health Validation - Something feels off? Run the doctor: - ```bash squad doctor ``` - Nine checks run instantly: - ✅ Node.js version (≥ 20) - ✅ `gh` CLI installed and authenticated @@ -132,137 +90,88 @@ Nine checks run instantly: - ✅ Directory write permissions - ✅ Config file syntax - ✅ No orphaned files - If anything fails, you get a clear error message and suggested fix. No more guessing. - ### 4. **`squad start --tunnel`** — Stream Your Terminal to Your Phone - Demoing Squad to a remote audience? Run: - ```bash squad start --tunnel ``` - You get a QR code. Scan it with your phone. Your terminal output streams live to a web browser — perfect for presentations, demos, and remote debugging. - ### 5. **`squad triage`** — Auto-Scan GitHub Issues - Connect Squad to your GitHub repo and watch it auto-triage: - ```bash squad triage ``` - Squad scans open issues, labels them by category (bug/feature/docs/security), and assigns them to the right agent based on their role and skills. Runs continuously — every 10 minutes by default. 
- ```bash squad triage --interval 5 # Poll every 5 minutes ``` - -Your backlog manages itself. - +Your backlog stays moving with agent help and your guidance. ### 6. **`squad copilot`** — Add @copilot as a Team Member - -Want the GitHub Copilot coding agent to pick up issues autonomously? Add it to your team: - +Want the GitHub Copilot coding agent to pick up approved issues in the background? Add it to your team: ```bash squad copilot ``` - This adds `@copilot` as an agent in `.squad/team.md` and configures auto-assignment. When Squad triages an issue, @copilot picks it up, creates a branch, writes code, runs tests, and opens a PR. - Remove it with: - ```bash squad copilot --off ``` - ### 7. **Dual-Root Mode** — Org-Wide Team Sharing - Most teams want one Squad per repo. But some organizations want to share a team identity across projects. Dual-root mode gives you both: - ```bash squad init --mode remote ~/my-org-team ``` - This stores agent charters and team config in `~/my-org-team/.squad/`, while project-specific state lives in `./.squad/`. Agents are consistent across repos, but each project has its own decisions, skills, and context. - Perfect for agencies, consulting teams, and orgs with standardized agent roles. - ### 8. **Plugin Marketplace** — Community Skill Packs - Extend Squad with community-built skill packs: - ```bash squad plugin marketplace list squad plugin marketplace add https://github.com/user/squad-skills ``` - Skills are markdown files that agents read to learn domain patterns. A "TypeScript Best Practices" skill pack might teach agents to prefer `unknown` over `any`, use strict mode, and avoid non-null assertions. - ### 9. **`squad export`/`import`** — Portable Squad Snapshots - Need to share your team config with a colleague or back up your setup? - ```bash squad export > my-squad.json squad import my-squad.json ``` - The export includes team roster, agent charters, decisions, skills, and config. 
Import into any repo and your team comes alive instantly. - ### 10. **`squad aspire`** — Observability Dashboard - Squad exports OpenTelemetry traces and metrics. Launch the Aspire dashboard to see: - ```bash squad aspire ``` - - Agent spawn/destroy events - Token usage per agent, per session - Task duration histograms - Session lifecycle logs - Cost tracking in real time - Hook it up to Jaeger, Aspire, or any OTLP-compatible backend. - ### 11. **`squad upgrade`** — Safe Upgrades That Never Touch Your Team State - Upgrading used to be scary. Not anymore: - ```bash squad upgrade ``` - This updates Squad-owned files (templates, CLI scripts, SDK code) but never touches: - `.squad/agents/**` (your agent charters) - `.squad/decisions.md` (architectural decisions) - `.copilot/skills/**` (learned patterns) - `.squad/history/**` (session logs) - Your team's memory is sacred. Upgrades respect that. - --- - ## The SDK — Build Your Own Multi-Agent Apps - The CLI is built on the SDK. The SDK is the runtime. You can use it directly to build custom multi-agent applications. - **Two packages, independent versioning:** - `@bradygaster/squad-sdk` — the runtime (casting, streaming, governance, cost tracking) - `@bradygaster/squad-cli` — the interface (commands, REPL, triage loop) - They evolve separately. CLI features can ship without breaking SDK consumers. - **Install the SDK:** - ```bash npm install @bradygaster/squad-sdk ``` - **Key SDK Capabilities:** - | Component | What It Does | |-----------|-------------| | **CastingEngine** | Deterministic, themed agent naming. Cast a team from "The Usual Suspects" or "The Avengers." Names persist across sessions. | @@ -272,51 +181,34 @@ npm install @bradygaster/squad-sdk | **SkillRegistry** | Runtime pattern discovery. Agents write `SKILL.md` files, share knowledge across sessions. | | **EventBus** | Pub/sub for cross-agent communication. Subscribe to `session:created`, `message:sent`, `cost:threshold`. 
| | **SessionPool** | Managed Copilot session lifecycle. Reuse sessions across messages, clean up on shutdown. | - **TypeScript-first:** - Every interface, type, and function is fully typed. No `any` escapes. If it compiles, it's correct. - --- - ## The Samples — See It In Action - Eight samples ship with v0.8.18. Each one demonstrates a slice of the SDK. Run them, read the code, learn the patterns. - ### 1. **hello-squad** (Beginner) - **What it does:** Cast a themed team from The Usual Suspects universe. Watch four agents materialize with deterministic names. The casting engine's "hello world." - **What you'll see:** ``` 🎭 Keyser — Lead Personality: Quietly commanding; sees the whole board before anyone else. - 🎭 McManus — Developer Personality: Sharp, precise, always three steps ahead. - 🎭 Fenster — Tester Personality: Unpredictable, curious, finds edge cases by instinct. - 🎭 Verbal — Scribe Personality: The storyteller; connects dots across sessions. ``` - **Difficulty:** Beginner **SDK APIs:** `resolveSquad()`, `CastingEngine.castTeam()`, `onboardAgent()` - **Run it:** ```bash cd samples/hello-squad npm install && npm start ``` - --- - ### 2. **knock-knock** (Intermediate) - **What it does:** Two Copilot sessions trade live knock-knock jokes, streaming token-by-token. Demonstrates `SquadClientWithPool`, casting, and `StreamingPipeline`. - **What you'll see:** ``` 🎭 Agent 1: Knock knock. @@ -325,35 +217,25 @@ npm install && npm start 🎭 Agent 2: Lettuce who? 🎭 Agent 1: Lettuce in, it's cold out here! 🥶 ``` - Responses stream word-by-word in real time. Watch the jokes build character by character. - **Difficulty:** Intermediate **SDK APIs:** `SquadClientWithPool`, `CastingEngine`, `StreamingPipeline`, `SessionPool`, `EventBus` - **Run it:** ```bash cd samples/knock-knock npm install && npm start ``` - --- - ### 3. **rock-paper-scissors** (Advanced) - **What it does:** Nine strategic agents battle in a tournament. Rocky always throws rock. 
Sherlock analyzes opponent history to predict and counter moves. A live leaderboard tracks wins. - **What you'll see:** ``` Match: Rocky vs. Sherlock — Round 1 - Rocky throws: rock Sherlock analyzes opponent history... Sherlock plays strategically: paper Scorekeeper: Rocky throws rock AGAIN! Sherlock counters with paper. Strategic victory! 📄 > 🪨 - Sherlock: 1 win | Rocky: 0 wins - [After 10 rounds] 🏆 LEADERBOARD 🏆 1. Sherlock 🔍: 12 wins @@ -362,63 +244,46 @@ Sherlock: 1 win | Rocky: 0 wins 4. Echo 🦜: 5 wins 5. Rocky 🪨: 2 wins ``` - Watch the learning agent adapt. Sherlock's win rate climbs as it detects patterns. - **Difficulty:** Advanced **SDK APIs:** `SquadClientWithPool`, `SessionPool`, `StreamingPipeline`, `EventBus`, system prompts - **Run it:** ```bash cd samples/rock-paper-scissors npm install && npm start ``` - --- - ### 4. **hook-governance** (Intermediate) - **What it does:** Four governance hooks in action. File-write guards block `/etc/passwd`. PII scrubber redacts emails. Reviewer lockout prevents self-revision. Rate limiter caps user interruptions. - **What you'll see:** ``` 🛡️ hook-governance — Squad SDK governance hooks sample - ──────────────────────────────────────────────────────────── Demo 1 — File-Write Guards ──────────────────────────────────────────────────────────── Write to src/utils/helper.ts: allow ✅ Write to /etc/passwd: block 🚫 Reason: File write blocked: "/etc/passwd" does not match allowed paths - ──────────────────────────────────────────────────────────── Demo 2 — PII Scrubbing ──────────────────────────────────────────────────────────── Before: Deploy fix by brady@example.com — cc: alice@company.io After: Deploy fix by [EMAIL_REDACTED] — cc: [EMAIL_REDACTED] ``` - **Rules as code, not prompts.** Hooks enforce policy at runtime. No LLM reasoning required. 
- **Difficulty:** Intermediate **SDK APIs:** `HookPipeline`, `addPreToolHook()`, `addPostToolHook()`, `getReviewerLockout()`, `PolicyConfig` - **Run it:** ```bash cd samples/hook-governance npm install && npm start ``` - --- - ### 5. **streaming-chat** (Intermediate) - -**What it does:** Three agents respond to keyword-routed messages in real time. Type "design an API" → Backend responds. Type "add dark mode" → Frontend delivers. Token-by-token streaming. - +**What it does:** Three agents respond to keyword-routed messages in real time. Ask for an API design → Backend responds. Ask for dark mode → Frontend delivers. Token-by-token streaming. **What you'll see:** ``` -squad > design a REST API for recipes - +design a REST API for recipes Backend (McManus) responding... I recommend a resource-based API with these endpoints: - GET /recipes — list all recipes @@ -426,100 +291,74 @@ I recommend a resource-based API with these endpoints: - POST /recipes — create new recipe - PUT /recipes/:id — update recipe - DELETE /recipes/:id — remove recipe - Use JSON for all payloads. Add pagination with ?page= and ?limit=. ``` - **Difficulty:** Intermediate **SDK APIs:** `SquadClient`, `createSession()`, `StreamingPipeline`, `onDelta()`, `onUsage()`, `EventBus` - **Run it:** ```bash cd samples/streaming-chat npm install && npm start ``` - --- - ### 6. **cost-aware-router** (Beginner) - **What it does:** Five tasks flow through a cost-optimized router. Typo fix → Direct tier (cheapest). Architecture review → Full tier (premium). A budget bar fills up with warnings at 70% and 90%. 
- **What you'll see:** ``` Task 1: Fix typo in README → Tier: direct (no LLM needed) → Cost: $0.00 → Budget: $0.00 / $0.50 [██░░░░░░░░] 0% - Task 2: Update docs → Tier: lightweight (fast model) → Cost: $0.02 → Budget: $0.02 / $0.50 [██░░░░░░░░] 4% - Task 3: Implement feature → Tier: standard (standard model) → Cost: $0.15 → Budget: $0.17 / $0.50 [████░░░░░░] 34% - Task 4: Architecture review → Tier: full (premium model) → Cost: $0.25 → Budget: $0.42 / $0.50 [████████░░] 84% ⚠️ Budget warning: 84% consumed ``` - **Difficulty:** Beginner **SDK APIs:** `CostTracker`, `selectResponseTier()`, `getTier()`, `recordUsage()`, `formatSummary()` - **Run it:** ```bash cd samples/cost-aware-router npm install && npm start ``` - --- - ### 7. **skill-discovery** (Intermediate) - **What it does:** Agents load domain knowledge from SKILL.md files, match skills to tasks, and discover NEW patterns at runtime. Confidence tracks from low → medium → high as patterns are confirmed. - **What you'll see:** ``` 📚 Loading skills from .copilot/skills/ - ✅ Loaded: TypeScript Patterns (confidence: high 🟢) Triggers: typescript, types, generics Roles: developer, lead - ✅ Loaded: Testing Best Practices (confidence: medium 🟡) Triggers: test, coverage, mock Roles: tester - 🔍 Matching skills to task: "Fix TypeScript build error" Matched: TypeScript Patterns (confidence: high) Reason: Trigger match: "typescript" ``` - **Difficulty:** Intermediate **SDK APIs:** `SkillRegistry`, `loadSkillsFromDirectory()`, `matchSkills()`, `parseSkillFile()` - **Run it:** ```bash cd samples/skill-discovery npm install && npm start ``` - --- - ### 8. **autonomous-pipeline** (Advanced) - -**What it does:** THE showcase. A 10-task dev pipeline runs autonomously. Agents pick up work, route blockers, record decisions, accumulate learnings. A live dashboard shows who's working on what. - +**What it does:** THE showcase. A 10-task dev pipeline runs in the background. 
Agents pick up work, route blockers, record decisions, accumulate learnings. A live dashboard shows who's working on what. **What you'll see:** ``` 🎬 autonomous-pipeline — Squad SDK Showcase - ──────────────────────────────────────────────────────────── Casting Team from "The Usual Suspects" ──────────────────────────────────────────────────────────── @@ -527,7 +366,6 @@ npm install && npm start 🎭 McManus — Developer 🎭 Fenster — Tester 🎭 Verbal — Scribe - ──────────────────────────────────────────────────────────── Task Queue (10 tasks) ──────────────────────────────────────────────────────────── @@ -541,17 +379,14 @@ npm install && npm start 8. [ ] Write migration guide 9. [ ] Update README 10. [ ] Record architecture decisions - ──────────────────────────────────────────────────────────── - Autonomous Execution + Background Execution ──────────────────────────────────────────────────────────── McManus picked up: Design REST API McManus routed to Fenster: Write unit tests Fenster recorded decision: Use RS256 for JWT Verbal accumulated learning: Pool size 20 optimal - [Live dashboard updates in real time] - ──────────────────────────────────────────────────────────── Final Report ──────────────────────────────────────────────────────────── @@ -560,159 +395,108 @@ npm install && npm start 🪙 Tokens: 45,230 input / 12,890 output 📊 OTel traces exported to Aspire dashboard ``` - This is the "wow demo" — everything Squad can do in one running script. - **Difficulty:** Advanced **SDK APIs:** `SquadClient`, `CastingEngine`, `HookPipeline`, `CostTracker`, `EventBus`, `StreamingPipeline`, `resolveSquad()`, `createSession()` - **Run it:** ```bash cd samples/autonomous-pipeline npm install && npm start ``` - --- - ## Under the Hood — What We Rebuilt - ### TypeScript Strict Mode - Everything is typed. No `any` escapes. No implicit `any`. No index access without guards. If it compiles, it's correct. 
- **What this means for SDK consumers:** - Autocomplete works everywhere - Type errors caught at compile time - Refactoring is safe - Documentation lives in the types - The SDK exports fully-typed interfaces for every concept: agents, sessions, tiers, hooks, skills, events. - ### Semver Done Right - Beta versions were a mess: `0.8.5.1-preview`, `0.8.6.1`, inconsistent formats. v0.8.18 follows the spec: - **Correct format:** `X.Y.Z-preview.N` **Example:** `0.8.18-preview.1` - Prerelease identifier comes after patch, build metadata comes after prerelease. No more confusion. - **What this enables:** - Proper dependency resolution in npm - Correct version comparisons - Predictable upgrade paths - Semver-compliant tooling support - ### Security Hardening - #### CWE-78 Fixes - Beta used template strings with `execSync`. This was a shell injection risk. v0.8.18 uses `execFileSync` with array args: - **Before (unsafe):** ```typescript execSync(`git commit -m "${message}"`); ``` - **After (safe):** ```typescript execFileSync('git', ['commit', '-m', message]); ``` - No shell interpolation. No injection vectors. - #### PII Scrubbing - Email addresses leak into logs, tool output, and session history. v0.8.18 includes automatic PII scrubbing: - ```bash squad scrub-emails ``` - This redacts all emails in `.squad/` files: - `brady@example.com` → `[EMAIL_REDACTED]` - Works recursively on all markdown and JSON files - Safe to commit the scrubbed state - #### Governance Hooks - Policy as code. Hooks run before and after every tool call: - - **Pre-tool hooks:** Block file writes outside allowed paths - **Post-tool hooks:** Redact PII from output - **Custom hooks:** Add audit logging, quota enforcement, content filters - No LLM reasoning required. Rules are code, not prompts. - ### CRLF Normalization - Windows users with `core.autocrlf=true` saw parser failures because `\r\n` line endings broke markdown frontmatter parsing. v0.8.18 normalizes all line endings before parsing. 
- **What changed:** - All 8 parsers now call `normalizeEol()` first - `\r\n` → `\n` before regex matching - Windows/Mac/Linux all parse identically - No more "works on my machine" bugs. - ### OpenTelemetry Integration - Squad exports traces and metrics via OpenTelemetry. Three-layer API: - 1. **Low-level:** `otel.ts` — raw OTel SDK wrappers 2. **Bridge:** `otel-bridge.ts` — Squad-specific spans and metrics 3. **Init:** `otel-init.ts` — auto-configure based on `OTEL_EXPORTER_OTLP_ENDPOINT` - **What you can track:** - Agent spawn/destroy events - Session lifecycle (created/resumed/closed) - Token usage per agent, per session - Task duration histograms - Cost accumulation in real time - **Export to:** - Aspire Dashboard (`squad aspire`) - Jaeger - Zipkin - Any OTLP-compatible backend - **Example:** ```bash export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 squad aspire ``` - Open `http://localhost:18888` to see your multi-agent pipeline visualized. - --- - ## Migrating from Beta - If you're on beta (v0.5.x), here's the upgrade path: - ### Quick Migration - ```bash # 1. Back up your current squad cp -r .squad/ .squad-backup/ - # 2. Uninstall old beta npm uninstall -g @bradygaster/create-squad - # 3. Install new CLI npm install -g @bradygaster/squad-cli - # 4. Reinitialize (this will detect and migrate your .squad/ directory) squad init - # 5. Verify squad doctor ``` - ### Full Migration Guide - The [Migration Guide](../get-started/migration.md) covers 9 scenarios: - 1. Brand new user 2. Upgrading from v0.5.4 beta 3. Already on v0.8.x via npm @@ -722,81 +506,56 @@ The [Migration Guide](../get-started/migration.md) covers 9 scenarios: 7. Have `.ai-team/` from an older version 8. Using Squad in CI/CD 9. 
Using Squad SDK programmatically - **Key changes to watch for:** - Package name: `@bradygaster/create-squad` → `@bradygaster/squad-cli` - Directory: `.ai-team/` → `.squad/` (auto-migrated with `--migrate-directory`) - Config: JSON → Markdown (`.squad/team.md`, `.squad/decisions.md`) - Commands: Some were renamed or merged (check `squad help`) - --- - ## What's Next - Squad v0.8.18 is the foundation. Here's what we're working on: - ### Short-Term (Next 4 Weeks) - More samples and tutorials - Plugin marketplace growth (community skill packs) - CLI polish (better error messages, progress bars, colors) - Documentation improvements (video walkthroughs, interactive guides) - ### Medium-Term (Next 12 Weeks) - Multi-repo squad support (one team, many projects) - Enhanced observability (real-time dashboards, cost alerts) - CI/CD integrations (GitHub Actions, GitLab CI, CircleCI) - Team collaboration features (shared squads, sync protocols) - ### Long-Term (Next 6 Months) - Visual squad builder (drag-and-drop agent creation) - Agent marketplace (community-built agent templates) - Cloud-hosted squads (no local setup required) - Enterprise features (SSO, audit logs, compliance hooks) - **Community contributions welcome!** Issues and PRs at [github.com/bradygaster/squad](https://github.com/bradygaster/squad). - --- - ## Get Started Today - Squad v0.8.18 is live on npm. Install it: - ```bash npm install -g @bradygaster/squad-cli ``` - Initialize a project: - ```bash cd your-project squad init ``` - Launch the shell: - ```bash squad ``` - Start building. 
- --- - ## Resources - - **GitHub:** [github.com/bradygaster/squad](https://github.com/bradygaster/squad) - **SDK on npm:** [@bradygaster/squad-sdk](https://www.npmjs.com/package/@bradygaster/squad-sdk) - **CLI on npm:** [@bradygaster/squad-cli](https://www.npmjs.com/package/@bradygaster/squad-cli) - **Migration Guide:** [docs/get-started/migration.md](../get-started/migration.md) - **Samples:** [samples/README.md](https://github.com/bradygaster/squad/blob/main/samples/README.md) - **Issues:** [github.com/bradygaster/squad/issues](https://github.com/bradygaster/squad/issues) - --- - **Questions? Feedback? Ideas?** File an issue. Join the community. Build something amazing. - Welcome to Squad v0.8.18. Let's build better software, together. 🚀 - --- - _McManus (DevRel) — March 10, 2026_ diff --git a/docs/src/content/blog/024-v0823-release.md b/docs/src/content/blog/024-v0823-release.md index efbfdd6a0..03b6c98f9 100644 --- a/docs/src/content/blog/024-v0823-release.md +++ b/docs/src/content/blog/024-v0823-release.md @@ -7,23 +7,13 @@ tags: [squad, release, v0.8.23, node24, sdk-first, stability, cli, typescript, a status: published hero: "v0.8.23 fixes a critical crash when running `squad init` on Node.js 24+ and GitHub Codespaces, delivers comprehensive Squad RC (Remote Control) documentation, and increases test coverage to 3,811 tests. Faster CLI startup for non-session commands." --- - # v0.8.23 Release: Node 24+ Compatibility, Squad RC Docs, and Critical Fixes - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _v0.8.23 is a critical hotfix addressing a crash when running `squad init` on Node.js 24+ and GitHub Codespaces. It ships comprehensive Squad RC documentation, introduces lazy module loading for faster CLI startup, and includes a postinstall patch for ESM import issues. 2 issues closed, 3 PRs merged, 3,811 tests passing._ - --- - ## What Shipped - ### 1. 
SDK-First Mode (Phase 1) — The Headline Feature - Squad now lets you define your entire team—agents, routing, ceremonies, telemetry, governance—in a single TypeScript config file. Type-safe. Validated at runtime. Compiled to markdown. Deployed anywhere. - **Eight builder functions** for type-safe team configuration: - - `defineTeam()` — Team metadata, project context, member roster - `defineAgent()` — Agent role, model, tools, capabilities, and status - `defineRouting()` — Pattern-based routing with tiers and priorities @@ -32,26 +22,20 @@ Squad now lets you define your entire team—agents, routing, ceremonies, teleme - `defineCasting()` — Casting configuration (universe allowlists, overflow strategy) - `defineTelemetry()` — OpenTelemetry instrumentation (metrics, traces, spans) - `defineSquad()` — Top-level config composition - **`squad build` command** with three modes: - ```bash squad build # Compile squad.config.ts to .squad/ markdown squad build --check # Validate without writing squad build --dry-run # Preview what would be generated squad build --watch # File monitoring (stub for Phase 2) ``` - Generates: - `.squad/team.md` — team roster and context - `.squad/routing.md` — routing rules with priorities - `.squad/agents/{name}/charter.md` — agent charters with capabilities - `.squad/ceremonies.md` — ceremony schedules (if defined) - Protected files (`.squad/decisions.md`, `.squad/history.md`) are **never overwritten**. 
- **Quick Start Example:** - ```typescript import { defineSquad, @@ -59,7 +43,6 @@ import { defineAgent, defineRouting, } from '@bradygaster/squad-sdk'; - export default defineSquad({ team: defineTeam({ name: 'Content Review Squad', @@ -67,7 +50,6 @@ export default defineSquad({ projectContext: 'HTTP-triggered review pipeline', members: ['tone-reviewer', 'technical-reviewer', 'copy-editor'], }), - agents: [ defineAgent({ name: 'tone-reviewer', @@ -100,7 +82,6 @@ export default defineSquad({ ], }), ], - routing: defineRouting({ rules: [ { @@ -133,36 +114,26 @@ export default defineSquad({ }), }); ``` - Then: - ```bash npm install @bradygaster/squad-sdk npx squad build # Generates .squad/team.md, .squad/routing.md, .squad/agents/*/charter.md ``` - --- - ### 2. Azure Function Sample — Serverless Multi-Agent Workflows - New sample: `samples/azure-function-squad/` — a **Content Review Squad** that wires an HTTP-triggered Azure Function to a multi-agent review pipeline. - **What it demonstrates:** - - `defineSquad()` composing team + agents + routing - Three specialist agents (tone, technical, copy) defined with `defineAgent()` - Pattern-based routing with `defineRouting()` - Real TypeScript integration with Azure Functions v4 - Structured JSON responses with per-agent findings - **Usage:** - ```bash cd samples/azure-function-squad npm install func start # Requires Azure Functions Core Tools - # In another terminal: curl -X POST http://localhost:7071/api/squad-prompt \ -H "Content-Type: application/json" \ @@ -170,9 +141,7 @@ curl -X POST http://localhost:7071/api/squad-prompt \ "prompt": "Building multi-agent systems with the Squad SDK is straightforward. Define your agents with defineAgent(), compose them into a team with defineTeam(), and wire up routing with defineRouting(). The SDK validates everything at runtime — no schema files needed." 
}' ``` - **Response:** - ```json { "reviews": [ @@ -212,134 +181,77 @@ curl -X POST http://localhost:7071/api/squad-prompt \ "consensus": "✅ Content is publication-ready with minor suggestions." } ``` - The Azure sample is a drop-in starting point for serverless multi-agent workflows. Replace the mock review handlers with live Squad runtime calls using `SquadClient`, and you have a production-ready review pipeline. - --- - ### 3. Remote Squad Mode - Cross-machine squad collaboration via new `squad rc` commands for linking project-local squads to remote team roots. - **New commands:** - - `squad rc` — Show remote config status - `squad init-remote` — Initialize with remote team root config - `squad rc-tunnel` — Establish remote connection - **Key concepts:** - - `resolveSquadPaths()` dual-root resolver — project-local vs team identity directories - `squad doctor` — 9-check setup validation with emoji output - `squad link <team-repo-path>` — link a project to a remote team root - `ensureSquadPathDual()` / `ensureSquadPathResolved()` — dual-root write guards - Remote Squad Mode enables teams to share squad identity across multiple projects while maintaining project-local customization. - --- - ### 4. Critical Bug Fixes - #### **Installation Crash Fix (#247) — The Big One** - **Problem:** `npx @bradygaster/squad-cli` was crashing on fresh installs with: - ``` Error: Cannot find module '@opentelemetry/api' ``` - **Root cause:** `@opentelemetry/api` was a hard dependency that failed to resolve in npx's isolated install environment, causing the entire CLI to fail immediately. - **Fix:** - 1. Created `otel-api.ts` resilient wrapper with full no-op fallbacks 2. Moved OTel to optional dependencies (not required by default) 3. Telemetry now gracefully degrades when OTel is absent — zero crashes - **Impact:** Fresh installs now work reliably. Telemetry is truly optional. 
- --- - #### **CLI Command Wiring (#244)** - Four commands were implemented but never wired into the CLI entry point: - - `rc` - `copilot-bridge` - `init-remote` - `rc-tunnel` - **Fix:** Commands are now properly connected and accessible via `squad rc`, `squad copilot-bridge`, etc. - **Impact:** Remote squad features are now discoverable and functional. - --- - #### **Model Config Round-Trip (#245)** - **Problem:** `AgentDefinition.model` didn't accept structured model configuration — only strings. - **Fix:** - - `AgentDefinition.model` now accepts `string | ModelPreference` for advanced configuration - Charter compiler updated to emit and parse the new format correctly - Round-trip config survives compile → parse → serialize cycles intact - **Impact:** Advanced model selection (fallback chains, cost-aware routing) now works end-to-end. - --- - #### **ExperimentalWarning Suppression** - **Problem:** Node's `ExperimentalWarning` for `node:sqlite` was leaking into terminal output, cluttering user experience. - **Fix:** Process.emit override in `cli-entry.ts` filters experimental warnings before they reach stdout. - **Impact:** Clean, focused terminal output. - --- - #### **Blankspace Fix (#239)** - **Problem:** Idle blank space appeared below the agent panel even when no output was present. - **Fix:** Conditional height constraint only active during processing. Removes visual clutter. - **Impact:** Cleaner UI, professional appearance. - --- - ### 5. Test Hardening - #### **Windows Race Condition (EBUSY)** - Race condition in `fs.rm` with retry logic on Windows. Fixed with exponential backoff and resource cleanup. - #### **Speed Gate Adjustments** - Test speed gate thresholds adjusted for growing CLI codebase. No more false-positive timeout failures. - #### **Regression Fix Wave (#221)** - **Massive batch:** PR #221 resolved 25 test regressions across the suite. CRLF normalization, cross-platform path handling, and mock cleanup. - --- - ### 6. 
CI Stabilization (#232, #228) - GitHub Actions pipeline fixed and green. All workflows now run reliably without transient failures. - --- - ### 7. Community Contributions - - **PR #199 (migration command)** — Received, reviewed, and feedback captured as issue #231 for future implementation - **PR #243 (blankspace fix)** — Community contribution cherry-picked and credited - --- - ## By the Numbers - | Metric | Value | |--------|-------| | Issues closed | 26 | @@ -353,15 +265,10 @@ GitHub Actions pipeline fixed and green. All workflows now run reliably without | Documentation pages added | 2 (SDK-First Mode + SDK Reference) | | Sample projects | 1 (Azure Function Content Review Squad) | | Release candidate version | 0.8.22-preview.9 | - --- - ## Technical Details - ### SDK Mode Detection - The coordinator now auto-detects SDK-First mode: - ```typescript // squad.config.ts exists? if (fs.existsSync(resolve('.', 'squad.config.ts'))) { @@ -369,15 +276,10 @@ if (fs.existsSync(resolve('.', 'squad.config.ts'))) { // Coordinator uses compiled markdown + SDK awareness } ``` - Fallback: if `squad.config.ts` is missing, Squad operates in traditional markdown-first mode (backward compatible). - --- - ### Telemetry Architecture (OTel Resilience) - New `otel-api.ts` wrapper ensures telemetry is truly optional: - ```typescript // otel-api.ts export function initTelemetry(config?: TelemetryConfig) { @@ -393,15 +295,10 @@ export function initTelemetry(config?: TelemetryConfig) { } } ``` - **Benefit:** Telemetry is an optional add-on, not a blocker. 
- --- - ### Remote Squad Path Resolution - Dual-root resolver supports both project-local and team-identity directories: - ```typescript function resolveSquadPaths(projectRoot: string, remoteTeamRoot?: string) { // Check project-local first: {projectRoot}/.squad/ @@ -409,198 +306,127 @@ function resolveSquadPaths(projectRoot: string, remoteTeamRoot?: string) { // Load routing, teams, charters from first match } ``` - --- - ### OTel Readiness Assessment - All 8 telemetry modules (`defineHooks`, `defineTelemetry`, meter providers, span processors, exporters) compile and validate with zero runtime errors. This unblocks **Phase 3: OpenTelemetry Integration** — where agents report metrics and traces to Prometheus, Jaeger, and Datadog. - --- - ## Documentation Updates - ### New Guides - - **[SDK-First Mode](../sdk-first-mode.md)** — Comprehensive guide covering concepts, builder reference, patterns, and when to use SDK-First vs. markdown-only - **[SDK Reference](../reference/sdk.md)** — Full builder function signatures, type definitions, runtime validation rules, and examples - ### What Changed - - README includes quick reference for SDK-First teams - CHANGELOG updated with Phase 1 + bug fix deliverables - All sample code demonstrates the builder pattern - Blog post (this document) serves as release announcement - --- - ## Testing & Stability - **Test Coverage (v0.8.22 focus areas):** - - 36 builder function tests (`test/builders.test.ts`) — validates runtime type guards for each builder - 24 build command tests (`test/build-command.test.ts`) — `--check`, `--dry-run`, protected file guards - 29 markdown→SDK conversion round-trip tests (`test/sdk-conversion.test.ts`) — ensures config round-trips cleanly - 25 regression tests fixed from PR #221 - 2 Windows EBUSY race condition tests (fs.rm retry logic) - 13 known timeout flakes on Windows (non-logic, environment-related) - **Total test suite:** 3,811 passing tests (3,840 total, 0 logic failures) - --- - ## What We Learned 
- 1. **Type safety is a UX feature.** Developers writing `squad.config.ts` get autocomplete and catch misconfiguration errors at edit time, not at runtime. This pays for itself immediately. - 2. **Builders need to validate deeply.** Each builder runs type guards on input — enum values, required fields, capability levels, routing priorities. This surfaces configuration bugs early. - 3. **Optional dependencies unlock resilience.** Moving OTel to optional eliminated the installation crash entirely. Telemetry should be an add-on, not a blocker. - 4. **Azure Functions unlock serverless agents.** The sample demonstrates that Squad agents can run in a stateless HTTP function. This opens up cost-efficient deployments for batch processing workloads (content review, code analysis, compliance checks). - 5. **Protected files are critical.** `.squad/decisions.md` and `.squad/history.md` must never be overwritten by generated files. This ensures human-written knowledge persists across recompiles. - 6. **Windows needs dedicated testing.** Race conditions in `fs.rm`, CRLF normalization, and timeout thresholds are distinct from Unix environments. CI/CD must test both. - --- - ## Node 24+ Compatibility Fix - v0.8.23 fixes a critical crash when running `squad init` on Node.js 24+ (including GitHub Codespaces): - ``` Error [ERR_MODULE_NOT_FOUND]: Cannot find module 'vscode-jsonrpc/node' ``` - The root cause was an upstream ESM import issue in `@github/copilot-sdk`. Squad now uses a two-layer defense: - **Lazy imports** — commands like `init`, `build`, `link`, and `migrate` no longer eagerly load copilot-sdk - **Postinstall patch** — automatically fixes the broken import at install time - This also means CLI startup is faster for non-session commands. - --- - ## Squad RC Documentation - Comprehensive documentation for `squad rc` (Remote Control) is now available. 
The new guide covers ACP passthrough architecture, the 7-layer security model, mobile keyboard shortcuts, and troubleshooting. See [Squad RC](../features/squad-rc.md). - --- - ## What's Coming Next - ### v0.8.23 (Roadmap) - - `squad init --sdk` flag — opt-in to SDK-First mode during initialization (#249) - `squad migrate` command — convert existing markdown squads to SDK-First (#250) - Comprehensive SDK-First documentation expansion (#251) - ### Phase 2: Live Reload (Planned) - - `squad build --watch` fully implemented — hot reload of squad.config.ts changes - Agents re-spawn with new config without restarting the CLI - Decision file merging strategies for concurrent edits - ### Phase 3: OpenTelemetry Integration (Unblocked) - - `defineTelemetry()` config → live instrumentation - Agents export metrics and traces to Prometheus, Jaeger, and Datadog - Cost tracking per agent (token spend, wall-clock time) - Performance dashboards in Squad CLI (`squad aspire`) - ### Beyond v0.8.22 - - **Builder linting:** `squad lint` validates config against best practices (agent capability coverage, routing gaps, ceremony scheduling conflicts) - **Config versioning:** `squad config migrate` helpers for breaking changes across SDK versions - **Casting system integration:** `defineCasting()` → live universe selection and overflow handling in coordinator - --- - ## Upgrade Path - ### From v0.8.20 → v0.8.22 - ```bash npm install -g @bradygaster/squad-cli@latest # Or in your project: npm install --save-dev @bradygaster/squad-cli@latest ``` - **SDK-First Mode is opt-in.** Existing markdown-based squads continue to work without changes. - ### Fresh Install (Crash Fix Benefit) - If you've had issues with `npx @bradygaster/squad-cli` on fresh machines, v0.8.22 resolves the OTel dependency crash: - ```bash npx @bradygaster/squad-cli@latest doctor # Now works reliably without dependency resolution errors ``` - ### To Migrate to SDK-First (Optional) - 1. 
Create `squad.config.ts` with builder functions 2. Run `squad build --dry-run` to preview generated files 3. Run `squad build` to generate `.squad/` markdown 4. Commit the config, version control the generated files, and sync your team - Alternatively, keep your markdown-first squad — both modes will coexist indefinitely. - --- - ## Getting Started with v0.8.22 - ### Option 1: Stick with Markdown (No Changes Needed) - Your existing `.squad/` markdown-based squads work exactly as before. Upgrade and run: - ```bash npm install -g @bradygaster/squad-cli@latest npx squad doctor npx squad start ``` - ### Option 2: Try SDK-First Mode (New) - ```bash npm install -g @bradygaster/squad-cli@latest mkdir my-sdk-squad && cd my-sdk-squad git init - # Create squad.config.ts with builders # (see quick start above, or copy from samples/azure-function-squad/) - # Build your squad npx squad build - # See the generated markdown cat .squad/team.md - # Run agents (same CLI, same experience) npx squad start ``` - ### Option 3: Explore the Azure Function Sample - ```bash cd samples/azure-function-squad npm install func start # Requires Azure Functions Core Tools - # In another terminal: curl -X POST http://localhost:7071/api/squad-prompt \ -H "Content-Type: application/json" \ -d '{"prompt": "Your review text here"}' ``` - Full sample: [github.com/bradygaster/squad/tree/main/samples/azure-function-squad](https://github.com/bradygaster/squad/tree/main/samples/azure-function-squad) - --- - ## Important Fixes for Your Setup - If you've experienced any of these issues, v0.8.22 resolves them: - - ✅ **`npx @bradygaster/squad-cli` crashes on fresh install** (#247) — Fixed via OTel resilience - ✅ **`squad rc` command not found** (#244) — Now wired into CLI - ✅ **Model configuration doesn't persist** (#245) — Fixed round-trip support @@ -608,58 +434,41 @@ If you've experienced any of these issues, v0.8.22 resolves them: - ✅ **Extra blank space in UI** (#239) — Removed - ✅ **Timeout flakes on 
Windows** — Hardened with retry logic - ✅ **25 test regressions** (#221) — All fixed - --- - ## Community Credits - This release was shipped by the Squad core team with community contributions: - - **@bradygaster** — Architecture, SDK builders, squad build command, CLI wiring - **@edie** (TypeScript + type safety) — Builder implementations, runtime validation - **@mcmanus** (DevRel) — Documentation, sample walkthrough, blog post - **@fenster** (Testing + reliability) — Test suite, Windows hardening, regression fixes - **@spboyer** — Original remote mode design ([bradygaster/squad#131](https://github.com/bradygaster/squad/pull/131)) - **Community contributors:** - PR #199 — Migration command (feedback captured in #231) - PR #243 — Blankspace fix (cherry-picked) - Thanks to all early SDK-First adopters for feedback. - --- - ## Try It Now - ```bash npm install -g @bradygaster/squad-cli@latest mkdir my-sdk-squad && cd my-sdk-squad git init - # Create squad.config.ts with builders # (see quick start above, or copy from samples/azure-function-squad/) - # Build your squad npx squad build - # See the generated markdown cat .squad/team.md - # Run agents (same CLI, same experience) npx squad start ``` - --- - ## Links - - [GitHub Repository](https://github.com/bradygaster/squad) - [SDK-First Mode Guide](../sdk-first-mode.md) - [SDK Reference](../reference/sdk.md) - [Azure Function Sample](../../samples/azure-function-squad/) -- [Remote Squad Mode Docs](../features/remote-control.md) +- [Squad RC](../features/squad-rc.md) - [CHANGELOG](https://github.com/bradygaster/squad/blob/main/CHANGELOG.md) - **Related Issues:** - #194 — SDK-First Mode - #213 — Azure Function Sample @@ -670,7 +479,5 @@ npx squad start - #221 — Regression fixes (25 tests) - #232, #228 — CI stabilization - #231 — Migration command feedback - --- - _This post was written by McManus, the DevRel on Squad's own team. Squad is an open source project by [@bradygaster](https://github.com/bradygaster). 
[Try SDK-First Mode →](../sdk-first-mode.md)_ diff --git a/docs/src/content/blog/026-whats-new-ado-comms-subsquads.md b/docs/src/content/blog/026-whats-new-ado-comms-subsquads.md index 8c0e22323..8cb8f0db6 100644 --- a/docs/src/content/blog/026-whats-new-ado-comms-subsquads.md +++ b/docs/src/content/blog/026-whats-new-ado-comms-subsquads.md @@ -7,23 +7,13 @@ tags: [squad, azure-devops, enterprise, platform-adapter, communication, subsqua status: published hero: "Squad goes enterprise with native Azure DevOps support, adds a CommunicationAdapter for platform-agnostic agent-human messaging, renames Workstreams to SubSquads, and ships critical security hardening across all platform adapters." --- - # What's New: Azure DevOps Adapter, CommunicationAdapter, SubSquads, and Security Hardening - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _This batch adds first-class Azure DevOps support, a pluggable communication layer, the community-voted SubSquads rename, and security fixes that prevent shell injection, WIQL injection, and bearer token exposure. 5 PRs merged, 153 new tests, 4 issues closed._ - --- - ## What Shipped - ### 1. Azure DevOps Platform Adapter — The Enterprise Feature - Squad now works natively with Azure DevOps. When your git remote points to `dev.azure.com` or `*.visualstudio.com`, Squad auto-detects the platform and adapts everything. - **PlatformAdapter interface** — unified API for GitHub, ADO, and Planner: - ```typescript interface PlatformAdapter { listWorkItems(options): Promise; @@ -34,14 +24,11 @@ interface PlatformAdapter { // ... 
addTag, removeTag, addComment } ``` - Three adapters ship with the same interface: - **AzureDevOpsAdapter** — `az boards` CLI for work items, `az repos` for PRs - **GitHubAdapter** — `gh` CLI wrapper - **PlannerAdapter** — Microsoft Graph API for hybrid work-item tracking - **Configurable work items** via `.squad/config.json`: - ```json { "platform": "azure-devops", @@ -54,17 +41,11 @@ Three adapters ship with the same interface: } } ``` - All fields are optional. Cross-project support means your work items can live in a completely different ADO org/project than your git repo. - **Ralph on ADO** — the governance file (`squad.agent.md`) now includes a Platform Detection section, ADO WIQL commands for Ralph's scan cycle, and instructions to read `.squad/config.json` before any ADO command. - **Docs:** [Enterprise Platforms Guide](../features/enterprise-platforms.md) | [Blog #025](025-squad-goes-enterprise-azure-devops.md) - ### 2. CommunicationAdapter — Agent-Human Messaging - A new pluggable interface for agent-human communication. Scribe can post session summaries, Ralph can post board status, agents can escalate when blocked — all through a platform-appropriate channel. - ```typescript interface CommunicationAdapter { postUpdate(options): Promise<{ id: string; url?: string }>; @@ -72,31 +53,22 @@ interface CommunicationAdapter { getNotificationUrl(threadId): string | undefined; } ``` - Four adapters: - | Adapter | Phone-capable | Setup | |---------|:---:|---| | **FileLog** | Via git | Zero-config fallback | | **GitHub Discussions** | ✅ Browser | Auto-detected | | **ADO Work Item Discussions** | ✅ ADO mobile | Auto-detected | | **Teams Webhook** | ✅ Teams mobile | Stubbed (Phase 2) | - Factory auto-detects platform: `createCommunicationAdapter(repoRoot)`. - ### 3. SubSquads — The Community-Voted Rename - Workstreams → SubSquads. The community decided. 
- - CLI: `squad subsquads` (with `workstreams` and `streams` as deprecated aliases) - Types: `SubSquadDefinition`, `SubSquadConfig`, `ResolvedSubSquad` - Old names kept as `@deprecated` re-exports for backward compatibility - Config file stays at `.squad/streams.json` (file rename deferred) - ### 4. Security Hardening - Every platform adapter went through a community-driven 5-model security review (thanks [@wiisaacs](https://github.com/wiisaacs)): - | Fix | What it prevents | |-----|-----------------| | `execSync` → `execFileSync` | Shell injection via user input | @@ -106,45 +78,31 @@ Every platform adapter went through a community-driven 5-model security review ( | Cross-platform draft filter | `findstr` → JMESPath (macOS/Linux compat) | | PR status mapping | `active`→`open` for `gh` CLI compatibility | | `gh issue create` fix | No `--json` flag — parse URL from stdout | - ### 5. ESM Runtime Patch + Secret Guardrails (Brady) - - Runtime `Module._resolveFilename` intercept for Node 24+ ESM compatibility - 5-layer secret defense architecture - `.copilot/skills/secret-handling/SKILL.md` team reference - 59 TDD security hook tests - Charter hardening for Trejo (Git & Release) and Drucker (CI/CD) - --- - ## Quick Stats - - ✅ 5 PRs merged (#191, #263, #268, #272, #266) - ✅ 153 new tests (92 platform + 15 comms + 46 SubSquads) - ✅ 59 security tests (Brady's sprint) - ✅ 4 issues closed (#240, #261, #271, #273) - ✅ Security review: 7 code fixes from 10 review comments - ✅ External integration testing: 10/13 ADO tests passed - --- - ## Breaking Changes - None. All changes are additive. Repos without ADO remotes work exactly as before. Old `workstreams`/`streams` names still work as deprecated aliases. 
- --- - ## Contributors - - **[@tamirdresher](https://github.com/tamirdresher)** — ADO adapter, CommunicationAdapter, SubSquads rename, security fixes, docs, blog - **[@wiisaacs](https://github.com/wiisaacs)** — 5-model security review with test validation - **[@dfberry](https://github.com/dfberry)** — CommunicationAdapter requirements, tiered deployment proposal - **[@bradygaster](https://github.com/bradygaster)** — ESM fix, secret guardrails sprint, SubSquads merge, architecture guidance - --- - ## What's Next - - **Process template introspection** — auto-detect ADO work item types (#240) - **Teams webhook adapter** — full CommunicationAdapter implementation (#261) - **Pre-existing test stabilization** — fix 14 flaky/environment-dependent tests (#273) diff --git a/docs/src/content/blog/027-v0825-release.md b/docs/src/content/blog/027-v0825-release.md index 92221c7a1..72b048c06 100644 --- a/docs/src/content/blog/027-v0825-release.md +++ b/docs/src/content/blog/027-v0825-release.md @@ -7,64 +7,39 @@ tags: [squad, release, v0.8.25, testing, cli, quality, npm] status: published hero: "Squad now smoke-tests every CLI command in the packaged npm artifact before publishing." --- - # v0.8.25: Pre-Publish Quality Gate and CLI Smoke Testing - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _This release adds a critical pre-publish quality gate: 32 new tests that pack both `@bradygaster/squad-sdk` and `@bradygaster/squad-cli` into tarballs, install them in a clean temp directory, and verify all 27 CLI commands route correctly through the installed artifact. If a command is missing or broken in the tarball, the release is blocked._ - --- - ## What Shipped - ### CLI Packaging Smoke Test - The new test suite (`test/cli-packaging-smoke.test.ts`) simulates the exact npm install experience: - 1. **Pack** — Creates tarballs of both packages using `npm pack` 2. 
**Install** — Installs them in a clean isolated temp directory (just like `npm install -g` would) 3. **Verify routing** — Tests all 27 CLI commands through the installed artifact's bin entry 4. **Test aliases** — Validates 3 command aliases (watch, workstreams, remote-control) 5. **Error handling** — Tests --version, --help, and unknown command behavior - **32 new tests. 3,963+ tests total.** - ### Pre-Publish CI Gate - The `publish.yml` workflow now includes a `smoke-test` job that runs BEFORE both npm publish jobs. If the smoke test fails, nothing gets published. This blocks the exact class of bugs that bit us before: MODULE_NOT_FOUND errors, broken package.json exports, and ESM resolution failures. - ### Three-Layer Test Matrix - This release completes a three-layer defense: - | Layer | What it catches | Test file | |-------|----------------|-----------| | Source wiring | Import exists in code | `cli-command-wiring.test.ts` | | Packaged artifact | Command works after npm pack + install | `cli-packaging-smoke.test.ts` (NEW) | | Pre-publish gate | Blocks broken releases in CI | `publish.yml` smoke-test job (NEW) | - Before this release, a command could exist in source code, pass all tests, and still be missing from the published npm package. That gap is now closed. - --- - ## Why This Matters - npm packages are built artifacts. The code you write isn't always the code users install. This test suite verifies that the packaged tarball — the actual bytes users download — works correctly. The squad's own quality agents (FIDO and EECOM) verified the release and gave unanimous GO. 
- --- - ## Quick Stats - - ✅ 32 new CLI packaging smoke tests - ✅ 3,963+ tests passing, 150 test files - ✅ Pre-publish CI gate added to `publish.yml` - ✅ All 27 commands + 3 aliases verified in packaged artifact - --- - ## What's Next - - **Process template introspection** — auto-detect ADO work item types (#240) - **Teams webhook adapter** — full CommunicationAdapter implementation (#261) - **Pre-existing test stabilization** — fix 14 flaky/environment-dependent tests (#273) diff --git a/docs/src/content/blog/028-new-docs-site.md b/docs/src/content/blog/028-new-docs-site.md index 49de22a94..b78089039 100644 --- a/docs/src/content/blog/028-new-docs-site.md +++ b/docs/src/content/blog/028-new-docs-site.md @@ -7,103 +7,63 @@ tags: [squad, docs, community, astro, contributions] status: published hero: "Squad's documentation gets a complete rebuild — powered by Astro, Tailwind CSS, and community contributors." --- - # New Docs Site: Built by the Community - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _Squad's documentation site has been completely rebuilt from the ground up. A full Astro-powered docs experience with search, responsive design, and content contributed by multiple community members._ - --- - ## What Shipped - ### Complete Astro Docs Rewrite - [@IEvangelist](https://github.com/IEvangelist) (David Pine) delivered a **complete documentation site rebuild** in PR #293. 
This isn't a reskin — it's a ground-up rewrite: - - **Astro 5.7** — Static site generation with component islands - **Tailwind CSS 4.1** — Modern utility-first styling with responsive design - **Pagefind** — Client-side full-text search across all docs - **Structured content** — Markdown content collections with frontmatter validation - **Blog system** — All existing blog posts migrated into the new architecture - **Custom components** — Sidebar with scroll-to-active, syntax-highlighted code blocks, callout boxes - The site ships as a static build under `docs/` with its own `package.json`. Dev server: `npm run dev`. Production build: `astro build && pagefind`. - ### Docs Navigation Polish — PR #298 - [@IEvangelist](https://github.com/IEvangelist) (David Pine) followed up with targeted improvements in PR #298: - - **Active link highlighting** — Docs and Blog links now highlight in the top navigation when you're viewing that section - **Favicon fixes** — Favicon asset handling improved for all browsers - **Navigation clarity** — Users now have better visual feedback about where they are in the docs - This was a fast-follow polish pass on the Astro rewrite, catching the details that make navigation feel solid. - ### Community Content from @diberry - [@diberry](https://github.com/diberry) (Dina Berry) submitted **four pull requests** improving the getting-started experience: - - **PR #286** — Added validation steps to the Quick Start README - **PR #288** — "Which method should I use?" decision tree for the installation page — CLI, VS Code, or SDK, with clear guidance on when to use each - **PR #290** — ".squad/ directory explainer" for the first-session guide — a table showing every file and directory in `.squad/` with its purpose, plus ownership guidance - **PR #292** — Doc-impact review process added to team workflows - All four contributions have been merged or ported into the new Astro docs structure. 
- --- - ## What Changed for Users - ### Better Navigation - The sidebar now scrolls to your current position when a page loads. If you're deep in the table of contents, it stays where you are instead of jumping back to the top. - ### Copilot CLI Callouts - Key pages now include callouts directing users to the **GitHub Copilot CLI** as the recommended interface: - ``` 💡 The recommended way to use Squad is through GitHub Copilot CLI: copilot --agent squad ``` - ### CI/CD Safety Warnings - The CI/CD integration page now ships with the cron schedule **commented out by default** and a warning about GitHub Actions minutes consumption when enabling heartbeats and scheduled runs. - --- - ## Community Impact - This release represents a milestone for Squad's community. Two external contributors shaped the docs you'll use: - | Contributor | Impact | |-------------|--------| | [@IEvangelist](https://github.com/IEvangelist) | Complete Astro docs site architecture and build | | [@diberry](https://github.com/diberry) | Four PRs improving installation, getting-started, and team workflow docs | - Both contributors are now credited in [CONTRIBUTORS.md](https://github.com/bradygaster/squad/blob/main/CONTRIBUTORS.md). - --- - ## Try It - Visit the docs at [bradygaster.github.io/squad](https://bradygaster.github.io/squad/) or run them locally: - ```bash cd docs npm install npm run dev ``` - Open [localhost:4321/squad/](http://localhost:4321/squad/) and explore. 
- --- - ## What's Next - - Search refinements and indexing improvements - More scenario guides from community feedback - Continued content contributions welcome — see [CONTRIBUTING.md](https://github.com/bradygaster/squad/blob/main/CONTRIBUTING.md) diff --git a/docs/src/content/blog/028-v090-whats-new.md b/docs/src/content/blog/028-v090-whats-new.md index d42545715..ad604ecad 100644 --- a/docs/src/content/blog/028-v090-whats-new.md +++ b/docs/src/content/blog/028-v090-whats-new.md @@ -1,134 +1,72 @@ --- -title: "What's New in v0.9.0: Personal Squad, Worktrees, Cooperative Rate Limiting, and More" +title: "What's New in v0.9.0: Worktrees, Cooperative Rate Limiting, and More" date: 2026-03-23 author: bradygaster wave: 8 -tags: [squad, release, v0.9.0, features, personal-squad, worktree, rate-limiting, economy-mode] +tags: [squad, release, v0.9.0, features, worktree, rate-limiting, economy-mode] status: published -hero: "Squad's biggest release yet — personal agents that follow you across repos, isolated worktrees for conflict-free parallel work, cooperative rate limiting, economy mode for cost control, and a refreshed docs site." +hero: "Squad's biggest release yet — isolated worktrees for conflict-free parallel work, cooperative rate limiting, economy mode for cost control, and a refreshed docs site." --- - # What's New in v0.9.0 - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -> _This is Squad's biggest release. Personal Squad brings ambient agent discovery and project portability. Worktree spawning isolates each issue into its own branch. Cooperative rate limiting maps traffic across multi-agent teams. Economy Mode lets you budget costs. A complete docs refresh, security hardening, and community contributions make this one to upgrade for._ - +> _This is Squad's biggest release. Worktree spawning isolates each issue into its own branch. Cooperative rate limiting maps traffic across multi-agent teams. 
Economy Mode lets you budget costs. A complete docs refresh, security hardening, and community contributions make this one to upgrade for._ --- - ## What Shipped - -### 1. Personal Squad — Ambient Agent Discovery (#508) - -Your Squad follows you across repositories. - -**Personal agents** are ambient: you configure them once, and they're automatically available in every project you work on — no per-repo setup needed. They discover the projects they work in through the Ghost Protocol: safe interaction that respects your project's boundaries without disrupting other work. - -**CLI commands:** - -```bash -squad personal init # Declare your personal agents -squad personal list # See what's available -``` - -**Governance layer** — Personal agents declare what they need (memory, tools, platforms) and operate within guardrails. A personal coding agent won't blindly touch your enterprise repo; it reads your project's agent charters and `.squad/config.json` first. - -**What this enables:** -- One global Squad definition across all your repos -- Agents that know your coding style, tools, and preferences -- Safe cross-repo work without conflicts -- Team squads + personal squads = complete context - -:::tip -Personal Squad is foundational. It's how your favorite coding agent follows you into a new project without needing re-hiring. -::: - ---- - -### 2. Worktree Spawning — No More Branch Conflicts (#529) - +### 1. Worktree Spawning — No More Branch Conflicts (#529) Each issue gets its own git worktree. - Before: Agents working on multiple issues could block each other on branch conflicts. Now, every issue spawns an isolated worktree. Agents work in parallel, in separate filesystem branches, without touching the main worktree. 
- **What it solves:** - ✅ Parallel agent work on different issues (no blocking) - ✅ Clean, isolated branch per worktree - ✅ Main worktree stays stable - ✅ Automatic cleanup on issue completion - **How it works:** - Coordinator detects a new issue → spawns `.worktrees/issue-{number}/` - Agent checks out its issue's worktree - Multiple agents can work simultaneously across different worktrees - On completion, worktree is cleaned up automatically - This is the foundation for true parallel work at scale. - --- - -### 3. Machine Capability Discovery — Routing to Capable Hardware (#514) - +### 2. Machine Capability Discovery — Routing to Capable Hardware (#514) Agents declare what they need. Ralph routes work to machines that can handle it. - Use `needs:*` labels to tell Ralph what hardware an issue requires: - ``` needs:docker # This agent needs Docker needs:gpu # CUDA-capable GPU required needs:16gb-memory # At least 16GB RAM needs:k8s # Kubernetes cluster access ``` - Ralph's dispatcher reads these labels and routes the work to the machine in your squad pool that has those capabilities. No more "sorry, I can't run Docker here." - **Real-world scenarios:** - ML agents tagged `needs:gpu` route to GPU-equipped machines - Docker-based agents route to machines with Docker daemon running - Multi-region squads balance load based on capabilities - :::note Capability Discovery works with Ralph's mesh routing. If you're not running Ralph, this is aspirational. ::: - --- - -### 4. Cooperative Rate Limiting — Predictive Circuit Breaker (#515) - +### 3. Cooperative Rate Limiting — Predictive Circuit Breaker (#515) Multi-agent teams share rate limits responsibly. - Squad now implements **RAAS** — Rate-Aware Agent Scheduler. When your API calls trigger rate limiting, Squad maps `X-RateLimit-Remaining` headers to traffic light states: - - 🟢 **Green** — Normal traffic. Go. - 🟡 **Amber** — Approaching limit. Back off by 30%. - 🔴 **Red** — At/over limit. 
Wait 5–30 seconds; predictive circuit breaker handles recovery. - **Multi-agent coordination:** All agents in your squad see the same traffic light state. When one agent hits amber, the entire team backs off. No thundering herd, no cascading failures. - **Error surfacing:** Rate limit errors now surface with recovery suggestions: - ``` ⚠️ API rate limit reached. Waiting 15 seconds before retry. Agent: MyAgent | Remaining: 0/60 | Resets: 14:32 UTC Suggestion: 1 agent idle, 2 backing off. Try serial mode with --cooperative-delay=30s ``` - --- - -### 5. Economy Mode — Cost-Conscious Model Selection (#500) - +### 4. Economy Mode — Cost-Conscious Model Selection (#500) Budget-aware routing falls back to cheaper models when spend is high. - When your monthly LLM budget climbs, Economy Mode kicks in: - ```bash squad skill economy-mode enable --budget=50 # USD per agent per day ``` - Now when Claude-Sonnet (expensive) would exceed budget, the router automatically falls back to Claude-Haiku (70% cheaper). The agent still completes the work — just with a more efficient model. - **Governance:** ```typescript const role = await squad.resolveRole('coding-agent', { @@ -136,75 +74,48 @@ const role = await squad.resolveRole('coding-agent', { budgetPerDay: 50, // USD }); ``` - **Real-world impact:** - Reduces spend 40–60% for suitable tasks - Agents choose the most cost-effective model for their task - Human stays in budget control - --- - -### 6. Auto-Wired Telemetry (#281) - +### 5. Auto-Wired Telemetry (#281) One call to wire up full observability. 
- `initSquadTelemetry()` now auto-creates the EventBus and CostTracker: - ```typescript import { initSquadTelemetry } from '@bradygaster/squad-sdk'; - const { eventBus, costTracker } = await initSquadTelemetry({ endpoint: 'https://my-telemetry.com', // Optional; omit for in-process }); - // eventBus is live, costTracker is live // All agent work auto-reports to both ``` - **What you get:** - ✅ Real-time cost visibility per agent - ✅ Event stream for compliance/audit - ✅ Custom event handlers (e.g., Slack alerts on high spend) - ✅ Zero additional setup - No more hunting for telemetry wiring. One call, full observability. - --- - -### 7. Upgrade Path Overhaul — P0 Fixes (#544, #549) - +### 6. Upgrade Path Overhaul — P0 Fixes (#544, #549) **Windows EPERM handling** — File permission errors on Windows no longer break upgrades. Automatic retry with temporary directory fallback. - **Gitignore parent coverage** — Upgrade now respects `.gitignore` files in parent directories, preventing unintended file inclusion. - **Context-aware footer** — Upgrade footer includes your project's context: org name, repo, branch, so you know exactly what was upgraded. - **P0 fixes across the board:** - Node <22.5.0 hard-fail with clear error message - Memory safety caps to prevent runaway allocations - ESM patch improvements for Node 22/24 compatibility - Rate limit errors now surface with recovery options - --- - -### 8. Documentation Refresh - +### 7. Documentation Refresh **README slimmed**: 512 → 218 lines. Removed noise, kept substance. - **Upgrade section**: New dedicated docs guide for trouble-free version bumps, with platform-specific steps for Windows, macOS, Linux. - **Consistent install path**: All user-facing docs now reference `npm install -g @bradygaster/squad-cli`. - **Astro features**: 10 new Astro features implemented — section badges, improved search with Pagefind, better syntax highlighting. 
- **Teams MCP refresh**: Microsoft Teams integration updated for Workflows webhooks. Full docs at [Teams integration guide](../features/mcp.md). - -**Autonomous agents guide**: New guide for building agents that work unsupervised. How to set guardrails, declare intentions, handle escalation. - +**Background agent pipeline guide**: New guide for building agents that run in the background with guardrails, clear intentions, and escalation paths. --- - -### 9. Quality & Stability - +### 8. Quality & Stability - ✅ Node <22.5.0 hard-fail with clear message (prevents silent failures) - ✅ Memory safety caps (prevents runaway allocations on large codebases) - ✅ ESM patch improvements for Node 22/24 compatibility @@ -212,67 +123,41 @@ No more hunting for telemetry wiring. One call, full observability. - ✅ SIGINT/SIGTERM signal handling (graceful shutdown, 22 tests) - ✅ Read-modify-write race condition fixed in history-shadow.ts - ✅ ADO CLI exec timeout (prevent hanging on slow networks) - --- - -### 10. Community Contributions - +### 9. Community Contributions **Worktree regression tests** — @diberry added tests that guard against regressions in worktree `.git` handling (file vs. directory). - **Docs improvements** — @diberry contributed docs expansion for CLI README and reference. - **Community security review** — Thanks to @wiisaacs and the community for 5-model security review of platform adapters. 
- --- - ## Quick Stats - -- ✅ **5 major features** (Personal Squad, Worktrees, Capability Discovery, Cooperative Rate Limiting, Economy Mode) +- ✅ **4 major features** (Worktrees, Capability Discovery, Cooperative Rate Limiting, Economy Mode) - ✅ **Auto-wired telemetry** in one call - ✅ **P0 upgrade fixes** across Windows, Node compatibility, and error surfacing - ✅ **Docs refresh** — README -46%, new upgrade guide, consistent install path - ✅ **3,963+ tests passing**, 150 test files - ✅ **Community contributions** from 8+ contributors - --- - ## Breaking Changes - **None.** All changes are additive. Existing Squads work as-is. New features are opt-in via CLI or config. - --- - ## Upgrading - Upgrade to v0.9.0 with: - ```bash npm install -g @bradygaster/squad-cli ``` - Then: - ```bash squad upgrade # Walks you through any project-level config updates ``` - **Docs:** [Upgrade Guide](../scenarios/upgrading.md) | [Troubleshooting](../scenarios/troubleshooting.md) - If you hit issues, [open a GitHub issue](https://github.com/bradygaster/squad/issues). We're here to help. - --- - ## What's Next - - **Persistent Ralph** — Watch mode with heartbeat improvements, multi-region mesh routing - **Process template introspection** — Auto-detect ADO work item types at squad init - **Teams webhook CommunicationAdapter** — Full implementation of Teams mobile notifications - **SubSquad orchestration** — Compose squads from other squads; cross-team work at scale - This is an exciting time. v0.9.0 is the release where Squad scales from solo developers to distributed teams. - --- - **Questions?** Drop by [Squad Discussions](https://github.com/bradygaster/squad/discussions) or ping us on [Discord](https://discord.gg/squad-community). - **Want to contribute?** Check out [Contributing Guide](../guide/contributing.md). We're hiring agents (and humans who build them). 
diff --git a/docs/src/content/blog/029-upgrade-testing-at-scale.md b/docs/src/content/blog/029-upgrade-testing-at-scale.md index a10e8c199..0eedc78ed 100644 --- a/docs/src/content/blog/029-upgrade-testing-at-scale.md +++ b/docs/src/content/blog/029-upgrade-testing-at-scale.md @@ -7,37 +7,23 @@ tags: [squad, upgrade, testing, agents, quality] status: published hero: "We used Squad's own fan-out capability to clone 23 real-world repos from GitHub and validate our upgrade command in 5 minutes flat." --- - -We shipped a big upgrade fix — 10 changes addressing 13 gaps our AI team found during an audit. The automated tests passed. 18 out of 18. Green across the board. - +We shipped a big upgrade fix — 10 changes addressing 13 gaps our augmented team found during an audit. The automated tests passed. 18 out of 18. Green across the board. But "tests pass" isn't the same as "this won't break someone's project." We needed a different kind of confidence. - ## The Problem with Testing Upgrade Commands - Upgrade commands are uniquely hard to test. Your automated test suite creates pristine fixtures — perfectly structured directories, predictable file contents, known starting states. Real users are messier. They've deleted files they shouldn't have. They're running versions from six months ago. They've added custom files in directories you own. They've got unicode in their team names and hardcoded paths in their configs. - You can write a hundred synthetic tests and still miss the bug that only shows up when someone's `.gitattributes` already has 20 lines of C# rules and your upgrade appends new entries without a blank line separator. - ## The Approach: Use Your Own Tool to Test Itself - We build an AI agent framework. Our agents can fan out — multiple agents running in parallel, each with its own task, reporting back independently. So we used the framework to test itself. 
- **Step 1: Find real-world installs.** We used GitHub's code search API to find every public repo with our tool installed: - ``` filename:squad.agent.md path:.github/agents ``` - Result: **245 public repositories.** Real projects, real users, real configurations we'd never seen before. - **Step 2: Design a 3-tier testing strategy.** - - **Tier 1 — Synthetic:** 4 agents simulating controlled scenarios (old version, current version, corrupted state, edge cases like unicode and read-only files) - **Tier 2 — Our repos:** 1 agent cloning 3 of our own projects at different installed versions - **Tier 3 — Public repos:** 4 agents each cloning 4 public repos, running upgrade, taking before/after snapshots - **Step 3: Fan out.** We launched 9 agents in parallel. Each one: - 1. Checked out our fix branch and built the CLI 2. Created an isolated temp directory 3. Ran the upgrade @@ -46,26 +32,18 @@ Result: **245 public repositories.** Real projects, real users, real configurati 6. Verified every file that shouldn't have changed didn't 7. Ran the upgrade *again* to test idempotency 8. Cleaned up - **Zero impact to any public repo.** Shallow clones to temp directories, tested locally, deleted when done. No pushes, no PRs, no issues, no comments. Read-only interaction with GitHub. - ## The Numbers - | Tier | Targets | Checks | Passed | Failed | |------|---------|--------|--------|--------| | Synthetic scenarios | 4 | 60 | 59 | 1 | | Our own repos | 3 | 45 | 45 | 0 | | Public repos | 16 | 140 | 140 | 0 | | **Total** | **23** | **245** | **244** | **1** | - **99.6% pass rate.** The one failure was a read-only filesystem edge case — the upgrade threw a raw stack trace instead of a friendly warning. Valid bug, easy fix, and we never would have tested for it with synthetic fixtures alone. - Wall-clock time for all of this: **about 5 minutes.** Nine agents working simultaneously, each taking 3-5 minutes to clone, build, test, and clean up. 
- ## What the Version Span Looked Like - We tested upgrades from every version we encountered in the wild: - | Starting Version | Repos | |-----------------|-------| | v0.0.0 (source installs) | 4 | @@ -74,63 +52,41 @@ We tested upgrades from every version we encountered in the wild: | v0.5.x | 6 | | v0.8.x | 9 | | "Already current" | 2 | - No version was too old. The oldest repo — installed from source with version `0.0.0` — upgraded cleanly to current. Every version in between worked too. - ## What We Actually Checked - Every repo got the same verification checklist. Infrastructure on one side, user state on the other: - **Must change:** - Config files updated with merge rules and gitignore entries - Missing directories created (up to 6) - 28 skills deployed - 30 template files refreshed - Version stamp updated - **Must NOT change:** - Team roster - Decision log - Routing rules - Agent charters and histories - User configuration - **Must survive a second run:** - No duplicate entries in any config file - No errors - No corruption - Across 23 test targets spanning versions 0.0.0 through 0.8.25, team sizes from 5 to 12 agents, and projects in .NET, Node.js, Python, Flutter, and Rust: **zero user files were modified.** - ## What We Found That Surprised Us - The synthetic tests caught the obvious stuff. The real repos caught the stuff you'd never think to test for: - **Smart deduplication works.** One repo had 4 of 5 required `.gitignore` entries from a previous version. The upgrade detected the 4 existing entries and added only the 1 missing one. Not "overwrite all 5" — surgically precise. We didn't specifically test for this. We discovered it was working correctly by running against a real repo that happened to be in that state. - **Custom content survives.** Multiple repos had custom files inside directories we manage — investigation summaries, design specs, audit reports created by agents in past sessions. All of them survived the upgrade untouched. 
One repo even had a retired agents directory (`_alumni/`). Preserved perfectly. - **Legacy detection is graceful.** One repo was still using our old directory name from months ago. The upgrade detected it, printed a clear deprecation warning with an actionable migration command, and proceeded without crashing. We'd written that code, but we'd never tested it against a real legacy install in the wild. - **Privacy scrubbing works — but contradicts itself.** Two repos had email addresses in their team files from before we added privacy protections. The upgrade correctly scrubbed them. Good feature. But the console output ends with "Never touches user state" — which is technically false when the privacy scrub just modified 5 files. Small messaging fix, but we wouldn't have caught it without testing against repos old enough to have that data. - ## The Meta Insight - Here's what made this approach powerful: **the same fan-out capability we build for users is exactly what we needed to validate our own upgrade path.** We didn't write a special testing harness. We didn't set up CI matrices. We told our agents "go clone these repos, run upgrade, tell me what happened" — and they did, in parallel, in minutes. - The three tiers gave us three kinds of confidence: - **Synthetic** tells you the code handles expected cases correctly - **Your own repos** tells you the code handles real-but-familiar cases correctly - **Strangers' repos** tells you the code handles cases you never imagined - That last tier is the one most teams skip. It's also the one that found the most interesting issues. - ## The Takeaway - If your tool has a public footprint — if real people have installed it and configured it and built things on top of it — you have a free test corpus sitting on GitHub. Clone it, test against it, delete it. Zero impact, maximum confidence. - Your upgrade command's job is to make old installs current without touching what users built on top. That's the contract. 
Your test suite tells you the code works. Real repos tell you the *contract* holds. - --- - *We tested 23 repos in about 5 minutes. Found 3 bugs (1 real, 2 messaging). Fixed them before merging. The upgrade shipped the next morning.* diff --git a/docs/src/content/blog/030-v092-whats-coming.md b/docs/src/content/blog/030-v092-whats-coming.md index f51b0860f..fd006ab36 100644 --- a/docs/src/content/blog/030-v092-whats-coming.md +++ b/docs/src/content/blog/030-v092-whats-coming.md @@ -6,82 +6,51 @@ tags: [squad, release, features, watch, cleanup, fact-checker, external-state, s status: draft hero: "The next Squad release ships 10 new features, 4 bug fixes, and 60+ tests — all backward-compatible. Here's what to expect." --- - # What's Coming in v0.9.2 — 10 Features, Zero Breaking Changes - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _The next Squad release ships 10 new features, 4 bug fixes, and 60+ tests — all backward-compatible. Here's what to expect._ - ## The Noise Problem - If you've been running `squad watch` in production with output forwarded to Teams or Slack, you've probably noticed this: - ``` Squad Monitor Round 158 Squad Monitor Round 159 Squad Monitor Round 160 Squad Monitor Round 161 ``` - Hundreds of messages. No useful information. No way to tell which machine or repo they came from. - This release fixes that — and ships 9 other features while we're at it. - ## New Features - ### 1. Quiet Notifications (`--notify-level`) - The biggest quality-of-life improvement: watch rounds are now **silent by default when the board is empty**. Only rounds with actual work produce output. 
- ```bash squad watch --notify-level important # default — only meaningful rounds squad watch --notify-level all # old behavior (every round) squad watch --notify-level none # fully silent ``` - Round headers now include **machine name and repo** for attribution: - ``` 🔄 Ralph — Round 5 (DEVBOX-01 · my-project) ``` - Persistent config: `{ "watch": { "notifyLevel": "important" } }` in `.squad/config.json`. - ### 2. Fleet Hybrid Dispatch (`--dispatch-mode`) - Parallel issue processing via Copilot CLI `/fleet`. Benchmarked at **2.9x faster** for read-heavy workloads. - ```bash squad watch --execute --dispatch-mode hybrid ``` - Issues are auto-classified as read (research, reviews → fleet) or write (implementations, fixes → local). Fleet batches reads in parallel while writes execute sequentially. - ### 3. Verbose Debugging (`--verbose`) - When watch seems stuck on "Board is clear" but you know there's work, `--verbose` shows you exactly what's happening: - ```bash squad watch --verbose ``` - Prints: issue counts, label matches, auth status, PR review states, round timing, capability execution paths. - ### 4. Fact-Checker Agent Role (🔍) - New built-in role for output verification. Validates claims, detects hallucinations, runs counter-hypotheses. - ``` squad new agent fact-checker ``` - Confidence ratings: ✅ Verified, ⚠️ Unverified, ❌ Contradicted. Routes automatically on "fact-check", "verify", "audit" keywords. - ### 5. 8 Built-in Skills - `squad init` and `squad upgrade` now ship 8 curated skills: - | Skill | What it teaches | |-------|-----------------| | squad-conventions | Core patterns and file layout | @@ -92,67 +61,46 @@ Confidence ratings: ✅ Verified, ⚠️ Unverified, ❌ Contradicted. Routes au | reviewer-protocol | Code review gates | | test-discipline | Test-first discipline | | agent-collaboration | Multi-agent handoffs | - ### 6. Scratch Directory (`.squad/.scratch/`) - Agents no longer dump temp files in the repo root. 
The new `scratchDir()` and `scratchFile()` SDK APIs route all ephemeral files to `.squad/.scratch/` — gitignored and auto-cleaned. - ### 7. Cleanup Watch Capability - Automated housekeeping during `squad watch`: - Clears `.squad/.scratch/` every 10 rounds - Archives orchestration-log and session-log entries older than 30 days - Warns about stale decision inbox files (>7 days) - ### 8. External State Storage - Move `.squad/` state outside the working tree so it survives branch switches: - ```bash squad externalize # state moves to ~/.squad/projects/{repo}/ squad internalize # move it back ``` - ### 9. Self-Upgrade (`squad upgrade --self`) - Update the CLI itself from within squad: - ```bash squad upgrade --self # latest stable squad upgrade --self --insider # latest prerelease ``` - Auto-detects npm/pnpm/yarn. After CLI upgrade, automatically runs repo upgrade to apply new templates. - ### 10. Triage Label Slug Fix - Multi-word agent names (like "Steve Rogers") now correctly generate `squad:steve-rogers` labels instead of `squad:steve rogers` (which GitHub rejects). Labels are pre-created at watch startup. 
- ## Bug Fixes - - **PR contamination** — Scribe now stages only `.squad/` files, not broad staging commands (#783) - **Outdated review threads** — PR readiness check ignores threads where code changed (#780) - **Filename uniqueness** — `scratchFile()` uses monotonic counter, not just `Date.now()` - **Cross-platform paths** — `deriveProjectKey()` handles Windows paths on Linux CI - ## By the Numbers - - **10 features** across SDK, CLI, and watch capabilities - **4 bug fixes** (1 critical — PR contamination) - **60+ new tests** (scratch: 8, cleanup: 12, fact-checker: 8, skills: 5, external: 12, self-upgrade: 4, notify: 5, triage: 12) - **4 new feature docs** + 2 updated + README command table (15→17) - **0 breaking changes** — all opt-in, all backward-compatible - **1 behavioral change** — notify default flipped from `all` to `important` (use `--notify-level all` to restore) - ## Upgrade - ```bash squad upgrade --self # get the latest CLI squad upgrade # apply new templates to your repo ``` - Or if you're on insider: - ```bash squad upgrade --self --insider ``` diff --git a/docs/src/content/blog/031-state-backends.md b/docs/src/content/blog/031-state-backends.md index bc7012313..3cc0e0e9b 100644 --- a/docs/src/content/blog/031-state-backends.md +++ b/docs/src/content/blog/031-state-backends.md @@ -6,98 +6,65 @@ tags: [squad, state-backends, git-notes, orphan-branch, two-layer, architecture] status: draft hero: "Squad now supports 4 state backends that keep .squad/ files out of your PRs. Choose local, orphan branch, or the two-layer architecture from the blog." --- - # State Backends — Keep Your PRs Clean - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > _Squad now supports 4 state backends. Your PRs stay clean — just code._ - ## The Problem - Every time an agent makes a decision, writes to history, or logs a session, those changes end up as `.squad/` file modifications in your working branch. 
Open a PR and your reviewer sees 57 code changes buried under 40 decision logs, agent history entries, and session files. - Two completely different workflows sharing one branch: - **Code** → slow, human-gated, needs review approval -- **Squad state** → fast, autonomous, no human ever needs to review it - +- **Squad state** → fast, background-managed, no human review needed for routine updates ## The Fix - PR #1004 adds state backend support. One line in your config, and all mutable state goes somewhere else. - ```bash # The fastest path to clean PRs: squad init --state-backend two-layer ``` - ## Four Options - | Backend | Where state goes | PRs clean? | Setup | |---------|-----------------|------------|-------| | `local` | Working branch (default) | ❌ | Zero config | | `orphan` | `squad-state` branch | ✅ | Config + branch | | `two-layer` | Notes + orphan combined | ✅ | `--state-backend two-layer` | - ## The Two-Layer Architecture - The `two-layer` option implements the architecture from [Tamir's blog post](https://www.tamirdresher.com/blog/2026/03/23/scaling-ai-part7b-git-notes): - - **Layer 1 (git notes):** Thin commit-scoped "why" annotations. Invisible in PR diffs. Attached to specific commits. - **Layer 2 (orphan branch):** Permanent state store. Decisions, histories, logs. The team's full diary. - **Ralph bridges the layers:** After a PR merges, Ralph promotes notes with `promote_to_permanent: true` to the orphan branch. Notes on rejected PRs are silently ignored. - This handles the three scenarios from the blog correctly: 1. **Rejected feature** — decision on a rejected PR is NOT promoted ✅ 2. **Universal truth** — routing change flagged with `promote_to_permanent` survives ✅ 3. 
**Valuable failure** — research flagged with `archive_on_close` is preserved ✅ - ## How It Works (Under the Hood) - The Squad coordinator (`squad.agent.md`) detects `stateBackend` from `.squad/config.json` at session start and adapts every agent spawn prompt: - - **Agents** receive backend-specific instructions for reading and writing state - **Scribe** receives backend-specific commit targets (orphan branch, note refs, or working branch) - **State Leak Guard** catches if an agent accidentally stages state files on the working branch - Static config (charters, team.md, routing.md) always stays on disk. Only mutable state (decisions, history, logs) moves to the configured backend. - ## Quick Start - ```bash # New project — set backend at init time squad init --state-backend two-layer - # Existing project — migrate with one config change # Edit .squad/config.json → add "stateBackend": "two-layer" git add .squad/config.json && git commit -m "config: use two-layer" ``` - For the full migration guide and troubleshooting, see the [State Backends feature docs](/docs/features/state-backends/). - ## Tested With Real Squads - We ran 12 E2E tests with real squad sessions — real team casting (Usual Suspects, Firefly universes), real agent spawns, real decisions recorded. All evidence is in [PR #1004](https://github.com/bradygaster/squad/pull/1004). 
- Key proof: - Git notes with `promote_to_permanent: true` written by agents ✅ - Orphan branch receives state commits from Scribe ✅ - Feature branch PRs show ONLY code changes, zero `.squad/` state ✅ - State persists across branch switches ✅ - ## Try It - Build from the PR branch: - ```bash git clone https://github.com/bradygaster/squad.git cd squad && git checkout feat/state-backend-global-996 npm install && npm run build ``` - Then init any repo with your preferred backend: - ```bash node /packages/squad-cli/dist/cli-entry.js init --state-backend two-layer ``` - We'd love feedback — especially on whether the init flow feels right and whether state actually persists across branch switches in your environment. diff --git a/docs/src/content/blog/watch-monitor.md b/docs/src/content/blog/watch-monitor.md index 2f4383302..3f3dc0b17 100644 --- a/docs/src/content/blog/watch-monitor.md +++ b/docs/src/content/blog/watch-monitor.md @@ -4,40 +4,25 @@ date: 2026-03-30 author: "Brady" tags: [squad, ralph, watch, work-monitor, automation, execution] status: published -hero: "Ralph started as a label router that idled when the board was clear. Now he spawns Copilot sessions and actually does the work." +hero: "Ralph started as a label router that idled when the board was clear. Now he spawns Copilot sessions and moves approved work forward." --- - # From Triage Bot to Work Monitor - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -> _Ralph started as a label router that idled when the board was clear. Now he spawns Copilot sessions and actually does the work._ - +> _Ralph started as a label router that idled when the board was clear. Now he spawns Copilot sessions and moves approved work forward._ ## What Changed - `squad watch` used to be a simple triage loop. Every N minutes, it would: - 1. Fetch open issues with the `squad` label 2. Route them to team members based on routing rules 3. 
Add labels like `squad:eecom` or `squad:gnc` 4. Idle when done - -That's it. Ralph was a label router — useful, but not autonomous. When the board cleared, Ralph just sat there waiting for humans to do the work. - +That's it. Ralph was a label router — useful, but limited. When the board cleared, Ralph just sat there waiting for humans to move the work forward. **Issue #708 changed that.** - With `--execute`, Ralph transforms from a triage bot into a full work monitor. He doesn't just label issues — he spawns Copilot CLI sessions to actually work on them. - ```bash squad watch --execute --interval 15 ``` - This one flag turns Ralph from a watcher into a worker. - ## The Unix Pipe Philosophy - The design follows the Unix philosophy: each flag is a composable feature that does one thing well. - - `--execute` → spawn Copilot sessions for actionable issues - `--monitor-teams` → scan Teams messages via WorkIQ - `--monitor-email` → scan email for alerts and action items @@ -46,26 +31,20 @@ The design follows the Unix philosophy: each flag is a composable feature that d - `--wave-dispatch` → parallel sub-task execution within issues - `--retro` → enforce retrospective checks (Fridays or >7 days) - `--decision-hygiene` → auto-merge decision inbox when >5 files - Each flag is opt-in. Existing `squad watch` behavior is unchanged — triage only, no execution. - ## Code Example: From Simple to Full Monitor - **Basic triage (original behavior):** ```bash squad watch ``` - **Add execution:** ```bash squad watch --execute ``` - **Add project board lifecycle:** ```bash squad watch --execute --board ``` - **Full monitor with all features:** ```bash squad watch --execute \ @@ -77,11 +56,8 @@ squad watch --execute \ --max-concurrent 2 \ --interval 15 ``` - ## Architecture of a Round - When all features are enabled, each round follows this cycle: - 1. **Self-pull**: `git fetch && git pull --ff-only` to stay current 2. 
**Scan**: Fetch open issues (two-pass if enabled: lightweight list → hydrate actionable only) 3. **Triage**: Label untriaged issues based on routing rules @@ -102,9 +78,7 @@ When all features are enabled, each round follows this cycle: - Retro: if it's Friday after 14:00 UTC or last retro was >7 days ago, spawn a retro session - Decision hygiene: if `.squad/decisions/inbox/` has >5 files, spawn a merge session 8. **Report**: Log round summary, sleep until next interval - ## Example Execution Output - ``` 🔄 Ralph — Round 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -114,13 +88,10 @@ When all features are enabled, each round follows this cycle: ▶ [14:23:10] Executing #42 "Fix auth redirect bug" → gh copilot --message "Work on issue #42..." ✓ [14:25:43] #42 completed - ▶ [14:25:44] Executing #45 "Add retry logic" → gh copilot --message "Work on issue #45..." ✓ [14:28:20] #45 completed - ▶ [14:28:21] Executing #47 "Update docs" → gh copilot --message "Work on issue #47..." ✗ [14:58:21] #47 failed: Timed out after 30m - [14:58:22] Two-pass: 15 total → 3 actionable (hydrated) [14:58:23] Board: #42 → Done [14:58:23] Board: #45 → Done @@ -128,47 +99,31 @@ When all features are enabled, each round follows this cycle: ✓ [14:58:25] Email monitor scan complete [14:58:26] Round 1 complete — sleeping 15 minutes ``` - ## The Three Layers of Ralph - | Layer | When | How | |-------|------|-----| | **In-session** | You're at the keyboard | "Ralph, go" — active loop while work exists | | **Local watchdog** | You're away but machine is on | `squad watch --execute` | -| **Cloud heartbeat** | Fully unattended | `squad-heartbeat.yml` GitHub Actions events | - -The in-session loop is ephemeral — it lives only while the Copilot session is active. The local watchdog runs as a separate process and polls at your chosen interval. The cloud heartbeat is the fully unattended layer that triggers on GitHub events (issue close, PR merge, manual dispatch). 
- +| **Cloud heartbeat** | Event-driven | `squad-heartbeat.yml` GitHub Actions events | +The in-session loop is ephemeral — it lives only while the Copilot session is active. The local watchdog runs as a separate process and polls at your chosen interval. The cloud heartbeat is the event-driven layer that triggers on GitHub events (issue close, PR merge, manual dispatch). ## Why This Matters - Before #708, Ralph was a coordinator. He routed work to team members but never picked up the tools himself. - Now, Ralph is a worker. He claims issues, posts comments, spawns Copilot sessions, manages the project board, scans Teams and email, and enforces governance checks. - This closes the gap between "someone triaged this issue" and "someone is working on this issue." Ralph doesn't just label the work — he starts it. - ## What's Next - The next iteration will add: - - **SubSquad discovery**: automatically detect `.squad/subsquads/` for routing across multiple codebases - **Channel routing**: route notifications to specific Teams channels based on work type - **Multi-machine coordination**: distribute work across multiple Ralph instances - Ralph is no longer just a triage bot. He's a work monitor — and he's just getting started. - --- - **Try it:** ```bash squad watch --execute --interval 15 ``` - **Full feature set:** ```bash squad watch --execute --board --two-pass --monitor-teams --retro --decision-hygiene --max-concurrent 2 ``` - --- - _PR #709 · Issue #708 · Shipped 2026-03-30_ diff --git a/docs/src/content/docs/concepts/architecture.md b/docs/src/content/docs/concepts/architecture.md index b0ea9739d..f3c06fe58 100644 --- a/docs/src/content/docs/concepts/architecture.md +++ b/docs/src/content/docs/concepts/architecture.md @@ -1,71 +1,45 @@ -# Architecture - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -How Squad works — one page, no handwaving. 
- ---- - -## System diagram - -``` -User request - ↓ -Coordinator (routing engine) - ↓ -Spawns agents in parallel - ↓ -Agents read memory (.squad/) → work → write results - ↓ -Scribe merges decisions, Ralph tracks issues - ↓ -Results returned to user -``` - ---- - -## Components - -### Coordinator - -The coordinator is Squad's routing engine. It reads your request, checks routing rules in `.squad/routing.md`, and decides which agents to spawn. If you say "team," it decomposes the work and launches multiple agents in parallel. If you name an agent, it routes directly to them. - -### Agents - -Each agent is a specialist with a charter, role, and persistent memory. Agents are spawned as independent subprocesses with their own context windows and tools. They read `.squad/decisions.md` and their own history before working, then write results back. Agents never see each other's conversations — the coordinator orchestrates coordination. - -### Memory (.squad/) - -All team state lives in `.squad/`. This includes the roster (`team.md`), routing rules (`routing.md`), decisions (`decisions.md`), agent charters and histories (`agents/`), and ceremony schedules (`ceremonies.md`). Agents read this before every spawn. You own these files — edit them anytime. - -### Routing - -Routing rules in `.squad/routing.md` define which agent handles which work. The coordinator reads these rules before spawning. You can override routing by naming an agent directly in your request. - -### Scribe - -The Scribe is a silent agent that tracks decisions and logs sessions. Every team has a Scribe. You never talk to them directly — they work in the background, merging decisions from all agents into `.squad/decisions.md`. - -### Ralph - -Ralph is the work monitor. He watches your GitHub or GitLab issues, tracks work in progress, and alerts the team when something is ready. Every team has a Ralph. He's silent unless you ask him for status. - ---- - -## What happens when you say "Team, build X"? - -1. 
**Coordinator reads the request** and checks `.squad/routing.md` for decomposition rules. -2. **Coordinator spawns multiple agents in parallel** — one for frontend, one for backend, one for tests, etc. -3. **Each agent reads `.squad/decisions.md`** and their own history (`agents/{name}/history.md`), then works independently. -4. **Agents write results** to their history files and propose decisions. -5. **Scribe merges all decisions** into `.squad/decisions.md`. -6. **Coordinator returns labeled results** to you, tagged with each agent's name. - ---- - -## Learn more - -- [**Your Team**](./your-team.md) — How agents form, specialize, and work together -- [**Work routing**](../features/routing.md) — How the coordinator decides which agents to spawn -- [**Memory and knowledge**](memory-and-knowledge.md) — How decisions, skills, and history persist -- [**Parallel work**](parallel-work.md) — How agents work simultaneously without conflicts +# Architecture +How Squad works — one page, no handwaving. +--- +## System diagram +``` +User request + ↓ +Coordinator (routing engine) + ↓ +Spawns agents in parallel + ↓ +Agents read memory (.squad/) → work → write results + ↓ +Scribe merges decisions, Ralph tracks issues + ↓ +Results returned to user +``` +--- +## Components +### Coordinator +The coordinator is Squad's routing engine. It reads your request, checks routing rules in `.squad/routing.md`, and decides which agents to spawn. If you say "team," it decomposes the work and launches multiple agents in parallel. If you name an agent, it routes directly to them. +### Agents +Each agent is a specialist with a charter, role, and persistent memory. Agents are spawned as independent subprocesses with their own context windows and tools. They read `.squad/decisions.md` and their own history before working, then write results back. Agents never see each other's conversations — the coordinator orchestrates coordination. +### Memory (.squad/) +All team state lives in `.squad/`. 
This includes the roster (`team.md`), routing rules (`routing.md`), decisions (`decisions.md`), agent charters and histories (`agents/`), and ceremony schedules (`ceremonies.md`). Agents read this before every spawn. You own these files — edit them anytime. +### Routing +Routing rules in `.squad/routing.md` define which agent handles which work. The coordinator reads these rules before spawning. You can override routing by naming an agent directly in your request. +### Scribe +The Scribe is a silent agent that tracks decisions and logs sessions. Every team has a Scribe. You never talk to them directly — they work in the background, merging decisions from all agents into `.squad/decisions.md`. +### Ralph +Ralph is the work monitor. He watches your GitHub or GitLab issues, tracks work in progress, and alerts the team when something is ready. Every team has a Ralph. He's silent unless you ask him for status. +--- +## What happens when you say "Team, build X"? +1. **Coordinator reads the request** and checks `.squad/routing.md` for decomposition rules. +2. **Coordinator spawns multiple agents in parallel** — one for frontend, one for backend, one for tests, etc. +3. **Each agent reads `.squad/decisions.md`** and their own history (`agents/{name}/history.md`), then works independently. +4. **Agents write results** to their history files and propose decisions. +5. **Scribe merges all decisions** into `.squad/decisions.md`. +6. **Coordinator returns labeled results** to you, tagged with each agent's name. 
+--- +## Learn more +- [**Your Team**](./your-team.md) — How agents form, specialize, and work together +- [**Work routing**](../features/routing.md) — How the coordinator decides which agents to spawn +- [**Memory and knowledge**](memory-and-knowledge.md) — How decisions, skills, and history persist +- [**Parallel work**](parallel-work.md) — How agents work simultaneously without conflicts diff --git a/docs/src/content/docs/concepts/github-workflow.md b/docs/src/content/docs/concepts/github-workflow.md index 5bbd4826e..d59df6129 100644 --- a/docs/src/content/docs/concepts/github-workflow.md +++ b/docs/src/content/docs/concepts/github-workflow.md @@ -1,38 +1,24 @@ # GitHub Integration - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Squad plugs directly into your GitHub workflow — issues become branches, branches become PRs, PRs become merged code. No context-switching, no copy-paste, no ticket juggling. Just tell your squad what to build and watch the commits roll in. - --- - ## Try This - ``` Connect to myorg/myrepo and show me the backlog ``` - ``` Work on issue #42 ``` - ``` Ralph, go — process the backlog until it's clear ``` - --- - ## How It Works - The lifecycle is simple: **connect → backlog → work → PR → merge**. - ``` Connect repo → Show backlog → Assign issues → Agent branches + implements ↓ Merge PR ← Review feedback ← Agent opens PR ``` - | You say | What happens | |---------|-------------| | `"Connect to myorg/myrepo"` | Stores issue source in `team.md` (once per project) | @@ -42,19 +28,12 @@ Connect repo → Show backlog → Assign issues → Agent branches + imple | `"There's review feedback on PR #24"` | Author agent reads comments and pushes fixes | | `"Merge PR #24"` | Squash-merge, delete branch, close linked issue | | `"What's left?"` | Refreshes backlog, shows remaining open issues | - **Prerequisite:** Install and authenticate the `gh` CLI (`gh auth login`). 
Squad uses it for all GitHub operations. - --- - ## Working with your team - Squad is built for mixed teams — humans set direction, AI agents execute and report back. The Lead agent bridges between them, routing work and surfacing decisions when a human needs to act. - ### Humans in the lifecycle - The following is **one example** of how a mixed team might divide responsibilities. Your team decides its own process — use [ceremonies](../features/ceremonies.md) and [directives](../features/human-team-members.md) to shape the workflow that fits. - | Stage | Who acts | What happens | |-------|----------|--------------| | **Triage** | Human (or Lead) | Applies `go:yes` / `go:no` — decides what's worth building | @@ -62,21 +41,13 @@ The following is **one example** of how a mixed team might divide responsibiliti | **Implementation** | AI agents | Branch, build, test, open PRs — no human input required | | **PR review** | Human | Reviews and approves (or requests changes); lockout protocol prevents conflicting edits | | **Merge** | Human or Ralph | Squash-merge, branch cleanup, issue closed | - This is a starting point. Define your own checkpoints by configuring [ceremonies](../features/ceremonies.md) and capturing [directives](../features/human-team-members.md). - For details on how work routes to humans, see [Human team members](../features/human-team-members.md). - For ceremony details, see [Ceremonies](../features/ceremonies.md). - For agent anatomy and how each team member (AI, human, @copilot) is structured, see [your-team.md](your-team.md). - --- - ## Label Taxonomy - Labels aren't just tags — they're Squad's **state machine**. Five namespaces drive workflow automation, routing, and lifecycle tracking. 
- | Namespace | Purpose | Example Values | Mutual Exclusivity | |-----------|---------|----------------|-------------------| | `go:` | Verdict | `go:yes`, `go:no`, `go:needs-research` | ✅ One per issue | @@ -84,20 +55,14 @@ Labels aren't just tags — they're Squad's **state machine**. Five namespaces d | `type:` | Issue category | `type:feature`, `type:bug`, `type:spike`, `type:docs`, `type:chore`, `type:epic` | ✅ One per issue | | `priority:` | Urgency | `priority:p0`, `priority:p1`, `priority:p2` | ✅ One per issue | | `squad:{member}` | Agent assignment | `squad:fenster`, `squad:hockney` | ❌ Multiple OK (pair work) | - Within `go:`, `release:`, `type:`, and `priority:`, applying a second label **auto-removes** the first. The `squad:{member}` namespace allows multiple labels for collaborative work. - ### How Labels Drive Automation - Labels power four automation layers: - 1. **Enforcement** — `label-enforcement.yml` watches for changes and removes duplicates within a namespace. 2. **Sync** — Cross-namespace cascading: `go:no` → auto-adds `release:backlog`; `priority:p0` → ensures `go:yes`. 3. **Triage** — Ralph uses labels to route work: `squad:fenster` → Fenster picks it up; no `squad:*` + `type:bug` → routes based on `routing.md`. 4. **Heartbeat** — `squad-heartbeat.yml` runs every 30 minutes, auto-triaging unassigned issues and escalating stale research. - ### State Machine Flow - ```mermaid graph TD A["New issue"] --> B["squad label"] @@ -112,30 +77,21 @@ graph TD I -->|No| H J --> K["Issue closed"] ``` - Labels are created automatically during `init` or `upgrade`. Add custom labels with: - ```bash gh label create "squad:designer" --color "0366d6" --description "Work assigned to Designer" ``` - --- - ## Ralph — Work Monitor - Ralph is a built-in squad member who tracks the work queue, monitors CI status, and keeps the team moving. He's always on the roster — no casting required. 
- ### Talking to Ralph - | You say | What happens | |---------|-------------| | `"Ralph, go"` | Activates the self-chaining work loop | | `"Ralph, status"` | Single check cycle, reports board state | | `"Ralph, idle"` | Stops the loop | | `"Ralph, scope: just issues"` | Monitors only issues, skips PRs/CI | - ### What Ralph Monitors - | Signal | Action | |--------|--------| | Untriaged issues (no `squad:{member}` label) | Lead triages and assigns | @@ -144,45 +100,30 @@ Ralph is a built-in squad member who tracks the work queue, monitors CI status, | Review feedback on PRs | Routes to author agent | | CI failures | Notifies agent to fix | | Approved PRs | Merges and closes issue | - Ralph **never stops on his own while work remains** — he keeps cycling until the board clears, you say "idle", or the session ends. Every 3–5 rounds he posts a status update and keeps going. - ### Three Layers of Ralph - | Layer | When | How | |-------|------|-----| | **In-session** | You're at the keyboard | `"Ralph, go"` — active loop | | **Local watchdog** | You're AFK but machine is on | `squad watch --interval 10` | -| **Cloud heartbeat** | Fully unattended | `squad-heartbeat.yml` GitHub Actions events | - +| **Cloud heartbeat** | Event-driven | `squad-heartbeat.yml` GitHub Actions events | The heartbeat workflow (`squad-heartbeat.yml`) is installed during `init` or `upgrade`. It runs on issue close, PR merge, and manual dispatch. Edit the workflow in `.github/workflows/squad-heartbeat.yml` to customize triggers. For periodic polling without events, use `squad watch` locally. - **PAT requirement:** Ralph needs `gh` CLI authenticated with a Classic PAT (scopes: `repo` and `project`). The default `GITHUB_TOKEN` doesn't have sufficient scopes. - --- - ## PRD Mode - Got a product spec? Hand it to Squad and the Lead decomposes it into prioritized, dependency-tracked work items. - ``` Read the PRD at docs/product-spec.md and break it into work items ``` - The Lead agent: 1. 
Decomposes the spec into discrete work items (WI-1, WI-2, etc.) 2. Assigns priorities: P0 (must-have), P1 (important), P2 (nice-to-have) 3. Routes items to agents based on domain expertise 4. Tracks dependencies — won't start WI-4 if it depends on WI-2 - Independent items run in parallel. When requirements change, give Squad the updated PRD — the Lead diffs against existing items and adjusts the backlog without undoing completed work. - --- - ## Project Boards - Squad integrates with GitHub Projects V2 for visual workflow tracking. **Labels are the source of truth** — boards are one-way projections that visualize the state machine. - | Board Column | Label State | |--------------|-------------| | **Backlog** | `go:no` or `release:backlog` | @@ -190,85 +131,55 @@ Squad integrates with GitHub Projects V2 for visual workflow tracking. **Labels | **Ready** | `go:yes`, no `squad:*` | | **In Progress** | `go:yes` + `squad:{member}` | | **Done** | Issue closed | - Board sync runs on label changes, issue close, PR merge, and a 30-minute schedule. Dragging an issue on the board triggers a webhook that applies the corresponding label. - **Status:** Label-based state machine is fully implemented. Automated board sync workflows are in development for v0.4.0. You can use `gh project` commands now — full automation is coming. - --- - ## Notifications - Your squad pings you when they need input, hit an error, or finish work. Squad uses MCP-based notification servers — you bring your own delivery channel. - See the [Notifications Guide](../features/notifications.md) for [platform setup](../features/notifications.md#quick-start-teams-simplest-path) (Teams, Discord, iMessage, webhooks), [trigger configuration](../features/notifications.md#what-triggers-a-notification), and [sample MCP configs](../features/notifications.md#sample-mcp-configs). - --- - ## Tips - - You don't need to assign issues to agents — Squad routes based on domain expertise defined in charters and `routing.md`. 
- If `gh` isn't authenticated, Squad will tell you. Run `gh auth login` first. - Use `priority:p0` to fast-track critical items — it auto-sets `go:yes`. - Combine PRD mode with GitHub Issues to auto-create issues from work items. - Ralph's in-session loop is session-scoped — state resets between sessions. Use `squad watch` or the heartbeat for persistent monitoring. - --- - ## Sample Prompts - ``` connect to bradygaster/squad and show me the backlog ``` - Links Squad to a GitHub repo and displays all open issues. - ``` work on all issues labeled "bug" ``` - Processes multiple bug issues in parallel — each gets its own branch and PR. - ``` mark issue #42 as approved for v0.4.0 ``` - Applies `go:yes` and `release:v0.4.0` labels, removing any conflicting labels. - ``` Ralph, go — start monitoring and process the backlog until it's clear ``` - Activates Ralph's self-chaining loop to continuously triage, assign, and process work. - ``` read the PRD at docs/product-spec.md and break it into work items ``` - Ingests a product spec and creates a prioritized, dependency-tracked backlog. - ``` there's review feedback on PR #24 ``` - The author agent reads review comments and pushes fixes to the existing branch. - ``` list all p0 features approved for the next release ``` - Queries issues with `priority:p0 + type:feature + go:yes + release:{current milestone}`. - ``` squad watch --interval 5 ``` - Starts persistent local polling — checks GitHub every 5 minutes for new work and triages automatically. 
- --- - ## See Also - - [Your Team](./your-team.md) — How work routes to the right agent - [Work Routing](../features/routing.md) — Domain routing and agent assignment - [Parallel Work](./parallel-work.md) — Multi-agent parallel execution on batched issues diff --git a/docs/src/content/docs/concepts/memory-and-knowledge.md b/docs/src/content/docs/concepts/memory-and-knowledge.md index f5fda72c6..5d58a8ac8 100644 --- a/docs/src/content/docs/concepts/memory-and-knowledge.md +++ b/docs/src/content/docs/concepts/memory-and-knowledge.md @@ -1,32 +1,19 @@ # Memory & Knowledge - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Squad remembers everything — coding conventions, architecture decisions, deployment patterns, your personal preferences. Memory grows with every session, compounding across three layers so agents stop making the same mistakes and start anticipating your needs. - --- - ## Try This - ``` Always use single quotes in TypeScript ``` - ``` What decisions has the team made about testing strategy? ``` - ``` Show me what skills this team has learned ``` - --- - ## How It Works - Memory lives in three layers, each serving a different purpose: - ```mermaid graph TD A["Skills Layer
<br/>.copilot/skills/{name}/SKILL.md<br/>
Reusable patterns • Portable<br/>
How to set up CI with GitHub Actions"] @@ -36,49 +23,30 @@ graph TD A --> B B --> C ``` - ### How Memory Compounds - | Stage | What Agents Know | |-------|-----------------| | 🌱 First session | Project description, tech stack, your name | | 🌿 After a few sessions | Conventions, component patterns, API design, test strategies | | 🌳 Mature project | Full architecture, tech debt map, regression patterns, performance conventions | - The first session is always the least capable. Give the team a few sessions to build up context — they'll stop asking questions they've already answered. - --- - ## Personal Memory: `history.md` - Each agent has its own history file at `.squad/agents/{name}/history.md`. After every session, agents append what they learned — architecture decisions, conventions, file paths, user preferences. - **Only that agent reads its own history.** This means each team member builds specialized knowledge about their domain. Kane learns the auth system inside and out. Dallas masters the component library. Lambert memorizes the test infrastructure. - ### Progressive Summarization - When an agent's `history.md` exceeds ~12KB, older entries get archived into a summary section. Recent entries stay detailed; older entries condense. This keeps files within a useful context budget without losing accumulated knowledge. - --- - ## Shared Decisions: `decisions.md` - Team-wide decisions live in `.squad/decisions.md`. **Every agent reads this before working.** This is the team's shared brain. - Decisions get captured three ways: - 1. **From agent work** — agents write to `.squad/decisions/inbox/{agent-name}-{slug}.md` 2. **From your directives** — when you say "always…" or "never…" (see below) 3. **From Scribe merges** — the Scribe agent periodically consolidates inbox files into the canonical `decisions.md`, deduplicating overlapping entries - ### Decision Archiving - As your project grows, `decisions.md` accumulates hundreds of blocks. 
Stale sprint artifacts and one-time planning fragments consume context without adding value. When this happens, old decisions archive to `.squad/decisions-archive.md` — preserved for reference but no longer loaded into agent context. - Active decisions (ongoing policies, user preferences, current architecture) stay in `decisions.md`. Agents always read the lean, current shared brain. - ### Memory Architecture - ``` .squad/ ├── decisions.md # Shared — all agents read @@ -97,17 +65,11 @@ Active decisions (ongoing policies, user preferences, current architecture) stay ├── squad-conventions/SKILL.md # Starter skill └── ci-github-actions/SKILL.md # Earned skill ``` - --- - ## Directives - Directives are team rules that persist across sessions. Say "always" or "never" and Squad captures it permanently. Every agent reads directives before working. - ### Signal Word Detection - The coordinator listens for these phrases and captures them as directives: - | Phrase | Example | |--------|---------| | `"always"` | "Always use TypeScript strict mode" | @@ -116,9 +78,7 @@ The coordinator listens for these phrases and captures them as directives: | `"remember to"` | "Remember to run tests before pushing" | | `"don't"` | "Don't use var — only let and const" | | `"make sure to"` | "Make sure to document all public APIs" | - ### Capture Flow - ```mermaid sequenceDiagram participant You @@ -134,86 +94,55 @@ sequenceDiagram Scribe->>Agents: decisions.md updated Agents->>Agents: All agents read before next task ``` - ### Directive Scope - Directives can shape: - - **Coding style** — formatting, naming conventions, language features - **Tool preferences** — linters, formatters, test runners - **Workflow rules** — branch naming, commit messages, PR templates - **Scope constraints** — "Don't touch legacy/ directory" - **Review requirements** — "Always have Lead review security changes" - ### Directive Conflicts - When a new directive contradicts an existing one, the Scribe detects the 
overlap and asks you: "Replace, merge, or skip?" You decide, and `decisions.md` updates accordingly. - ### Viewing and Removing Directives - ``` Show me the team directives ``` - ``` What's our rule on testing? ``` - ``` Remove the no-Friday-deploy rule ``` - You can also edit `.squad/decisions.md` directly — it's plain Markdown. - ### Compliance - Directives are context-aware guidelines, not hard constraints. If an agent violates one, the [reviewer protocol](your-team.md#reviewer-protocol) catches it during review, or you flag it directly. - --- - ## Skills - Skills are reusable knowledge files that live at `.copilot/skills/{skill-name}/SKILL.md`. Unlike decisions (project policies like "use PostgreSQL"), skills are transferable techniques ("how to set up CI with GitHub Actions"). - **All agents can read any skill.** Skills are team-wide knowledge, not per-agent. - ### Starter vs. Earned - | Type | Source | Example | |------|--------|---------| | **Starter** | Bundled at init, prefixed `squad-` | `squad-conventions` | | **Earned** | Written by agents from real work | `ci-github-actions` | - Starter skills are overwritten on upgrade. Earned skills are never touched. - ### Confidence Lifecycle - Earned skills have a confidence level reflecting how battle-tested they are: - | Level | Meaning | |-------|---------| | **Low** | First written — based on a single experience | | **Medium** | Applied successfully in multiple contexts | | **High** | Well-established, consistently reliable | - Confidence only goes up, never down. A skill that reaches `high` stays there. - ### How Skills Get Used - 1. **Before working** — agents read skill files relevant to the task 2. **During routing** — the coordinator checks skills when deciding who to spawn (an agent with a relevant earned skill may be preferred) 3. **After working** — agents may write new skills or update existing ones based on what they learned - ### Portability - Skills export and import with your team. 
Move a trained team to a new repo, and all their earned knowledge comes along. This makes skills the most portable form of [team](your-team.md) intelligence. - --- - ## Knowledge persistence - Not all knowledge in `.squad/` lasts forever. When files grow large, Squad compacts them to keep performance fast. Here's what persists and what gets summarized: - | What | File | Compacted? | Where old content goes | Who reads it | |------|------|-----------|----------------------|-------------| | Personal history | `history.md` | Yes, at ~12 KB | `history-archive.md` (preserved, read-only) | Owning agent | @@ -224,77 +153,51 @@ Not all knowledge in `.squad/` lasts forever. When files grow large, Squad compa | Casting registry | `casting/registry.json` | Never | Permanent | Coordinator | | Session logs | `log/*.md` | Never edited | Append-only archive | Read-only | | Orchestration logs | `orchestration-log/*.md` | Never edited | Append-only archive | Read-only | - Knowledge that needs to survive compaction belongs in **skills**. Reusable patterns, code conventions, and technical techniques live here because they grow without limits. Team rules and preferences go in directives (stored in `decisions.md`) — they persist through compaction cycles because directives are preserved when decisions get summarized. - > 💡 **Where to store permanent knowledge:** Put reusable patterns and techniques in `.copilot/skills/`. Put team rules and preferences in directives (they persist in `decisions.md`). For org-wide knowledge that multiple teams need, use [upstream inheritance](/features/upstream-inheritance) to share a skills library. - --- - ## Tips - - **Commit `.squad/`** — anyone who clones the repo gets the team with all their accumulated knowledge. - Directives ("always…", "never…") are the fastest way to shape team behavior. Use them liberally. - If an agent keeps making the same mistake, check `decisions.md` — the relevant convention might be missing. 
- You can edit `decisions.md`, `history.md`, and skill files directly. They're all plain Markdown. - Manually seed skills by pasting your existing conventions into a `SKILL.md` — instant team knowledge. - --- - ## Sample Prompts - ``` Always use Prettier with single quotes and no semicolons ``` - Creates a coding style directive all agents will follow. - ``` From now on, all commit messages must follow Conventional Commits format ``` - Sets a workflow directive — agents format commits as `feat:`, `fix:`, `docs:`, etc. - ``` What does Kane remember about the authentication system? ``` - Queries Kane's personal `history.md` for relevant context. - ``` Show me the team decisions about API design ``` - Searches `decisions.md` for a particular topic. - ``` Create a skill for our deployment process ``` - Manually creates a new skill file and guides you through documenting the pattern. - ``` Which skills have low confidence? ``` - Finds recently-created skills that haven't been validated across multiple contexts yet. - ``` Never use `any` type in TypeScript — always define explicit types ``` - Establishes a type safety directive. Agents will avoid `any` and use proper types. - ``` Search past decisions for database choices ``` - Finds historical decisions related to a specific topic or keyword. - --- - ## See Also - - [Your Team](./your-team.md) — How agents use shared memory to coordinate - [Architecture](./architecture.md) — How the coordinator and agents share state - [Parallel Work](./parallel-work.md) — How agents maintain consistency while working in parallel diff --git a/docs/src/content/docs/concepts/parallel-work.md b/docs/src/content/docs/concepts/parallel-work.md index 999a3fba7..f136d33e0 100644 --- a/docs/src/content/docs/concepts/parallel-work.md +++ b/docs/src/content/docs/concepts/parallel-work.md @@ -1,32 +1,19 @@ # Parallel Work & Models - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - Squad launches independent work in parallel by default — multiple agents working simultaneously, no waiting. It also picks the right AI model for each agent based on what they're doing, so you get quality where it counts and speed everywhere else. - --- - ## Try This - ``` Have three agents work on this in parallel: UI mockups, API spec, and database schema ``` - ``` Use Sonnet for code, Haiku for everything else ``` - ``` Work on issues #12, #15, and #18 at the same time ``` - --- - ## How Parallel Execution Works - When the coordinator receives a multi-part task, it follows a fan-out pattern: - ```mermaid graph TD A["Coordinator
receives work"] @@ -50,36 +37,26 @@ graph TD G --> I H --> I ``` - 1. **Dependency Analysis** — Check if tasks have data dependencies (A needs output from B). 2. **Fan-Out** — Launch all independent agents in parallel using background mode. 3. **Wait** — Coordinator polls agent status until all complete. 4. **Collect** — Aggregate results, check for errors, route to next step. - ### Example - > "Implement user authentication: API endpoints, frontend form, tests, and documentation" - Coordinator spawns **4 agents in parallel**: - Backend → API endpoints - Frontend → Login/signup form - Tester → Integration tests - DevRel → Auth documentation - All work simultaneously. No agent waits unless there's a code dependency. - --- - ## Background vs. Sync - | Mode | When Used | Behavior | |------|-----------|----------| | **Background** | Independent work, no data dependencies | Agents run in parallel, coordinator polls for completion | | **Sync** | One agent needs another's output | Agents run sequentially, coordinator waits | | **Sync** | Reviewer gate (Lead must approve first) | Agent runs, coordinator waits for [review](your-team.md#reviewer-protocol) decision | - ### Background (Fan-Out) - ```mermaid graph LR A["Coordinator"] --> B["Agent 1
background"] @@ -92,11 +69,8 @@ graph LR F --> H G --> H ``` - Agents don't see each other's output until the coordinator collects and synthesizes. - ### Sync (Dependencies & Gates) - ```mermaid graph TD A["Coordinator"] --> B["Agent 1<br/>sync"] @@ -107,60 +81,40 @@ graph TD F --> G["Coordinator"] G --> H["Reviewer
sync gates"] ``` - Each step blocks until the previous completes. - ### Eager Execution - Squad's default is **eager parallelism** — launch everything that can run, let the coordinator handle synchronization. - - **Faster throughput** — no artificial sequencing - **Better utilization** — multiple agents saturate available compute - **Resilient** — if one agent stalls, others keep working - Trade-off: increased API cost. If cost is a concern: - ``` Work sequentially to save costs ``` - ### Deadlock Avoidance - When agents have circular dependencies (A needs B, B needs A), the coordinator detects the cycle and asks you to pick a resolution: run A first, run B first, or redesign. - ### Concurrency Limits - - **Default:** 5 agents in parallel - **Adjustable:** `"Run at most 2 agents at once"` → Coordinator batches work accordingly - --- - ## Model Selection - Squad routes each agent to the right AI model based on what they're doing — not a one-size-fits-all default. - ### Selection Layers - First match wins: - | Layer | How It Works | |-------|-------------| | **1. User Override** | You said `"use opus"` or `"save costs"` — done, session-wide | | **2. Charter Preference** | Agent's charter has a `## Model` section | | **3. Task-Aware Auto** | Coordinator checks what the agent is actually doing (see table below) | | **4. 
Default** | `claude-haiku-4.5` — cost wins when in doubt | - ### Task-Aware Defaults - | Task Output | Model | Tier | |-------------|-------|------| | Writing code (implementation, refactoring, tests, bug fixes) | `claude-sonnet-4.5` | Standard | | Writing prompts or agent designs | `claude-sonnet-4.5` | Standard | | Non-code work (docs, planning, triage, changelogs) | `claude-haiku-4.5` | Fast | | Visual/design work requiring image analysis | `claude-opus-4.5` | Premium | - ### Role-to-Model Mapping - | Role | Default Model | Why | |------|--------------|-----| | Core Dev / Backend / Frontend | `claude-sonnet-4.5` | Writes code — quality first | @@ -171,201 +125,133 @@ First match wins: | Scribe / Logger | `claude-haiku-4.5` | Mechanical file ops | | Git / Release | `claude-haiku-4.5` | Changelogs, tags, version bumps | | Designer / Visual | `claude-opus-4.5` | Vision capability required | - ### Model Catalog (16 models) - Squad supports models across three tiers: - - **Premium:** claude-opus-4.6, claude-opus-4.6-fast, claude-opus-4.5 - **Standard:** claude-sonnet-4.5, gpt-5.2-codex, claude-sonnet-4, gpt-5.2, gpt-5.1-codex, gpt-5.1, gpt-5, gemini-3-pro-preview - **Fast/Cheap:** claude-haiku-4.5, gpt-5.1-codex-mini, gpt-4.1, gpt-5-mini, gpt-5.1-codex-mini - ### Fallback Chains - If a model is unavailable (plan restriction, rate limit, deprecation), Squad silently retries with the next in chain. Never falls back **up** in tier — a fast task won't land on a premium model. - ``` Premium: claude-opus-4.6 → claude-opus-4.6-fast → claude-opus-4.5 → claude-sonnet-4.5 Standard: claude-sonnet-4.5 → gpt-5.2-codex → claude-sonnet-4 → gpt-5.2 Fast: claude-haiku-4.5 → gpt-5.1-codex-mini → gpt-4.1 → gpt-5-mini ``` - --- - ## Copilot Coding Agent (@copilot) - -Add the GitHub Copilot coding agent to your Squad as an autonomous team member. It picks up issues, creates branches, and opens PRs — all without a chat session. 
- +Add the GitHub Copilot coding agent to your Squad as an async team member. It picks up approved issues, creates branches, and opens PRs in the background. ### Prerequisites - 1. **Copilot coding agent enabled** on the repo (Settings → Copilot → Coding agent) 2. **`copilot-setup-steps.yml`** exists in `.github/` 3. **GitHub Actions** enabled on the repo - ### Quick Start - ```bash # Add @copilot with auto-assign squad copilot --auto-assign - # Create a classic PAT (repo scope) and add as secret gh secret set COPILOT_ASSIGN_TOKEN - # Commit and push git add .github/ .squad/ && git commit -m "feat: add copilot to squad" && git push - # Test — label any issue with squad:copilot ``` - Or in conversation: `"Add copilot to the squad with auto-assign enabled"` - ### How @copilot Differs - | | AI Agent | Human Member | @copilot | |---|----------|-------------|----------| | Badge | ✅ Active | 👤 Human | 🤖 Coding Agent | | Charter | ✅ | ❌ | ❌ (uses `copilot-instructions.md`) | | Works in session | ✅ | ❌ | ❌ (async via issue assignment) | -| Creates PRs | Via session | Outside Squad | Autonomously | - +| Creates PRs | Via session | Outside Squad | In the background | ### Capability Profile - The profile in `team.md` controls what @copilot handles: - | Tier | Meaning | Examples | |------|---------|----------| | 🟢 **Good fit** | Route automatically | Bug fixes, test coverage, lint fixes, dependency updates, small features, docs | | 🟡 **Needs review** | Route but flag for review | Medium features with specs, refactoring with tests, API additions | | 🔴 **Not suitable** | Route to a squad member | Architecture, multi-system design, security-critical, ambiguous requirements | - ### Auto-Assign Flow - When the `squad:copilot` label is added to an issue: 1. Workflow posts a routing comment 2. Workflow assigns `copilot-swe-agent[bot]` to the issue 3. 
Coding agent creates a `copilot/*` branch and opens a draft PR - Auto-assign requires a classic PAT stored as `COPILOT_ASSIGN_TOKEN` (fine-grained PATs return 403 for this endpoint). - --- - ## Git Worktrees - Squad supports git worktrees with two strategies for teams working across multiple branches simultaneously. - ### Worktree-Local (Independent State) - Each worktree gets its own `.squad/` directory. Agents in one worktree don't see state from another. - ``` project/ ├── .squad/ # Main worktree team - project-feature-a/ ├── .squad/ # Feature A team (independent) - project-feature-b/ ├── .squad/ # Feature B team (independent) ``` - **Best for:** multiple features with different teams, experimental branches, different compositions per worktree. - ### Main-Checkout (Shared State) - All worktrees share `.squad/` from the main checkout via symlink. - ``` project/ ├── .squad/ # Shared by all worktrees - project-feature-a/ ├── .squad -> ../project/.squad/ # Symlink - project-feature-b/ ├── .squad -> ../project/.squad/ # Symlink ``` - **Best for:** same team on multiple branches, coordinated parallel development, solo dev with multiple branches. - ### Which Strategy? - | Scenario | Strategy | |----------|----------| | Parallel features, same team | Main-checkout | | Experimental branch, isolated team | Worktree-local | | Hotfix + feature branch | Main-checkout | | Multiple teams in same repo | Worktree-local | - Setup is one command: `"Use the main worktree's team"` (creates symlink) or `"Initialize Squad in this worktree"` (creates independent `.squad/`). - Squad uses `merge=union` for append-only log files to avoid conflicts across worktrees. - --- - ## Tips - - Eager parallelism is the default. Only switch to sequential if cost is a real concern. - Start conservative with @copilot's capability profile and expand as you see what it handles well. 
-- Use `squad:copilot` labels with [issue-driven development](../scenarios/issue-driven-dev.md) for fully autonomous processing. +- Use `squad:copilot` labels with [issue-driven development](../scenarios/issue-driven-dev.md) for background processing with review gates. - Fallback chains are silent — you won't notice model switches unless you ask `"what model did Kane use?"`. - For worktrees, main-checkout is usually the right choice unless you need truly isolated teams. - --- - ## Sample Prompts - ``` Build the new dashboard feature — everyone work in parallel ``` - Coordinator spawns all relevant agents (Frontend, Backend, Tester, DevRel) simultaneously. - ``` Work on issues #12, #15, and #18 at the same time ``` - Spawns 3 agents in parallel, one per issue. - ``` Implement the API first, then write tests — do it sequentially ``` - Forces sync mode: Backend completes, then Tester starts. - ``` Run at most 2 agents at once to save costs ``` - Sets concurrency limit. Coordinator batches work in groups. - ``` Use opus for this architecture work ``` - One-off override to premium model for a high-stakes task. - ``` Always use haiku to save costs ``` - Session-wide preference for the cheapest model tier. - ``` Add copilot to the squad with auto-assign enabled ``` - Adds @copilot to the roster and configures automatic issue assignment. - ``` Use the main worktree's Squad team ``` - Creates a symlink so this worktree shares the main checkout's `.squad/` state. 
- --- - ## See Also - - [Your Team](./your-team.md) — How agents form and specialize for different roles - [Architecture](./architecture.md) — How the coordinator orchestrates parallel execution - [Memory & Knowledge](./memory-and-knowledge.md) — How agents share context across parallel work diff --git a/docs/src/content/docs/concepts/portability.md b/docs/src/content/docs/concepts/portability.md index c03ce760f..1411ebcc9 100644 --- a/docs/src/content/docs/concepts/portability.md +++ b/docs/src/content/docs/concepts/portability.md @@ -1,39 +1,25 @@ # Portability & Extensions - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Your squad isn't locked to one repo, one editor, or one set of tools. Export a trained team and import it somewhere else. Install plugins for instant expertise. Inherit org-wide practices from upstream repos. Wire up MCP servers so agents can talk to anything. - --- - ## Try This - ``` Export my team to a file — I want to use them on another project ``` - ``` Install the AWS deployment plugin ``` - ``` Add the platform team's repo as an upstream source ``` - --- - ## How It Works - Squad is designed to be **portable by default**. Four systems make this possible: - | System | What It Does | |--------|-------------| | **Export/Import** | Snapshot your entire team to a JSON file, import it anywhere | | **Plugins** | Install community bundles of agent templates, skills, and best practices | | **Upstream Inheritance** | Inherit skills, decisions, and routing from other repos | | **MCP Servers** | Extend agents with external services (GitHub, Trello, notifications) | - ``` Your Repo (.squad/) ↑ inherits from @@ -43,22 +29,15 @@ Plugins (community marketplaces) ↑ connected to MCP Servers (GitHub, Teams, Trello, Aspire, etc.) ``` - --- - ## Export & Import - Squad teams are portable. Export your trained agents, casting state, skills, and decisions to a single JSON file. 
- ### Export - ```bash squad export # creates squad-export.json squad export --out ./backups/team.json # custom path ``` - ### What's Included - | Data | Included | |------|----------| | Agent charters | ✅ | @@ -66,82 +45,54 @@ squad export --out ./backups/team.json # custom path | Casting state | ✅ | | Skills | ✅ All earned skills from `.copilot/skills/` | | Decisions | ✅ | - Skills are fully portable — they export and import with perfect fidelity. - ### Import - ```bash squad import squad-export.json ``` - If `.squad/` already exists, Squad warns you and stops. Use `--force` to archive the existing team and replace it: - ```bash squad import squad-export.json --force ``` - Nothing is deleted — the current team moves to an archive. - ### History Splitting - During import, agent histories are split into: - - **Portable knowledge** — general patterns and conventions that transfer across projects - **Project-specific learnings** — context-tagged entries tied to the original repo - Imported agents bring their skills and general knowledge without assuming your project works the same way. - --- - ## Plugins - Plugins are community-curated bundles of agent templates, skills, and best practices. Install one and your agents get instant expertise. 
- ### What's in a Plugin - - **Agent templates** — specialized role charters (e.g., "AWS DevOps", "Python Data Science") - **Skills** — reusable `.copilot/skills/{skill-name}/SKILL.md` files - **Instructions** — `decisions.md` snippets for conventions and routing - **Sample prompts** — ready-to-use prompts that activate plugin capabilities - ### Available Marketplaces - | Marketplace | What's Inside | |-------------|--------------| | **awesome-copilot** | Frontend frameworks, backend stacks, deployment patterns | | **anthropic-skills** | Claude-optimized patterns, prompt engineering, RAG | | **azure-cloud-dev** | Azure VMs, App Service, Cosmos DB, GitHub Actions | | **security-hardening** | OWASP, input validation, secrets management | - ### Installing a Plugin - ``` Install the react-component-library plugin from awesome-copilot ``` - Or use the command: - ``` /plugin install awesome-copilot/react-component-library ``` - Squad downloads the bundle, merges agent templates into `.squad/agents/`, adds skills to `.copilot/skills/`, updates `decisions.md`, and seeds agents with the new knowledge. - ### Managing Marketplaces - ``` /plugin marketplace add github/awesome-copilot # register /plugin marketplace browse awesome-copilot # browse /plugin marketplace remove awesome-copilot # unregister ``` - Installed plugins remain even after removing a marketplace — you just can't install new ones from it. - ### Creating Your Own Marketplace - A plugin marketplace is just a GitHub repo with a specific structure: - ``` my-team-plugins/ ├── awesome-patterns/ @@ -156,33 +107,23 @@ my-team-plugins/ │ └── fault-tolerance.md └── README.md ``` - Register it with `squad` and your team can install from it. - --- - ## Upstream Inheritance - Declare external Squad sources and automatically inherit their context at session start. Knowledge flows down from org → team → repo without duplicating configuration.
- ### Three Source Types - | Type | Example | Use Case | |------|---------|----------| | **local** | `../org-practices/.squad/` | Sibling repo, monorepo package | | **git** | `https://github.com/acme/platform-squad.git` | Public or private org repo | | **export** | `./exports/snapshot.json` | Offline use or version pinning | - ### What Gets Inherited - - **Skills** — all `.copilot/skills/*/SKILL.md` files - **Decisions** — `.squad/decisions.md` - **Wisdom** — `.squad/identity/wisdom.md` - **Casting Policy** — `.squad/casting/policy.json` - **Routing** — `.squad/routing.md` - ### Closest-Wins Resolution - ``` Org-level upstream ↓ @@ -192,45 +133,30 @@ Repo config (local .squad/) ↓ Agent instance ``` - Upstreams are read in order from `upstream.json` — **later entries override earlier ones** for the same content type. Your local `.squad/` always wins. - ### Quick Start - ```bash # Local upstream squad upstream add ../org-practices/.squad --name org - # Git upstream squad upstream add https://github.com/acme/platform-squad.git --name platform --ref main - # Export snapshot squad upstream add ./exports/snapshot.json --name snapshot - # List configured upstreams squad upstream list - # Sync git upstreams squad upstream sync ``` - Git upstreams clone to `.squad/_upstream_repos/{name}` (auto-added to `.gitignore`). Local and export upstreams are read live at session start — no sync needed. - --- - ## MCP Setup - MCP (Model Context Protocol) servers extend Squad with external services. Agents discover and use MCP tools automatically — no per-agent configuration required. - ### Configuration - | Platform | Config File | |----------|------------| | **Copilot CLI** | `.copilot/mcp-config.json` | | **VS Code** | `.vscode/settings.json` (under `copilot.mcp.servers`) | - ### Example: GitHub MCP - ```json { "mcpServers": { @@ -244,111 +170,76 @@ MCP (Model Context Protocol) servers extend Squad with external services. 
Agents } } ``` - Use environment variables instead of hardcoding tokens: - ```bash export GITHUB_TOKEN=$(gh auth token) ``` - ### Other Integrations - | Service | What Agents Can Do | |---------|-------------------| | **GitHub** | List issues/PRs, create branches, post comments | | **Trello** | Create cards, move between lists, update descriptions | | **Notifications** | Ping you on Teams, Discord, iMessage, webhooks | | **Aspire** | Monitor .NET deployments, check service health | - Agents discover tools at spawn time and use them naturally during work. See [GitHub Integration](github-workflow.md) for how notifications connect to your workflow. - --- - ## VS Code Integration - Squad runs identically in VS Code — same `.squad/` state, same agents, same decisions. Initialize with CLI, open in VS Code, and everything just works. - ### Key Differences from CLI - | Feature | CLI | VS Code | |---------|-----|---------| | Per-spawn model selection | ✅ | ❌ (uses session model) | | Agent execution | Background + polling | Parallel sync (results arrive together) | | SQL tool | ✅ | ❌ (use file-based state) | | File writes | Automatic | May prompt for approval (once) | - ### What's the Same - - Same `.squad/` directory and state - Same team roster, skills, and decisions - Parallel execution works (multiple agents per turn) - MCP tools are inherited from workspace config - ### Tips - - Use single-root workspaces (multi-root has path resolution bugs) - Accept file modification approval once — subsequent writes are automatic - For heavy parallel work (5+ agents), SQL workflows, or per-spawn model selection → use CLI - Check the model picker if agents seem slow — switching to Haiku gives faster responses at lower cost - --- - ## Tips - - Export before running `upgrade` — it's your rollback point. - The export JSON is human-readable — inspect it to see exactly what your team knows. - Imported agents keep their names and universe casting.
- Commit `.squad/` after importing so everyone who clones the repo gets the team. - Order matters in `upstream.json` — later entries override earlier ones. Use `remove` + `add` to reorder. - --- - ## Sample Prompts - ``` export the current team ``` - Creates a `squad-export.json` snapshot of the entire team. - ``` import squad-export.json into this repo ``` - Imports a team snapshot into the current project's `.squad/` directory. - ``` install the azure-infrastructure plugin for the DevOps agent ``` - Downloads the Azure plugin and seeds the DevOps agent with cloud expertise. - ``` show me available plugins for React development ``` - Searches all configured marketplaces for React-related plugins. - ``` add the platform team's repo as an upstream source ``` - Inherits skills, decisions, and routing from a shared org repo. - ``` show me all configured MCP servers and which ones are working ``` - Tests each MCP server and reports status. - ``` squad upstream sync ``` - Updates all git upstream clones and validates local/export paths. - ``` package our current React conventions into a plugin called react-best-practices ``` - Exports your relevant skills and decisions into a reusable plugin bundle for sharing. diff --git a/docs/src/content/docs/concepts/your-team.md b/docs/src/content/docs/concepts/your-team.md index e144b2485..f41f358ae 100644 --- a/docs/src/content/docs/concepts/your-team.md +++ b/docs/src/content/docs/concepts/your-team.md @@ -1,32 +1,19 @@ # Your Team - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Squad builds you a team of AI specialists that live in your repo. Tell it what you're working on, and it proposes a roster — backend devs, testers, writers, a lead — each with their own personality, expertise, and memory. Your team grows smarter every session. 
- --- - ## Try This - ``` Set up a team for a React + Node.js API with PostgreSQL ``` - ``` Fenster, fix the login validation bug ``` - ``` Add a security specialist to the team ``` - --- - ## How It Works - When you first run Squad in a repository, it walks through a four-step init flow: - ```mermaid graph LR A["Discovery
scan repo
detect stack"] @@ -38,14 +25,11 @@ graph LR B --> C C --> D ``` - 1. **Discovery** — Squad scans your repo: languages, file structure, test frameworks, dependencies, existing workflows. 2. **Proposal** — It suggests a roster with 3–7 members tailored to what it found. 3. **Confirmation** — You review and customize: accept as-is, add roles, remove roles, rename members. 4. **Creation** — Squad writes the `.squad/` directory, creates charters, and sets up the coordinator. - ### What Gets Created - ``` .squad/ ├── team.md # Team roster @@ -62,9 +46,7 @@ graph LR ├── orchestration-log/ # Coordinator state └── casting/ # Universe assignments ``` - ### Default Team Composition - | Role | When Included | |------|--------------| | **Lead** | Always — triages, reviews, unblocks | @@ -74,19 +56,13 @@ graph LR | **Frontend** | If React/Vue/Svelte/Angular detected | | **Backend** | If API routes, database code, or server framework detected | | **Scribe** | Always — silent decision logger | - --- - ## Human Team Members - Not every team member needs to be AI. Add real people for decisions that need a human — design sign-off, security review, product approval. - ``` Add Sarah as design reviewer ``` - Sarah appears on the roster with a 👤 Human badge. - | | AI Agent | Human Member | |---|----------|-------------| | Badge | Role-specific emoji | 👤 Human | @@ -94,27 +70,18 @@ Sarah appears on the roster with a 👤 Human badge. | History | ✅ | ❌ | | Spawned as sub-agent | ✅ | ❌ | | Can review work | ✅ | ✅ | - When work routes to a human, Squad **pauses** and tells you someone needs to act. You relay the task outside of Squad, then report back what happened. Stale reminders keep things moving. - Not sure whether someone should be a roster member or just a normal GitHub collaborator? See [When to add a human member](../features/human-team-members.md#when-to-add-a-human-member) for a decision framework. 
- --- - ## Work Routing - The coordinator routes work automatically using three strategies. First match wins: - | Strategy | How It Works | Example | |----------|-------------|---------| | **Named** | You say who does it | `"Fenster, fix the login bug"` | | **Domain** | Pattern matching in `.squad/routing.md` | `src/api/**` → Backend | | **Skill-aware** | Capability check in `.copilot/skills/` | Auth expertise → Backend or Lead | - **Routing priority:** Named > Domain > Skill-aware. If nothing matches, the Lead triages. - ### Sample Routing Table - ```markdown | Pattern | Owner | Reason | |---------|-------|--------| @@ -123,91 +90,60 @@ The coordinator routes work automatically using three strategies. First match wi | `*.test.ts` | Tester | Test files | | `docs/**` | DevRel | Documentation | ``` - GitHub issues with `squad:{member}` labels route directly — `squad:fenster` goes to Fenster, no triage needed. - ### Multi-Agent Work - Some tasks need multiple agents: - ``` Fenster, implement the API. Hockney, write the tests. ``` - The coordinator spawns both in parallel. They work independently and coordinate through shared `.squad/` state. See [Parallel Work & Models](parallel-work.md) for details. - --- - ## Reviewer Protocol - When a reviewer (Lead, Tester) rejects work, the original agent gets **locked out** — no self-revision allowed. This prevents endless fix-retry loops. 
- ``` Agent A writes code → Lead rejects → Agent A locked out → Coordinator reassigns to Agent B or escalates to you ``` - | Outcome | What Happens | |---------|-------------| | **Approve** | PR merges, issue closes, agent unlocked | | **Request changes** | Author locked out, work reassigned or escalated | - ### Lockout Details - - **Task-specific** — locked out of that PR/issue, not all work - **Session-persistent** — survives restarts (stored in `.squad/orchestration-log/`) - **Clearable** — `"Unlock Fenster for issue #42"` - ### Reviewer Authority - | Reviewer | Scope | |----------|-------| | **Lead** | Code quality, architecture, security — all submissions | | **Tester** | Correctness, test coverage — test-related changes | | **You** | Final arbiter — can override any decision | - ### Deadlock Handling - If all capable agents are locked out, the coordinator escalates to you with options: manual fix, unlock with guidance, or close as won't-fix. - --- - ## Ceremonies - Structured team meetings that trigger at key moments — automatically or on demand. - | Ceremony | Auto-Triggers When | What Happens | |----------|-------------------|-------------| | **Design Review** | Multi-agent task with 2+ agents modifying shared systems | Lead facilitates; agents weigh in on interfaces, risks, contracts | | **Retrospective** | Build failures, test failures, reviewer rejections | Lead runs root-cause analysis; decisions written to `decisions.md` | - Run either manually anytime: - ``` Run a design review before we start the authentication rebuild ``` - You can also create custom ceremonies, disable auto-triggers, or skip a ceremony for a single task. Config lives in `.squad/ceremonies.md`. 
- --- - ## Response Modes - Squad auto-selects the right level of effort for each request: - | Mode | Time | What Happens | Triggered By | |------|------|-------------|-------------| | **Direct** | ~2–3s | Coordinator answers from memory, no agent spawned | Status checks, factual questions | | **Lightweight** | ~8–12s | One agent, minimal prompt — skips charter/history/decisions | Small fixes, typos, quick follow-ups | | **Standard** | ~25–35s | Full agent spawn with charter, history, and decisions | Normal work requests | | **Full** | ~40–60s | Multi-agent parallel spawn, may trigger design review | Complex multi-domain tasks | - **Pro tip:** `"Team, ..."` prompts trigger Full mode. Named agent prompts (`"Kane, ..."`) trigger Standard. Quick questions get Direct automatically. - --- - ## Customizing After Init - | What You Say | What Happens | |--------------|-------------| | `"Add a database specialist"` | Coordinator casts a new member, creates charter, updates routing | @@ -215,141 +151,94 @@ Squad auto-selects the right level of effort for each request: | `"Change the tester to focus on integration tests"` | Updates the tester's charter and expertise | | `"Route all CSS files to Frontend"` | Adds a rule to `.squad/routing.md` | | `"From now on, McManus reviews all docs before merge"` | Creates routing rule + [directive](../features/memory.md) | - Running `init` on an existing Squad repo automatically offers upgrade mode. - --- - ## Planning your team - Before running `squad init`, think through these decisions. Squad will scan your repo and propose a team — having answers ready makes setup faster. - - **What does your project do?** Have a 1–2 sentence description of the language, stack, and purpose ready — Squad uses this to pick roles - **What roles do you need?** The [default composition](#default-team-composition) covers common cases, or let Squad propose custom roles based on your repo - **How many agents?** Typical teams are 3–7 agents. 
Scribe (memory) is always included - **Will humans join the team?** [Human members](#human-team-members) can serve as reviewers or domain experts alongside AI agents -- **Will @copilot participate?** The GitHub Copilot coding agent can pick up issues autonomously — see [Agent anatomy](#agent-anatomy) +- **Will @copilot participate?** The GitHub Copilot coding agent can pick up approved issues in the background — see [Agent anatomy](#agent-anatomy) - **How will you track work?** GitHub Issues with `squad:{member}` labels, or conversational tasking via named prompts - **Do you want review gates?** [Reviewers](#reviewer-protocol) can approve or reject work before it proceeds - **What ceremonies matter?** [Design reviews and retrospectives](#ceremonies) can auto-trigger or run on demand - **What model preferences?** Default is automatic selection, or specify preferred models per agent — see [Parallel Work & Models](parallel-work.md) -- **How many squads, and where do they live?** One squad per repo is the default — your `.squad/` directory lives alongside your code. For multi-repo projects, you can run one squad per repo (each with its own team) or share a single squad across repos using a personal squad or linked team repo. Start with one squad in one repo and expand as needed. - +- **How many squads, and where do they live?** One squad per repo is the default — your `.squad/` directory lives alongside your code. For multi-repo projects, start with one squad per repo, then use linked team roots, exports, or upstream inheritance when you need shared context. --- - ## Agent anatomy - An agent is a directory at `.squad/agents/{name}/`. The contents depend on the member type. - For how humans differ from AI agents, see [Human team members](#human-team-members) above. 
- | | @copilot (🤖) | |---|---| | Directory | None | | `charter.md` | ❌ Uses `copilot-instructions.md` | | `history.md` | ❌ | | Spawnable | ❌ Works via issue assignment | - **AI agents** have a `charter.md` (identity, expertise, voice — compiled into the system prompt at spawn time) and an optional `history.md` (append-only cross-session learnings). - **@copilot** (🤖) appears on the roster and works via GitHub issue assignment. It reads `.github/copilot-instructions.md` instead of a charter. - **Retired agents** move to `.squad/agents/_alumni/{name}/` — charter preserved as a read-only archive, not spawnable. - --- - ## Cross-agent context - Agents don't share memory directly. Context flows through explicit shared files: - - **`team.md`** — who's on the team and what they do - **`routing.md`** — work assignment rules the coordinator reads on every request - **`decisions.md`** — canonical team memory: directives, patterns, learnings - **`.squad/decisions/inbox/`** — agents drop decision files here; the Scribe merges them into `decisions.md` - Each agent's `history.md` is personal — only that agent reads it at spawn time. For the full picture on knowledge flow, see [Memory and knowledge](../features/memory.md). - --- - ## Hiring an agent - To add a new AI agent to your team: - - [ ] Create `.squad/agents/{name}/` directory - [ ] Write `charter.md` — start from `.squad/templates/charter.md` - [ ] Add to `team.md` roster with status `✅ Active` - [ ] Add to `routing.md` with work type assignments - [ ] (Optional) Create `history.md` for persistent memory - [ ] (Optional) Allocate a name via the casting system - To add a **human member**, skip the directory — just add them to `team.md`. See [Human team members](#human-team-members) for badge and routing details. - --- - ## Tips - - **Commit `.squad/`** to version control — anyone who clones the repo gets the full team with all accumulated knowledge. 
- Use human members for approval gates: design review, compliance, final sign-off. - Design reviews prevent agents from building conflicting implementations — let them run on multi-agent tasks. - Retros produce [decisions](../features/memory.md) that improve future work, not just diagnose the current failure. - You're the relay for human members. Squad can't message them directly — it tells you, and you coordinate. - --- - ## Sample Prompts - ``` Start a new Squad team for this project ``` - Triggers init mode — Squad analyzes the repo and proposes a team. - ``` Fenster, implement the new search API. Hockney, write integration tests for it. ``` - Named routing to two agents. Both spawn in [parallel](parallel-work.md). - ``` Add Jordan as security reviewer ``` - Adds a human team member with a specific review responsibility. - ``` Route all database migrations to Backend ``` - Adds a domain routing rule to `.squad/routing.md`. - ``` Lead, review PR #15 ``` - Triggers review — Lead evaluates and either approves (merge) or rejects (lockout author). - ``` Unlock Fenster for issue #42 — I've given better guidance ``` - Clears lockout so Fenster can revise the PR with your additional context. - ``` Run a retro on why those tests failed ``` - Starts a retrospective ceremony to analyze failures and capture learnings. - ``` Who handles authentication work? ``` - Coordinator checks routing and skills, reports the responsible agent(s). 
- --- - ## See Also - - [Architecture](./architecture.md) — How the coordinator, agents, and shared memory work together - [Work Routing](../features/routing.md) — How work gets assigned to the right agent - [Parallel Work & Models](./parallel-work.md) — Agents working simultaneously without conflicts diff --git a/docs/src/content/docs/cookbook/recipes.md b/docs/src/content/docs/cookbook/recipes.md index 5ebfc2e7c..3177ea79f 100644 --- a/docs/src/content/docs/cookbook/recipes.md +++ b/docs/src/content/docs/cookbook/recipes.md @@ -1,201 +1,118 @@ # Recipes & Advanced Scenarios - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - A compact cookbook of prompts, patterns, and power moves. Each recipe is a prompt you can paste straight into Squad. - --- - ## Starting Out - ### New Project from Scratch - > "Set up Squad for a new React + Node.js app" - Create a repo, run `squad`, describe your project, and agents assemble a team based on your stack. Say "team" to trigger parallel work from the start. First session is the slowest — after 2–3 sessions, agents know your conventions. - ### Joining Mid-Project - > "This project is already in progress — catch me up on what's been built and what's in the backlog" - Never too late. Run `squad`, describe the project as it is today, and let agents explore the codebase before giving tasks. Feed conventions they can't discover from code alone. After 2–3 sessions, agents are fully up to speed. - ### Large Codebase (200K+ Lines) - > "This is a 200k line codebase — help me understand the architecture before we start making changes" - Each agent gets its own 200K context window. Routing ensures only the right agent looks at relevant code. Use routing rules in `.squad/routing.md` to keep agents focused on their domain. Be explicit about scope on monorepos. 
- --- - ## Team Management - ### Running Multiple Squads - > "Export skills from my React project and import them into this new project" - Import one full squad, then cherry-pick skills from others. Skills are standalone markdown files — just copy them into `.copilot/skills/`. Best practice: merge knowledge, don't run parallel squads. - ### Moving a Team Between Repos - > "Export my team from project-a so I can import it into project-b" - ```bash squad export # In source repo squad import squad-export.json # In target repo ``` - Agents carry skills and portable knowledge. Project-specific details stay tagged so they don't bleed into the new project. - ### Where to Store `.squad/` - > "Keep .squad/ out of my main branch" - Six storage options: committed (default), gitignored, separate branch, submodule, symlink, or dev-branch-only. Solo devs: just commit it. Enterprise: gitignore or submodule. Check the decision matrix above for your setup. - ### Keeping Your Squad Across Projects - > "I want to keep my current team — don't cast a new one for this project" - Export often — at the end of each project or after a major milestone. Your squad gets smarter over time. Generic skills carry forward; project-specific details are stripped on export. - --- - ## Workflows - ### Release Process - > "We're ready to ship v1.2.0 — run the release process: changelog, tags, and publish" - Squad uses a three-branch model: dev → preview → main. The guard workflow blocks `.squad/` from reaching production. Tag from main only. Full lifecycle: prepare on dev, validate on preview, merge to main, tag, release. - ### Open Source Maintainer - > "Enable auto-triage for incoming issues on my OSS repo" - -Ralph triages issues every 6 hours via the heartbeat workflow. Skills become living contributor docs. Export your squad for forks. `go:*` labels + auto-assign = autonomous issue processing with human approval on merge. - +Ralph triages issues every 6 hours via the heartbeat workflow. 
Skills become living contributor docs. Export your squad for forks. `go:*` labels + auto-assign = background issue processing with human approval on merge. ### Private Repos & Security - > "I need to know Squad's data security model" - Squad runs entirely in your Copilot session. Nothing leaves your machine beyond Copilot's standard operation. Skills are generic and safe to share. Review agent histories before exporting — they may contain project-specific details. - --- - ## Configuration - ### Switching Models - > "Switch everyone to Haiku — I'm trying to save costs this sprint" - Squad supports 17 models across three tiers. Budget mode: `claude-haiku-4.5` for everything. Quality mode: `claude-opus-4.6` for the Lead, `claude-sonnet-4.5` for everyone else. Fallback chains handle unavailability automatically. - ### Client Compatibility - > "Does Squad work in VS Code?" - CLI is the primary platform with full features. VS Code works with conditional support — parallel subagents, workspace-scoped file access, session model selection. JetBrains and GitHub.com are untested. Both CLI and VS Code share the same `.squad/` state. - --- - ## Recovery - ### Disaster Recovery - > "My .squad/ directory was deleted — help me recover the team state" - If committed: `git checkout .squad/`. If not: rebuild with `squad` or import from a previous export. Override bad decisions with directives. Archive confused agent histories. Upgrades never touch `.squad/`. - --- - ## Pro Tips - > Patterns from real usage that make Squad click. - **Be specific about scope.** Describe the boundary, not just the task: ``` Build JWT auth for login/logout/refresh. Sessions in Redis. No OAuth yet — that's phase 2. ``` - **Say "team" for parallel work.** Naming a specific agent sends work to just them: ``` Team, build the login page. ``` - **Stack decisions in your prompt.** Early conventions prevent agents from asking questions later: ``` Always use TypeScript strict mode. Named exports only. 
React hooks, no class components. ``` - **Use bullet points for multi-part tasks.** Agents process lists better than paragraphs. - **Don't interrupt parallel work.** Let agents chain their own follow-ups. Check the work log after, not during. - **Let Ralph grind the backlog.** Say `"Ralph, go"` and Ralph triages, assigns, spawns agents, and reports every 3–5 rounds. You focus on critical-path work. - **Decision first, implementation second.** Before agents write code, have the team agree on the design: ``` Team, design the user model. Don't code yet. Write decisions to decisions.md. ``` - **Spike → Decision → Build.** For hard problems, have the Lead do a spike first: ``` Keaton, do a 20-minute spike on authentication patterns. Write a decision. ``` - --- - ## Power Prompts - Copy these directly into Squad. - ### Bootstrap a New Project - ``` I'm building a CLI tool in Go that monitors AWS costs and sends Slack alerts when spending exceeds thresholds. Set up the team. I want this done fast — everyone works at once. ``` - ### Parallel Feature Work - ``` Team, I want you to work on two things in parallel: - Feature A (Frontend + Backend): - User profile page with avatar upload - Feature B (Backend + Tester): - Rate limiting on API endpoints - Divide the team. Start both immediately. ``` - ### Architectural Spike - ``` Keaton, do a 20-minute spike on authentication patterns for this stack. Research JWT vs session-based auth. Write a decision with your recommendation. ``` - ### Issue-Driven Sprint - ``` Connect to myorg/recipe-app Show the backlog Work on #7 and #12 ``` - ### Code Review Request - ``` Michael, review the cart app. Are there issues I should know about? ``` - ### Closing a Phase - ``` Team, we're closing the MVP phase. Keaton, what's the current architecture? @@ -204,17 +121,12 @@ Dallas, what UX work is pending? Lambert, what tests are missing? Write your summary to history.md. ``` - ### Status Check - ``` What did the team accomplish last session? 
Any blockers? ``` - --- - ## See Also - - [CLI Reference](../reference/cli.md) — Every command and config file - [SDK Reference](../reference/sdk.md) — Programmatic API - [Migration & Troubleshooting](../get-started/migration.md) — Upgrades and fixes diff --git a/docs/src/content/docs/features/capability-routing.md b/docs/src/content/docs/features/capability-routing.md index d46961317..765f52070 100644 --- a/docs/src/content/docs/features/capability-routing.md +++ b/docs/src/content/docs/features/capability-routing.md @@ -1,76 +1,50 @@ ---- -title: Capability Routing -description: Machine capability discovery and needs:* label routing for hardware-specific and OS-specific work. -order: 35 ---- - -# Capability Routing - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -**Try this to declare machine capabilities:** -``` -This machine has a GPU — tag it for GPU-required work -``` - -**Try this to route an issue to a capable machine:** -``` -Label issue #42 with needs:gpu so it goes to the right runner -``` - -Squad discovers what each machine can do and routes issues only to machines that meet the requirements. No manual assignment needed for hardware- or OS-specific work. - ---- - -## What Are Capabilities? - -A capability is a label that describes what a machine can do — hardware, OS, or environment attributes that not every runner has. You declare capabilities in `machine-capabilities.json` at the project root or home directory; Squad reads them when routing issues. - -Examples: `gpu`, `windows`, `macos`, `arm64`, `high-memory`, `docker`. - -## Declaring Capabilities - -Add a `capabilities` array to `machine-capabilities.json` at the project root or home directory on each machine: - -```json -["gpu", "cuda", "high-memory"] -``` - -Squad reads this file at startup. The declared capabilities are available to the routing system immediately. 
- -## The `needs:*` Label Pattern - -Apply a `needs:*` label to any GitHub issue to require a specific capability: - -| Label | Meaning | -|-------|---------| -| `needs:gpu` | Must run on a machine with GPU | -| `needs:windows` | Must run on Windows | -| `needs:macos` | Must run on macOS | -| `needs:arm64` | Must run on ARM64 architecture | -| `needs:docker` | Must run where Docker is available | - -You can combine multiple `needs:*` labels — all must match. - -## How Routing Works - -When Ralph picks up an issue: - -1. It reads all `needs:*` labels on the issue. -2. It compares them against the current machine's declared capabilities. -3. If the machine satisfies all requirements, it proceeds. If not, it skips the issue and leaves it for a capable machine to claim. - -No central scheduler needed. Each machine self-selects based on what it can do. - -## Example Flow - -``` -Issue #99 labels: needs:gpu, needs:windows -Machine A capabilities: ["gpu", "windows", "cuda"] ← picks it up -Machine B capabilities: ["macos"] ← skips it -``` - -## See Also - -- [Work Routing](routing.md) — pattern-based and skill-aware routing -- [Ralph — Work Monitor](ralph.md) — how Ralph polls and claims issues +--- +title: Capability Routing +description: Machine capability discovery and needs:* label routing for hardware-specific and OS-specific work. +order: 35 +--- +# Capability Routing +**Try this to declare machine capabilities:** +``` +This machine has a GPU — tag it for GPU-required work +``` +**Try this to route an issue to a capable machine:** +``` +Label issue #42 with needs:gpu so it goes to the right runner +``` +Squad discovers what each machine can do and routes issues only to machines that meet the requirements. No manual assignment needed for hardware- or OS-specific work. +--- +## What Are Capabilities? +A capability is a label that describes what a machine can do — hardware, OS, or environment attributes that not every runner has. 
You declare capabilities in `machine-capabilities.json` at the project root or home directory; Squad reads them when routing issues. +Examples: `gpu`, `windows`, `macos`, `arm64`, `high-memory`, `docker`. +## Declaring Capabilities +Add a `capabilities` array to `machine-capabilities.json` at the project root or home directory on each machine: +```json +["gpu", "cuda", "high-memory"] +``` +Squad reads this file at startup. The declared capabilities are available to the routing system immediately. +## The `needs:*` Label Pattern +Apply a `needs:*` label to any GitHub issue to require a specific capability: +| Label | Meaning | +|-------|---------| +| `needs:gpu` | Must run on a machine with GPU | +| `needs:windows` | Must run on Windows | +| `needs:macos` | Must run on macOS | +| `needs:arm64` | Must run on ARM64 architecture | +| `needs:docker` | Must run where Docker is available | +You can combine multiple `needs:*` labels — all must match. +## How Routing Works +When Ralph picks up an issue: +1. It reads all `needs:*` labels on the issue. +2. It compares them against the current machine's declared capabilities. +3. If the machine satisfies all requirements, it proceeds. If not, it skips the issue and leaves it for a capable machine to claim. +No central scheduler needed. Each machine self-selects based on what it can do. 
+## Example Flow +``` +Issue #99 labels: needs:gpu, needs:windows +Machine A capabilities: ["gpu", "windows", "cuda"] ← picks it up +Machine B capabilities: ["macos"] ← skips it +``` +## See Also +- [Work Routing](routing.md) — pattern-based and skill-aware routing +- [Ralph — Work Monitor](ralph.md) — how Ralph polls and claims issues diff --git a/docs/src/content/docs/features/ceremonies.md b/docs/src/content/docs/features/ceremonies.md index 889cde063..7859953d1 100644 --- a/docs/src/content/docs/features/ceremonies.md +++ b/docs/src/content/docs/features/ceremonies.md @@ -1,91 +1,57 @@ # Ceremonies - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to schedule team ceremonies:** ``` Schedule a daily standup at 9am and a sprint retro every Friday ``` - **Try this to trigger a pre-work design review:** ``` Run a design review before we start the authentication rebuild ``` - Ceremonies are structured team meetings that happen at key moments — before multi-agent work begins, or after something goes wrong. Squad runs them automatically when needed. - --- - ## Built-in Ceremonies - ### Design Review - **Triggers automatically** before multi-agent tasks involving 2+ agents modifying shared systems. - The Lead facilitates. Each relevant agent is spawned to weigh in on interfaces, risks, and contracts before work begins. - ``` > Team, rebuild the authentication system - 📋 Design Review completed — facilitated by Ripley Decisions: 3 | Action items: 4 Agreed on JWT format, session storage strategy, and endpoint contracts ``` - ### Retrospective - **Triggers automatically** after build failures, test failures, or reviewer rejections. - The Lead facilitates a focused root-cause analysis. 
- ``` 📋 Retrospective completed — facilitated by Ripley Decisions: 2 | Action items: 3 Root cause: missing null check in API response parser ``` - --- - ## Manual Triggers - Run any ceremony on demand: - ``` > Run a retro > Run a design meeting ``` - --- - ## Managing Ceremonies - ### Create a new ceremony - ``` > Add a ceremony for code reviews ``` - ### Disable a ceremony - ``` > Disable retros ``` - The ceremony stays in `ceremonies.md` but won't auto-trigger. - ### Skip a ceremony once - ``` > Skip the design review for this task ``` - The ceremony remains enabled for future tasks. - --- - ## Summary - | Action | Prompt | |--------|--------| | Trigger a retro | `"Run a retro"` | @@ -93,44 +59,30 @@ The ceremony remains enabled for future tasks. | Create a ceremony | `"Add a ceremony for code reviews"` | | Disable a ceremony | `"Disable retros"` | | Skip once | `"Skip the design review for this task"` | - --- - ## Tips - - Design reviews prevent agents from building conflicting implementations. Let them run on multi-agent tasks. - Retros produce decisions that get written to `decisions.md` — they improve future work, not just diagnose the current failure. - Ceremony config lives in `.squad/ceremonies.md`. You can edit it directly if you prefer. - Ceremonies work well with [human team members](human-team-members.md) — add a human as a participant for approval gates. - ## Sample Prompts - ``` run a design review before we start ``` - Manually triggers a design review ceremony for the current task. - ``` run a retro on why those tests failed ``` - Starts a retrospective to analyze test failures and capture learnings. - ``` add a ceremony for security reviews ``` - Creates a custom ceremony type with its own triggers and participants. - ``` skip the design review for this quick fix ``` - Bypasses the design review ceremony for the current task only. - ``` disable automatic retros ``` - Turns off auto-triggering for retrospectives while keeping the ceremony defined. 
diff --git a/docs/src/content/docs/features/cleanup.md b/docs/src/content/docs/features/cleanup.md index 8830bff9c..5c6bef5a5 100644 --- a/docs/src/content/docs/features/cleanup.md +++ b/docs/src/content/docs/features/cleanup.md @@ -1,13 +1,8 @@ # Cleanup Watch - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to trigger a cleanup cycle:** ``` squad watch --execute ``` - **Try this to configure cleanup frequency:** ```json { @@ -17,56 +12,35 @@ squad watch --execute } } ``` - Ralph runs automated housekeeping during `squad watch` to keep `.squad/` clean — clearing temp files, archiving old logs, and flagging stale decisions. - --- - ## What Gets Cleaned - ### Scratch Directory - Clears all files in `.squad/.scratch/` — the ephemeral temp directory used for prompt files, commit drafts, and processing artifacts. These are temporary by design and safe to delete between sessions. - ### Log Archives - Archives orchestration-log and session-log entries older than the configured `maxAgeDays` (default: 30 days): - Orchestration logs (work dispatch, agent lifecycle) - Session logs (Copilot session metadata) - Archived logs are moved to `.squad/logs/archive/{YYYY-MM}/` for long-term storage without cluttering active logs. - ### Decision Inbox Warnings - Scans `.squad/decisions/inbox/` for files older than 7 days and warns you. Decision inbox files represent unmerged decisions — leaving them stale means the team's decision log is out of sync with actual project state. - ``` ⚠️ Stale decision inbox files detected: - inbox/auth-strategy-2025-01-15.md (12 days old) - inbox/api-versioning-2025-01-10.md (17 days old) - Run: squad decisions merge ``` - Cleanup doesn't auto-merge — it just warns. You decide when to merge. - --- - ## When Cleanup Runs - Cleanup runs during the **housekeeping phase** of `squad watch` — after all work is processed for the round, before the next polling interval. 
This happens every `N` rounds based on your config. - **Default behavior:** - Cleanup runs every **10 rounds** of `squad watch` - Archives logs older than **30 days** - Warns about decision inbox files older than **7 days** - --- - ## Configuration - Add a `cleanup` section to your `.squad/config.json`: - ```json { "cleanup": { @@ -75,14 +49,11 @@ Add a `cleanup` section to your `.squad/config.json`: } } ``` - | Option | Type | Default | Description | |--------|------|---------|-------------| | `everyNRounds` | number | 10 | Run cleanup every N watch rounds | | `maxAgeDays` | number | 30 | Archive logs older than this many days | - **Examples:** - Run cleanup every 5 rounds, keep 60 days of logs: ```json { @@ -92,7 +63,6 @@ Run cleanup every 5 rounds, keep 60 days of logs: } } ``` - Run cleanup every round (aggressive), keep 14 days: ```json { @@ -102,62 +72,41 @@ Run cleanup every round (aggressive), keep 14 days: } } ``` - --- - ## What Cleanup Does NOT Touch - - Earned skills in `.copilot/skills/` — never deleted - Decision log in `.squad/decisions/log.md` — never deleted - Active session data - Router state, team config, and other core Squad files - Cleanup is safe and conservative — it only removes temporary files and archives old logs. Core squad state is never touched. 
- --- - ## Manual Cleanup - You can manually trigger cleanup without running `squad watch`: - ```bash # Clean scratch dir only rm -rf .squad/.scratch/* - # Archive old logs manually squad logs archive --before 2025-01-01 - # Merge stale decision inbox squad decisions merge ``` - --- - ## Notes - - Cleanup is **opt-in** — it only runs during `squad watch`, not in standalone Copilot sessions - Cleanup logs are written to the orchestration log for audit trail - Archived logs are still accessible but separated from active logs - Decision inbox warnings are informational only — no auto-merge - --- - ## Sample Prompts - ``` Ralph, run cleanup now ``` - Triggers a cleanup cycle immediately (if Ralph is active in `squad watch`). - ``` Show me what cleanup will do ``` - Dry-run preview of cleanup actions without actually running them. - ``` How often does cleanup run? ``` - Reports the current `everyNRounds` setting from config. diff --git a/docs/src/content/docs/features/consult-mode.md b/docs/src/content/docs/features/consult-mode.md index 4c0b8e3af..b93fa688f 100644 --- a/docs/src/content/docs/features/consult-mode.md +++ b/docs/src/content/docs/features/consult-mode.md @@ -1,39 +1,23 @@ # Consult Mode - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -Consult mode lets you bring your personal squad to projects you don't own — OSS contributions, client work, temporary collaborations — without leaving any trace. Your team consults, does the work, learns things, and returns home with only the generic learnings you approve. - +Consult mode lets you bring an established squad into projects you don't own — OSS contributions, client work, temporary collaborations — without leaving any trace. Your team consults, executes the approved work, learns things, and returns home with only the generic learnings you approve. 
--- - ## The Problem - -You have a personal squad at your global path (e.g., `~/Library/Application Support/squad/personal-squad` on macOS, `~/.config/squad/personal-squad` on Linux) with agents, skills, and decisions refined over time. When you contribute to someone else's project, you face a dilemma: - +You have an established source squad stored outside the target repository, with agents, skills, and decisions refined over time. When you contribute to someone else's project, you face a dilemma: - **Pollute the project?** Running `squad init` creates a `.squad/` folder they didn't ask for - **Pollute your squad?** Project-specific knowledge bleeds into your global squad - **Work without your team?** Lose the productivity benefits you've built up - --- - ## The Solution - Your team **consults** on a project. They bring their expertise, do the work, and learn things. When done, they extract what's reusable and return home. The project never knows Squad was there. - | Aspect | Normal Mode | Consult Mode | |--------|-------------|--------------| -| Squad location | `.squad/` in project | **Copy** of personal squad into project `.squad/` | +| Squad location | `.squad/` in project | **Copy** of source squad into project `.squad/` | | Git visibility | Committed or `.gitignore` | Invisible via `.git/info/exclude` | | Writes go to | Project `.squad/` | Project `.squad/` (isolated copy) | -| After session | Stays in project | Extract generic learnings → personal squad, discard rest | - +| After session | Stays in project | Extract generic learnings → source squad, discard rest | --- - ## Quick Start - ### OSS Contribution - ```bash cd ~/projects/kubernetes-dashboard squad consult # Enter consult mode @@ -41,9 +25,7 @@ squad consult # Enter consult mode squad extract # Review and extract generic learnings squad extract --clean --yes # Clean up after extraction ``` - ### Client Work - ```bash cd ~/client-projects/acme-corp squad consult # Enter consult mode @@ -51,62 +33,44 
@@ squad consult # Enter consult mode squad extract --dry-run # Preview what would be extracted squad extract --clean # Extract and clean up (prompts for confirmation) ``` - ### Check Status - ```bash squad consult --status # See if consult mode is active squad consult --check # Dry-run: show what would happen ``` - --- - ## Command Reference - ### `squad consult` - -Enter consult mode with your personal squad. - +Enter consult mode with your source squad. ```bash squad consult # Enter consult mode squad consult --status # Check current consult mode status squad consult --check # Dry-run: show what would happen without creating files ``` - **What happens:** - -1. Copies your personal squad into the project's `.squad/` directory +1. Copies your source squad into the project's `.squad/` directory 2. Adds `.squad/` and `.github/agents/squad.agent.md` to `.git/info/exclude` 3. Patches the Scribe charter with extraction instructions 4. Creates a staging area at `.squad/extract/` for generic learnings - **Created structure:** - ``` -.squad/ # Full copy of personal squad +.squad/ # Full copy of source squad ├── config.json # { "consult": true, "sourceSquad": "...", ... } -├── agents/ # Copied from personal squad -├── skills/ # Copied from personal squad -├── decisions.md # Copied from personal squad +├── agents/ # Copied from source squad +├── skills/ # Copied from source squad +├── decisions.md # Copied from source squad ├── scribe-charter.md # Patched with consult mode extraction instructions ├── sessions/ # Local session history └── extract/ # Staging area for generic learnings - .github/agents/ └── squad.agent.md # Points to local .squad/ (also excluded from git) ``` - **Requirements:** - -- You must have a personal squad configured +- You must have a source squad configured - The project must not already have a committed `.squad/` folder - --- - ### `squad extract` - -Extract generic learnings from a consult session back to your personal squad. 
- +Extract generic learnings from a consult session back to your source squad. ```bash squad extract # Review and extract generic learnings squad extract --dry-run # Preview what would be extracted (no changes) @@ -114,77 +78,51 @@ squad extract --clean # Also delete project .squad/ after (prompts fo squad extract --clean --yes # Delete without confirmation squad extract --accept-risks # Allow extraction despite license risks ``` - **What happens:** - 1. Reads the project's LICENSE file 2. Loads staged learnings from `.squad/extract/` 3. Presents an interactive selection UI -4. Merges selected items to your personal squad -5. Logs the consultation to `/consultations/{project}.md` +4. Merges selected items to your source squad +5. Logs the consultation to `{source-squad}/consultations/{project}.md` 6. Optionally cleans up the project `.squad/` directory - **Example output:** - ``` 📤 Learnings staged for extraction: - ⚠️ License: MIT (safe to extract) - Found 3 learning(s) in .squad/extract/: [1] use-async-await.md [2] validate-inputs.md [3] prefer-composition.md - Select learnings to extract (space to toggle, enter to confirm): ❯ ◉ use-async-await.md ◉ validate-inputs.md ◉ prefer-composition.md - Extract 3 learning(s)?
[Y/n] ``` - --- - ## Learning Classification - During your consult session, the **Scribe** automatically classifies decisions as they're made: - ### Generic (applies to any project) - Copied to `.squad/extract/` for later extraction: - - "Always use async/await instead of callbacks" - "Validate inputs at API boundaries" - "Prefer composition over inheritance" - Best practices, coding standards, patterns that work anywhere - ### Project-specific (only applies here) - Kept in local `decisions.md` only — not extracted: - - References to specific file paths in the project - Project-specific config, APIs, or schemas - Decisions that mention "this project" or "this codebase" - **You always have final say.** The Scribe proposes by writing to `extract/`, you approve or reject via `squad extract`. No extraction happens without your explicit confirmation. - --- - ## License Handling - ### Permissive Licenses (Safe) - MIT, Apache, BSD, ISC — proceed normally: - ``` ⚠️ License: MIT (safe to extract) ``` - ### Copyleft Licenses (Blocked) - GPL, AGPL, LGPL — extraction is blocked by default: - ``` 🚫 License: GPL-3.0 (copyleft) Extraction blocked. Patterns from copyleft projects may carry @@ -194,70 +132,46 @@ GPL, AGPL, LGPL — extraction is blocked by default: To proceed anyway: squad extract --accept-risks ``` - To override: - ```bash squad extract --accept-risks ``` - --- - ## Technical Notes - ### Git Invisibility - Consult mode uses `.git/info/exclude` to hide Squad files: - - Same syntax as `.gitignore` - Lives inside `.git/`, so it's never committed - Project owners never see it - `git status` shows nothing Squad-related - ### Why Copy Instead of Reference? 
- -Your personal squad is **copied** into the project rather than referenced: - -- Changes during the session don't pollute your personal squad +Your source squad is **copied** into the project rather than referenced: +- Changes during the session don't pollute your source squad - Session-specific decisions stay isolated until explicitly extracted - Works offline (no dependency on external path) - Clean separation between "consulting" and "bringing home" - ### Consultation Log - -All consultations are tracked in your personal squad at `consultations/{project}.md`: - +All consultations are tracked in your source squad at `consultations/{project}.md`: ```markdown # kubernetes-dashboard - **First consulted:** 2026-02-27 **Last session:** 2026-03-15 **License:** Apache-2.0 - ## Sessions - ### 2026-02-27 - use-async-await.md: "### Always use async/await..." - validate-inputs.md: "### Validate inputs at API..." - ### 2026-03-15 - prefer-composition.md: "### Prefer composition over..." ``` - --- - ## Tips - - Run `squad consult --check` before entering consult mode to preview what will happen - Use `squad extract --dry-run` to review staged learnings without committing - The `--clean` flag is convenient for OSS drive-by contributions where you won't return - Consult mode errors out if the project already has a committed `.squad/` — use normal mode instead -- Your personal squad is never modified during the session — only via explicit `squad extract` - +- Your source squad is never modified during the session — only via explicit `squad extract` --- - ## Next Steps - -- **Set up a personal squad:** See [Your Personal Squad](../guide/personal-squad.md) for initial setup with `squad init --global` - **Learn about sharing:** See [Export & Import](./export-import.md) for portable team snapshots - **Upstream inheritance:** See [Upstream Inheritance](./upstream-inheritance.md) for knowledge sharing across teams diff --git a/docs/src/content/docs/features/context-hygiene.md 
b/docs/src/content/docs/features/context-hygiene.md index 9850e863f..e59ddea63 100644 --- a/docs/src/content/docs/features/context-hygiene.md +++ b/docs/src/content/docs/features/context-hygiene.md @@ -1,130 +1,84 @@ -# Context Hygiene: Nap, Reskill, and Compact - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -**Try this to compact your team's memory:** -``` -Team, take a nap -``` - -**Try this to refresh agent skills:** -``` -Team, reskill -``` - -**Try this to do both and report results:** -``` -Team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations -``` - -Over multiple sessions, Squad's `.squad/` files grow — agent histories, decisions, skill files. Context hygiene commands let you actively manage that growth so agents stay fast and focused. - ---- - -## Nap - -**What it does:** Summarizes accumulated work into smaller, more efficient memory files. This is the same as running `/compact` in the CLI or `squad nap` from the command line. - -When you tell the team to "take a nap," each agent: - -1. Reviews its `history.md` and other state files -2. Compresses older entries into concise summaries -3. Archives verbose detail while preserving key decisions and learnings -4. Reports how much context was reclaimed - -### Nap ≠ Shutting Down - -This is the most common misconception: - -| Action | What happens to `.squad/` files | -|--------|-------------------------------| -| **Shutting down Squad** (closing the CLI, killing the process) | Files stay exactly as they are. Nothing is summarized or compacted. | -| **Nap** (`team, take a nap` or `squad nap`) | Files are actively summarized and compacted. Older entries are archived, working context gets leaner. | - -Shutting down Squad every night does **not** perform context hygiene. You must explicitly tell the team to take a nap. 
- -### CLI equivalents - -```bash -squad nap # Standard context hygiene -squad nap --deep # Thorough cleanup with recursive descent -squad nap --dry-run # Preview what would be cleaned up -``` - -In the interactive shell, use `/compact` for the same effect. - ---- - -## Reskill - -**What it does:** Tells agents to re-examine their skills, validate them against the current codebase, and potentially discover new patterns. - -When you tell the team to "reskill," agents: - -1. Review existing skill files in `.copilot/skills/` -2. Validate that documented patterns still apply -3. Look for new reusable patterns from recent work -4. Update skill confidence levels based on current evidence - -### Availability - -> **Note:** As of now, reskill requires running Squad from source (via symlink). It is not yet available through `squad upgrade`. This will change in a future release. - ---- - -## Combined Commands - -You can trigger nap and reskill together in a single prompt: - -``` -Team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations -``` - -This runs both behaviors and gives you a report on how much context was reduced — useful for understanding how lean your team's working memory is before the next session. - ---- - -## When to Use These - -| Situation | Command | -|-----------|---------| -| After several work sessions, agents feel slow or unfocused | `team, take a nap` | -| Codebase has changed significantly and skills may be stale | `team, reskill` | -| Before a major new phase of work | Combine both | -| End of sprint / milestone | `squad nap --deep` | - ---- - -## Tips - -- **Nap regularly.** A few sessions of heavy work can bloat history files. Napping keeps context budgets in check. -- **Don't rely on shutdown.** Closing the CLI preserves files as-is — it does not compact anything. -- **Reskill after refactors.** If you've restructured the codebase, agent skills may reference outdated patterns. 
-- **Check the dry run first.** Use `squad nap --dry-run` to preview cleanup actions before committing to them. - -## Sample Prompts - -``` -team, take a nap -``` - -Compacts and summarizes all agent memory files, reclaiming context space. - -``` -team, reskill -``` - -Agents re-examine and validate their skills against the current codebase. - -``` -team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations -``` - -Combines both behaviors and reports back on total context reduction. - -``` -squad nap --dry-run -``` - -Previews what a nap would clean up without making any changes. +# Context Hygiene: Nap, Reskill, and Compact +**Try this to compact your team's memory:** +``` +Team, take a nap +``` +**Try this to refresh agent skills:** +``` +Team, reskill +``` +**Try this to do both and report results:** +``` +Team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations +``` +Over multiple sessions, Squad's `.squad/` files grow — agent histories, decisions, skill files. Context hygiene commands let you actively manage that growth so agents stay fast and focused. +--- +## Nap +**What it does:** Summarizes accumulated work into smaller, more efficient memory files. This is the same as running `/compact` in the CLI or `squad nap` from the command line. +When you tell the team to "take a nap," each agent: +1. Reviews its `history.md` and other state files +2. Compresses older entries into concise summaries +3. Archives verbose detail while preserving key decisions and learnings +4. Reports how much context was reclaimed +### Nap ≠ Shutting Down +This is the most common misconception: +| Action | What happens to `.squad/` files | +|--------|-------------------------------| +| **Shutting down Squad** (closing the CLI, killing the process) | Files stay exactly as they are. Nothing is summarized or compacted. 
| +| **Nap** (`team, take a nap` or `squad nap`) | Files are actively summarized and compacted. Older entries are archived, working context gets leaner. | +Shutting down Squad every night does **not** perform context hygiene. You must explicitly tell the team to take a nap. +### CLI equivalents +```bash +squad nap # Standard context hygiene +squad nap --deep # Thorough cleanup with recursive descent +squad nap --dry-run # Preview what would be cleaned up +``` +In the interactive shell, use `/compact` for the same effect. +--- +## Reskill +**What it does:** Tells agents to re-examine their skills, validate them against the current codebase, and potentially discover new patterns. +When you tell the team to "reskill," agents: +1. Review existing skill files in `.copilot/skills/` +2. Validate that documented patterns still apply +3. Look for new reusable patterns from recent work +4. Update skill confidence levels based on current evidence +### Availability +> **Note:** As of now, reskill requires running Squad from source (via symlink). It is not yet available through `squad upgrade`. This will change in a future release. +--- +## Combined Commands +You can trigger nap and reskill together in a single prompt: +``` +Team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations +``` +This runs both behaviors and gives you a report on how much context was reduced — useful for understanding how lean your team's working memory is before the next session. +--- +## When to Use These +| Situation | Command | +|-----------|---------| +| After several work sessions, agents feel slow or unfocused | `team, take a nap` | +| Codebase has changed significantly and skills may be stale | `team, reskill` | +| Before a major new phase of work | Combine both | +| End of sprint / milestone | `squad nap --deep` | +--- +## Tips +- **Nap regularly.** A few sessions of heavy work can bloat history files. Napping keeps context budgets in check. 
+- **Don't rely on shutdown.** Closing the CLI preserves files as-is — it does not compact anything. +- **Reskill after refactors.** If you've restructured the codebase, agent skills may reference outdated patterns. +- **Check the dry run first.** Use `squad nap --dry-run` to preview cleanup actions before committing to them. +## Sample Prompts +``` +team, take a nap +``` +Compacts and summarizes all agent memory files, reclaiming context space. +``` +team, reskill +``` +Agents re-examine and validate their skills against the current codebase. +``` +team, reskill, take a nap, and let me know how much context you cleared out collectively for future iterations +``` +Combines both behaviors and reports back on total context reduction. +``` +squad nap --dry-run +``` +Previews what a nap would clean up without making any changes. diff --git a/docs/src/content/docs/features/coordinator-as-agent-export.md b/docs/src/content/docs/features/coordinator-as-agent-export.md index 8ad8179c5..a5635de97 100644 --- a/docs/src/content/docs/features/coordinator-as-agent-export.md +++ b/docs/src/content/docs/features/coordinator-as-agent-export.md @@ -2,34 +2,23 @@ title: Coordinator-as-Agent Export description: Compile your squad's coordinator into a repo-native Copilot custom agent file with squad export agent. --- - # Coordinator-as-Agent Export - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to generate a coordinator agent:** ```bash squad export agent ``` - **Try this for CI drift detection:** ```bash squad export agent --check ``` - **Try this for live development:** ```bash squad export agent --watch ``` - `squad export agent` compiles your `.squad/` state — team roster, routing rules, ceremony triggers, agent charters — into a single repository-native Copilot custom agent at `.github/agents/squad.md`. 
The generated file works across **every** Copilot surface (CLI, VS Code, GitHub Desktop, github.com) without requiring the Squad runtime installed. - This is the "ship Squad as a portable agent" path. Use it when you want collaborators or downstream repos to get the benefit of your squad's setup with **zero install** — they just check out the repo and the agent is available. - --- - ## When to use it - | Scenario | Use `squad export agent`? | |----------|-----------------------| | You want collaborators to use your team's coordinator without installing the CLI | ✅ Yes | @@ -37,20 +26,13 @@ This is the "ship Squad as a portable agent" path. Use it when you want collabor | You want CI to enforce that `.github/agents/squad.md` stays in sync with `.squad/` | ✅ Yes — use `--check` | | You need full Squad runtime features (Scribe, Ralph, MCP state tools, ceremonies) | ❌ No — install the CLI | | You want to share state (decisions, history) not just coordinator behavior | ❌ No — use [`squad export`](/squad/docs/features/export-import/) (snapshot mode) | - The exported coordinator agent has access to the team's roster and routing logic but does NOT include the live Squad runtime. Sub-agents in the exported coordinator will be dispatched via Copilot's native `task` tool, not via Squad's full spawn machinery. - --- - ## Commands - ### `squad export agent` - Generate or update `.github/agents/squad.md` from your current `.squad/` state: - ```bash $ squad export agent - 🔧 Compiling coordinator agent... - Read team.md (8 members) - Read routing.md (24 work-type entries) @@ -58,52 +40,35 @@ $ squad export agent - Loaded 8 agent charters - Compiled prompt: 12,847 tokens (under 14k soft budget — full mode) - Wrote .github/agents/squad.md (38,294 bytes) - ✓ Coordinator exported to .github/agents/squad.md ``` - The output is a self-contained Copilot custom-agent file with proper YAML frontmatter and a compiled coordinator prompt. 
Anyone in the repo can now run `copilot --agent squad` and get the coordinator's behavior. - ### `squad export agent --watch` - Re-export on every change to `.squad/`. Useful during active team development when you want the exported agent file to track your edits: - ```bash $ squad export agent --watch 👀 Watching .squad/ for changes... ✓ .github/agents/squad.md up to date - [edit .squad/routing.md] 🔄 .squad/routing.md changed — re-exporting... ✓ .github/agents/squad.md updated (38,401 bytes) ``` - Press `Ctrl+C` to stop. - ### `squad export agent --check` - Verify that `.github/agents/squad.md` is in sync with the current `.squad/` state. Exits with non-zero if drift is detected. Use this in CI to enforce "if you change `.squad/`, you must re-run `squad export agent`": - ```bash $ squad export agent --check - ✓ .github/agents/squad.md is up to date - # Or, on drift: - ✗ Drift detected: .squad/routing.md changed but .github/agents/squad.md not regenerated. Run 'squad export agent' to update. exit 1 ``` - ### `squad export agent --dry-run` - Preview what would be written without actually writing the file: - ```bash $ squad export agent --dry-run - 🔍 DRY RUN — would write to .github/agents/squad.md: Size: 38,294 bytes Prompt tokens: ~12,847 @@ -113,32 +78,22 @@ $ squad export agent --dry-run description: ... No changes made. ``` - ### `squad export agent --compact` - Force compact mode even if the prompt fits within the soft budget. Useful for keeping the generated file lean intentionally: - ```bash squad export agent --compact ``` - Compact mode omits some optional sections (extended examples, on-demand reference pointers) and is the default when the prompt would otherwise exceed the soft token budget. 
- --- - ## Token budget modes - The exporter adapts automatically to your team size: - | Mode | Trigger | What's in the prompt | |------|---------|---------------------| | **Full** | ≤8 members AND prompt < 14k tokens | Full charters inlined, all routing tables, complete ceremony definitions | | **Compact** | Prompt 14k–20k tokens OR `--compact` flag | Condensed charters, abbreviated examples, on-demand references | | **Lazy-load** | >8 members OR roster > 3k tokens | Coordinator instructed to load charters on demand at dispatch time | | **(fails)** | Prompt > 20k hard budget | Fails with diagnostics — suggests removing rarely-used members or splitting the squad | - These thresholds protect against generating a coordinator file that's too large for the LLM's context budget. If you hit the hard limit, the exporter prints actionable diagnostics: - ``` ✗ Coordinator prompt exceeds hard budget (22,841 / 20,000 tokens). Top contributors: @@ -151,24 +106,16 @@ These thresholds protect against generating a coordinator file that's too large - Split into multiple squads (see Multiple Squads docs) - Use --compact to drop ~2k tokens ``` - --- - ## Safety guarantees - The exporter is conservative about what it writes: - - **Won't overwrite user-owned agent files.** If `.github/agents/squad.md` exists and lacks the generated-file marker comment header, the export fails unless you pass `--force`. - **Detects legacy `squad.agent.md` collisions.** The classic Squad CLI installation puts the coordinator at `.github/agents/squad.agent.md`. The export warns if both files would exist, and `squad init`/`squad upgrade` skip writing `squad.agent.md` when an exported `squad.md` is present. - **Generated files are marked.** The output starts with a generated-file marker comment header so the file is unambiguous. - **`--check` mode never mutates.** Safe to run on every CI build.
- --- - ## CI integration - The pattern most teams use: - ```yaml # .github/workflows/squad-drift-check.yml name: Squad drift check @@ -185,15 +132,10 @@ jobs: - run: npm install -g @bradygaster/squad-cli - run: squad export agent --check ``` - If anyone changes `.squad/` without re-exporting, the PR fails CI with a clear message. - --- - ## What gets compiled into the exported agent - The exporter loads these sources from your `.squad/`: - | File | Used for | |------|----------| | `.squad/team.md` | Roster — agent names, roles, charter paths | @@ -201,36 +143,25 @@ The exporter loads these sources from your `.squad/`: | `.squad/ceremonies.md` | Auto-trigger definitions for design review, retro, etc. | | `.squad/config.json` | State backend selection, model preferences | | `.squad/agents/{name}/charter.md` | Per-agent role definitions (inlined or referenced based on budget mode) | - These get rendered into the output as: - - **YAML frontmatter** — `name`, `description`, `model`, `tools` declarations - **Coordinator prompt body** — dispatch rules, routing table, ceremony triggers, agent identities, response mode selection - **On-demand reference markers** — pointers to charters in lazy-load mode - What's NOT compiled in: - Decisions ledger (use snapshot export to share state) - Agent histories (personal learnings, owner-only) - Skills (live as separate files in `.copilot/skills/`) - Casting state (registry, history) - --- - ## Architecture (for the curious) - The export pipeline lives at `packages/squad-sdk/src/repo-native/`: - 1. **Context loader** — parses `.squad/` files into a typed IR 2. **Prompt compiler** — renders the coordinator prompt with budget enforcement 3. **Frontmatter renderer** — emits valid custom-agent YAML 4. **File writer** — handles safety checks and atomic write - You can use the SDK module directly if you want to embed coordinator export into your own tooling — see `@bradygaster/squad-sdk/repo-native`. 
- --- - ## See also - - [Export & Import](/squad/docs/features/export-import/) — full state snapshots (different from this) - [Self Upgrade](/squad/docs/features/self-upgrade/) — keeping the CLI itself updated - [Multiple Squads](/squad/docs/scenarios/multiple-squads/) — when one team gets too large diff --git a/docs/src/content/docs/features/copilot-coding-agent.md b/docs/src/content/docs/features/copilot-coding-agent.md index 095819c68..81e6173ce 100644 --- a/docs/src/content/docs/features/copilot-coding-agent.md +++ b/docs/src/content/docs/features/copilot-coding-agent.md @@ -1,101 +1,62 @@ # Copilot Coding Agent (@copilot) - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -Add the GitHub Copilot coding agent to your Squad as an autonomous team member. It picks up issues, creates branches, and opens PRs — all without a Copilot chat session. - +Add the GitHub Copilot coding agent to your Squad as an async team member. It picks up approved issues, creates branches, and opens PRs in the background. --- - ## Prerequisites - Before enabling @copilot on your Squad, ensure: - 1. **Copilot coding agent is enabled** on the repository (Settings → Copilot → Coding agent) 2. **`copilot-setup-steps.yml`** exists in `.github/` (defines the agent's environment) 3. **GitHub Actions** are enabled on the repository - --- - ## Quick Start - ```bash # 1. Add @copilot to your squad with auto-assign squad copilot --auto-assign - # 2. Create a classic PAT for auto-assignment (see below) # https://github.com/settings/tokens/new → check "repo" scope - # 3. Add the PAT as a repo secret gh secret set COPILOT_ASSIGN_TOKEN - # 4. Commit and push git add .github/ .squad/ && git commit -m "feat: add copilot to squad" && git push - # 5. 
Test — label any issue with squad:copilot gh issue edit {issue-number} --add-label "squad:copilot" ``` - > **Why can't I use `gh issue edit --add-assignee "@copilot"`?** Bot accounts cannot be assigned via the GitHub CLI the same way as human users. Use label-based assignment instead. See [FAQ: Why doesn't gh issue edit --add-assignee "@copilot" work?](../guide/faq.md#why-doesnt-gh-issue-edit---add-assignee-copilot-work) for details. - --- - ## Enabling @copilot - ### In conversation (recommended) - Say something like: - **"I want to add copilot to the squad"** - **"cast copilot to the squad"** - **"add team member copilot"** - The coordinator will add @copilot to the roster and ask about auto-assign. - > **Note:** If your project has features named "copilot" (e.g., a Copilot extension), the coordinator may misinterpret the phrase as project work. Use the CLI fallback in that case. - ### During team setup (new projects) - Squad asks if you want to include the coding agent during `init`. Say **yes** and it's added to the roster with a default capability profile. - ### Via CLI (fallback) - ```bash # Add @copilot to the team squad copilot - # Add with auto-assign enabled squad copilot --auto-assign - # Remove from the team squad copilot --off ``` - --- - ## COPILOT_ASSIGN_TOKEN (required for auto-assign) - The `squad-issue-assign` workflow needs a **classic Personal Access Token** to assign `copilot-swe-agent[bot]` to issues. The default `GITHUB_TOKEN` cannot do this. - ### Create the token - 1. Go to https://github.com/settings/tokens/new 2. **Note:** `squad-copilot-assign` 3. **Expiration:** 90 days (or your preference) 4. **Scopes:** check **`repo`** (full control of private repositories) 5. Click **Generate token** - ### Add as repo secret - ```bash gh secret set COPILOT_ASSIGN_TOKEN --repo owner/repo ``` - > **Why a classic PAT?** Fine-grained PATs return `403 Resource not accessible` for this endpoint.
The REST API for assigning `copilot-swe-agent[bot]` requires a classic PAT with `repo` scope. The `GITHUB_TOKEN` silently ignores the assignment. - --- - ## How @copilot Differs from Other Members - | | AI Agent | Human Member | @copilot | |---|----------|-------------|----------| | Badge | ✅ Active | 👤 Human | 🤖 Coding Agent | @@ -103,119 +64,78 @@ gh secret set COPILOT_ASSIGN_TOKEN --repo owner/repo | Charter | ✅ | ❌ | ❌ (uses `copilot-instructions.md`) | | Works in session | ✅ | ❌ | ❌ (asynchronous via issue assignment) | | Spawned by coordinator | ✅ | ❌ | ❌ | -| Creates PRs | Via session commands | Outside Squad | Autonomously | - +| Creates PRs | Via session commands | Outside Squad | In the background | --- - ## Capability Profile - The capability profile in `team.md` defines what @copilot should and shouldn't handle: - | Tier | Meaning | Examples | |------|---------|----------| | 🟢 **Good fit** | Route automatically | Bug fixes, test coverage, lint fixes, dependency updates, small features, docs | | 🟡 **Needs review** | Route to @copilot but flag for PR review | Medium features with specs, refactoring with tests, API additions | | 🔴 **Not suitable** | Route to a squad member instead | Architecture, multi-system design, security-critical, ambiguous requirements | - The profile is editable. The Lead can suggest updates based on experience: - ``` > @copilot nailed that refactoring — bump refactoring to good fit > That API change needed too much context — keep multi-endpoint work at not suitable ``` - --- - ## Auto-Assign Flow - When the `squad:copilot` label is added to an issue: - 1. **Step 1** — Workflow posts a routing comment (uses `GITHUB_TOKEN`) 2. **Step 2** — Workflow assigns `copilot-swe-agent[bot]` to the issue (uses `COPILOT_ASSIGN_TOKEN`) 3. **Step 3** — Coding agent picks up the issue, creates a `copilot/*` branch, and opens a draft PR - The workflow automatically detects the repo's default branch (`main`, `master`, etc.). 
- --- - ## Lead Triage - The Lead evaluates every issue against @copilot's capability profile during triage: - 1. **Good fit?** → Routes to @copilot with reasoning 2. **Needs review?** → Routes to @copilot, flags for squad member PR review 3. **Not suitable?** → Routes to the right squad member, explains why not @copilot - The Lead can also suggest reassignment in either direction: - ``` > This test coverage task could go to @copilot — want me to reassign? > @copilot might struggle with this — suggesting we reassign to Ripley. ``` - --- - ## Labels - When @copilot is on the team, the `sync-squad-labels` workflow creates: - | Label | Color | Purpose | |-------|-------|---------| -| `squad:copilot` | 🟢 Green | Assigned to @copilot for autonomous work | - +| `squad:copilot` | 🟢 Green | Assigned to @copilot for background work | This works alongside the existing `squad` (triage) and `squad:{member}` labels. - --- - ## copilot-instructions.md - -The `.github/copilot-instructions.md` file gives the coding agent context about your Squad when it works autonomously. It tells @copilot to: - +The `.github/copilot-instructions.md` file gives the coding agent context about your Squad when it works in the background. It tells @copilot to: - Read `team.md` for roster and capability profile - Read `routing.md` for work routing rules - Check its capability profile before starting (and request reassignment if the issue doesn't match) - Follow the `squad/{issue}-{slug}` branch naming convention - Write decisions to the inbox for the Scribe to merge - This file is **upgraded automatically** when you run `squad upgrade` and `@copilot` is on your team — even if Squad is already up to date. If @copilot is not enabled, the file is left untouched. - --- - ## Tips - - Start conservative with the capability profile and expand as you see what @copilot handles well. -- Use auto-assign for repos where you want fully autonomous issue processing. 
+- Use auto-assign for repos where you want background issue processing with clear guardrails. - The coding agent works great alongside [issue-driven development](../scenarios/issue-driven-dev.md) — label issues `squad` and the Lead + @copilot handle the rest. - @copilot's PRs go through normal review — treat them like any team member's work. - ## Sample Prompts - ``` add copilot to the squad with auto-assign enabled ``` - Adds @copilot to the roster and configures automatic issue assignment. - ``` what's copilot's capability profile? ``` - Shows which task types are marked as good fit, needs review, or not suitable for @copilot. - ``` reassign issue #42 from copilot to Kane ``` - Routes an issue away from @copilot to a different squad member. - ``` bump refactoring to good fit for copilot ``` - Updates the capability profile to mark refactoring tasks as automatically routable to @copilot. - ``` review copilot's PR on #56 ``` - Spawns the appropriate squad member to review @copilot's pull request. diff --git a/docs/src/content/docs/features/copilot-mcp-trust.md b/docs/src/content/docs/features/copilot-mcp-trust.md index e8c3ef96e..45ecf8524 100644 --- a/docs/src/content/docs/features/copilot-mcp-trust.md +++ b/docs/src/content/docs/features/copilot-mcp-trust.md @@ -1,23 +1,12 @@ # Copilot CLI Non-Interactive MCP Trust Gate - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - When `squad watch` or another Squad automation spawns `copilot -p` (non-interactive mode), it automatically injects `--yolo --additional-mcp-config @.mcp.json` into every Copilot sub-invocation. This page explains why that injection is mandatory and what to do if `squad_state_*` tools are silently unavailable. - --- - ## What Is the Trust Gate? - Copilot CLI 1.0.59+ protects against loading arbitrary MCP binaries from workspace files by requiring the user to explicitly trust a folder before its `.mcp.json` is auto-loaded. 
In **interactive mode** this is a one-time prompt ("Trust this folder?"). In **non-interactive (`-p`) mode** there is no UI, so the gate cannot be satisfied and workspace `.mcp.json` is silently skipped. - This is a security measure (RCE prevention), not a bug. - --- - ## Empirical Test Matrix - The following was verified against Copilot CLI 1.0.59: - | Invocation | `.mcp.json` loaded? | |------------|---------------------| | `copilot -p "..."` | ❌ No | @@ -25,29 +14,18 @@ The following was verified against Copilot CLI 1.0.59: | `copilot --yolo --autopilot -p "..."` | ❌ No | | `copilot --additional-mcp-config @.mcp.json --yolo -p "..."` | ✅ **Yes** | | Interactive `copilot` → "Trust folder?" → Yes | ✅ Yes (not automatable) | - The `--additional-mcp-config @` flag bypasses the trust gate for the explicitly named file and is the only proven workaround for non-interactive sessions. - --- - ## How Squad Handles This Automatically - `squad watch`, the loop command, and any other Squad automation that spawns `copilot` as a subprocess automatically prepend: - ``` --yolo --additional-mcp-config @/abs/path/to/.mcp.json ``` - before the `-p` prompt and any other flags. You do **not** need to add these flags yourself when using Squad commands. - `--yolo` also suppresses the per-tool-call consent prompt that would cause `copilot -p` to hang waiting for input in non-interactive mode. - --- - ## Recommended `package.json` Script - If you write your own non-interactive Copilot scripts (CI, cron jobs, shell aliases), use this pattern to ensure `.mcp.json` is loaded: - ```json { "scripts": { @@ -55,29 +33,18 @@ If you write your own non-interactive Copilot scripts (CI, cron jobs, shell alia } } ``` - Then invoke it as: - ```bash npm run squad:copilot -- --yolo -p "Your prompt here" ``` - The `--yolo` flag is intentionally omitted from the `package.json` script itself so that interactive runs (`npm run squad:copilot`) still show per-tool consent prompts by default. 
- --- - ## Troubleshooting - **`squad_state_*` tools are not available in `squad watch` sessions** - 1. Verify `.mcp.json` exists at the repo root: `cat .mcp.json` 2. If missing, run `squad init` or `squad upgrade` to regenerate it 3. Confirm the file has a `squad_state` entry under `mcpServers` - **Squad emits `⚠ .mcp.json not found at <path>`** - This warning appears when Squad tries to inject MCP config but `.mcp.json` is absent. Run `squad init` or `squad upgrade` to create it. - **`.copilot/mcp-config.json` still exists from an older Squad version** - Squad automatically tombstones (removes) the `squad_state` entry from `.copilot/mcp-config.json` during `init` and `upgrade`. Both files can coexist; Squad reads only `.mcp.json` for its own state tools. diff --git a/docs/src/content/docs/features/cost-tracking.md b/docs/src/content/docs/features/cost-tracking.md index 13b638421..a726266e7 100644 --- a/docs/src/content/docs/features/cost-tracking.md +++ b/docs/src/content/docs/features/cost-tracking.md @@ -1,89 +1,63 @@ -# Token Usage & Cost Tracking - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -Squad can track token usage and estimated cost for each agent spawn, roll that data up by session, and expose it through orchestration logs, terminal summaries, and telemetry backends. 
- ---- - -## Overview - -- Squad tracks token usage (input/output tokens) and estimated cost per agent spawn -- Usage data is recorded in orchestration logs and available via `squad cost` CLI -- Optional budget limits can be configured per agent or per session - ---- - -## How It Works - -- The `CostTracker` class (`packages/squad-sdk/src/runtime/cost-tracker.ts`) accumulates token data -- Each orchestration log entry includes a **Token usage** row -- OTel metrics (`squad.tokens.input`, `squad.tokens.output`, `squad.tokens.cost`) are emitted when telemetry is enabled - -The orchestration log template stores usage in a markdown table row like this: - -```md -| **Token usage** | 12,450 in / 3,200 out — $0.0234 | -``` - ---- - -## Viewing Costs - -```bash -squad cost # current session costs -squad cost --all # all historical costs -squad cost --agent fenster # costs for specific agent -``` - -**Example output:** - -```text -=== Squad Cost Summary === -Total input tokens: 12,450 -Total output tokens: 3,200 -Estimated cost: $0.0234 - ---- By Agent --- - fenster: 12,450in / 3,200out ($0.0234) [1 turns, model: claude-sonnet-4.5] - ---- By Session --- - session-abc123: 12,450in / 3,200out ($0.0234) [1 turns] -``` - ---- - -## Budget Configuration - -```typescript -import { defineSquad, defineAgent, defineBudget } from '@bradygaster/squad-sdk'; - -export default defineSquad({ - defaults: { - budget: defineBudget({ - perAgentSpawn: 50000, - perSession: 500000, - warnAt: 0.8, - }), - }, - agents: [ - defineAgent({ - name: 'fenster', - role: 'Core Dev', - budget: defineBudget({ perAgentSpawn: 100000 }), - }), - ], -}); -``` - -- `perAgentSpawn` limits an individual agent invocation -- `perSession` limits the total budget for the coordinator session -- `warnAt` emits warnings when usage reaches a fraction of the configured limit - ---- - -## OTel Integration - -- Token metrics are exported as OpenTelemetry counters when telemetry is enabled -- Compatible with Aspire dashboard, 
Grafana, and any OTel-compatible backend -- Metrics: `squad.tokens.input`, `squad.tokens.output`, `squad.tokens.cost` +# Token Usage & Cost Tracking +Squad can track token usage and estimated cost for each agent spawn, roll that data up by session, and expose it through orchestration logs, terminal summaries, and telemetry backends. +--- +## Overview +- Squad tracks token usage (input/output tokens) and estimated cost per agent spawn +- Usage data is recorded in orchestration logs and available via `squad cost` CLI +- Optional budget limits can be configured per agent or per session +--- +## How It Works +- The `CostTracker` class (`packages/squad-sdk/src/runtime/cost-tracker.ts`) accumulates token data +- Each orchestration log entry includes a **Token usage** row +- OTel metrics (`squad.tokens.input`, `squad.tokens.output`, `squad.tokens.cost`) are emitted when telemetry is enabled +The orchestration log template stores usage in a markdown table row like this: +```md +| **Token usage** | 12,450 in / 3,200 out — $0.0234 | +``` +--- +## Viewing Costs +```bash +squad cost # current session costs +squad cost --all # all historical costs +squad cost --agent fenster # costs for specific agent +``` +**Example output:** +```text +=== Squad Cost Summary === +Total input tokens: 12,450 +Total output tokens: 3,200 +Estimated cost: $0.0234 +--- By Agent --- + fenster: 12,450in / 3,200out ($0.0234) [1 turns, model: claude-sonnet-4.5] +--- By Session --- + session-abc123: 12,450in / 3,200out ($0.0234) [1 turns] +``` +--- +## Budget Configuration +```typescript +import { defineSquad, defineAgent, defineBudget } from '@bradygaster/squad-sdk'; +export default defineSquad({ + defaults: { + budget: defineBudget({ + perAgentSpawn: 50000, + perSession: 500000, + warnAt: 0.8, + }), + }, + agents: [ + defineAgent({ + name: 'fenster', + role: 'Core Dev', + budget: defineBudget({ perAgentSpawn: 100000 }), + }), + ], +}); +``` +- `perAgentSpawn` limits an individual agent invocation +- 
`perSession` limits the total budget for the coordinator session +- `warnAt` emits warnings when usage reaches a fraction of the configured limit +--- +## OTel Integration +- Token metrics are exported as OpenTelemetry counters when telemetry is enabled +- Compatible with Aspire dashboard, Grafana, and any OTel-compatible backend +- Metrics: `squad.tokens.input`, `squad.tokens.output`, `squad.tokens.cost` diff --git a/docs/src/content/docs/features/cross-squad-discover.md b/docs/src/content/docs/features/cross-squad-discover.md index 6912a14e0..879d04ca6 100644 --- a/docs/src/content/docs/features/cross-squad-discover.md +++ b/docs/src/content/docs/features/cross-squad-discover.md @@ -2,88 +2,59 @@ title: Cross-Squad Discover & Delegate description: Discover other squads across repository boundaries and delegate work to them via squad discover and squad delegate. --- - # Cross-Squad Discover & Delegate - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to see what squads you can reach:** ```bash squad discover ``` - **Try this to send work to another squad:** ```bash squad delegate platform-squad "Add monitoring dashboard for the auth service" ``` - When you have multiple Squad-enabled repositories — a platform squad, a frontend squad, a data squad — you often need to ask the other squad to do something for you. Cross-squad orchestration lets a squad **discover** other squads and **delegate** issues to them, with proper labels and contact info, so the right team picks it up automatically. - Both commands work via GitHub issues — no shared infrastructure required. Each squad's manifest declares what it accepts and how to reach it. 
- --- - ## How it works - Each squad publishes a `.squad/manifest.json` file declaring: - Its name and capabilities (e.g., `kubernetes`, `helm`, `monitoring`) - Its GitHub repo (the contact) - The labels to apply to cross-squad issues - What work types it accepts (`issues`, `prs`) - Named skills it offers - When you run `squad discover`, Squad reads manifests from: - **Upstream** — repos declared in your `.squad/upstreams/` - **Registry** — any registry sources configured - **Local** — manifests inside the current repo - When you run `squad delegate <squad-name> "<description>"`, Squad finds the target manifest, creates a properly-labeled GitHub issue in its repo, and includes structured cross-squad metadata so the other squad's coordinator picks it up correctly. - --- - ## Commands - ### `squad discover` - List known squads and their capabilities: - ```bash $ squad discover - Discovered Squads (3): - Name Capabilities Repo Accepts ────────────── ────────────────────────── ────────────────────── ──────── platform-squad kubernetes, helm, infra myorg/platform issues frontend-squad react, design-system, ui myorg/web-app issues data-squad etl, dbt, pipelines, ml myorg/data-platform issues, prs ``` - If no squads are discovered, the output reminds you to configure upstreams or check that other repos have published `manifest.json` files. 
- ### `squad delegate <squad-name> "<description>"` - Create a cross-squad work request in another squad's repository: - ```bash squad delegate platform-squad "Add Grafana dashboards for the auth service's p95 latency" ``` - This creates an issue in `myorg/platform` titled `[cross-squad] Add Grafana dashboards...` with: - Squad's discovered labels applied automatically - A structured body with the originating repo, target squad, description, and acceptance criteria - The created issue's URL printed to your terminal - Required: - The target squad's manifest must include the work type — `accepts: ["issues"]` for the default delegation flow - GitHub CLI (`gh`) installed and authenticated with permission to create issues in the target repo - --- - ## Publishing a manifest for your own squad - To make your squad discoverable by others, create `.squad/manifest.json`: - ```json { "name": "platform-squad", @@ -98,7 +69,6 @@ To make your squad discoverable by others, create `.squad/manifest.json`: "skills": ["k8s-deployment", "helm-chart-authoring", "prometheus-alerting"] } ``` - | Field | Required | Notes | |-------|----------|-------| | `name` | ✅ | Human-readable name (e.g., `platform-squad`). Used in `squad delegate <squad-name>`. | @@ -109,50 +79,31 @@ To make your squad discoverable by others, create `.squad/manifest.json`: | `contact.labels` | optional | Labels applied to every cross-squad issue (so your triage automation can find them) | | `accepts` | ✅ | Work types: `["issues"]`, `["prs"]`, or both | | `skills` | optional | Named skills your squad offers — informational today, filterable in future | - Commit `manifest.json` to your repo's `.squad/` directory and push. Other squads that have your repo in their upstream list will pick it up on next `squad discover`. 
- --- - ## What the delegated issue looks like - When `squad delegate` creates an issue, it uses this structured body so the receiving squad's coordinator can recognize and route it correctly: - ```markdown ## Cross-Squad Work Request - **From:** this repository **To:** platform-squad (myorg/platform) - ### Description - Add Grafana dashboards for the auth service's p95 latency - ### Acceptance Criteria - - [ ] Work completed and verified - [ ] Originating squad notified of completion - *Created by squad cross-squad orchestration* ``` - The receiving squad sees an issue with their `cross-squad` (and any custom) labels, structured metadata in the body, and clear acceptance criteria. - --- - ## Limitations in v0.10 - - **No automatic completion notification.** When the receiving squad closes the issue, the originating squad doesn't get notified back automatically. Today you watch the issue manually or via standard GitHub notifications. - **Discovery is upstream-driven.** A squad has to know about another squad (via an upstream declaration) before `discover` can see it. There's no global registry. - **No capability filtering on delegate.** `squad delegate <squad-name>` requires the exact squad name. You can't say "delegate this to any squad that has the `kubernetes` capability" — yet. - **PRs aren't supported in the default flow.** Even though manifests can declare `accepts: ["prs"]`, the v0.10 `delegate` command only creates issues. - These are tracked for follow-up. For now, cross-squad orchestration is a useful but minimal MVP — it removes the "where do I file this?" friction and ensures the right team gets a properly-formatted request. 
- --- - ## See also - - [Distributed Mesh](/squad/docs/features/distributed-mesh/) — the broader cross-repo coordination architecture - [Multiple Squads](/squad/docs/scenarios/multiple-squads/) — running several squads in one organization - [Upstream Inheritance](/squad/docs/features/upstream-inheritance/) — how upstreams get discovered diff --git a/docs/src/content/docs/features/directives.md b/docs/src/content/docs/features/directives.md index 6097ab3ca..d8971c6eb 100644 --- a/docs/src/content/docs/features/directives.md +++ b/docs/src/content/docs/features/directives.md @@ -1,40 +1,26 @@ # Directives - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to establish team coding standards:** ``` From now on, all tests must use Jest instead of Mocha ``` - **Try this to set formatting rules:** ``` Always use single quotes in TypeScript ``` - **Try this to enforce workflow policies:** ``` Never commit directly to main ``` - Directives are team rules that persist across sessions. When you say "always" or "never", Squad captures it and writes it to the team's permanent memory. Every agent reads these before working. - --- - ## How Directives Work - A directive is a preference, rule, or constraint the team remembers across sessions. When you say "always do X" or "never do Y", Squad captures it as a directive, writes it to the decisions inbox, and the Scribe merges it into `.squad/decisions.md` — the team's permanent memory. - ## How Directives Work - 1. **Signal Word Detection** — The coordinator listens for: "always", "never", "from now on", "remember to", "don't", "make sure to". 2. **Capture** — Directive is written to `.squad/decisions/inbox/{timestamp}-{brief-slug}.md`. 3. **Scribe Merge** — Scribe consolidates inbox files into `decisions.md` during the next coordination cycle. 4. **Agent Awareness** — All agents read `decisions.md` before starting work. Directives shape behavior. 
- ## Signal Words - | Phrase | Example | |--------|---------| | "always" | "Always use TypeScript strict mode" | @@ -43,141 +29,95 @@ A directive is a preference, rule, or constraint the team remembers across sessi | "remember to" | "Remember to run tests before pushing" | | "don't" | "Don't use var — only let and const" | | "make sure to" | "Make sure to document all public APIs" | - ## Directive Scope - Directives can apply to: - - **Coding style** — Formatting, naming conventions, language features - **Tool preferences** — Linters, formatters, test runners - **Workflow rules** — Branch naming, commit messages, PR templates - **Scope constraints** — "Don't touch legacy/ directory", "Only work on v2 features" - **Review requirements** — "Always have Lead review security changes" - ## Examples - > "Always use single quotes for strings in TypeScript" - **Captured:** ```markdown # Single Quotes for Strings Date: 2024-01-15 Scope: TypeScript code - Use single quotes for string literals. Avoid double quotes unless escaping is required. ``` - > "Never deploy on Fridays" - **Captured:** ```markdown # No Friday Deploys Date: 2024-01-15 Scope: Release process - Do not trigger production deploys on Fridays. Schedule for Monday-Thursday only. ``` - > "From now on, all API endpoints need integration tests" - **Captured:** ```markdown # API Integration Test Coverage Date: 2024-01-15 Scope: Testing - Every new API endpoint requires at least one integration test covering the happy path and one error case. ``` - ## Decisions Inbox - New directives land in `.squad/decisions/inbox/` as individual files: - ``` .squad/decisions/inbox/ ├── 2024-01-15-1420-single-quotes.md ├── 2024-01-15-1435-no-friday-deploys.md └── 2024-01-15-1450-api-test-coverage.md ``` - The Scribe periodically consolidates these into `decisions.md`: - ```markdown # Team Decisions - ## Coding Style - ### Single Quotes for Strings Use single quotes for string literals in TypeScript. 
Avoid double quotes unless escaping is required. - ## Testing - ### API Integration Test Coverage Every new API endpoint requires at least one integration test covering the happy path and one error case. - ## Release Process - ### No Friday Deploys Do not trigger production deploys on Fridays. Schedule for Monday-Thursday only. ``` - ## Directive Conflicts - When a new directive contradicts an existing one: - 1. **Scribe detects conflict** — Checks for semantic overlap during merge. 2. **User prompt** — "New directive conflicts with existing rule: {old rule}. Replace, merge, or skip?" 3. **Resolution** — Scribe updates `decisions.md` based on your choice. - ## Viewing Directives - > "Show me the team directives" - Coordinator displays `decisions.md` content. - > "What's our rule on testing?" - Coordinator searches `decisions.md` for testing-related directives. - ## Removing Directives - > "Remove the no-Friday-deploy rule" - Scribe edits `decisions.md` and removes that section. - Or edit `.squad/decisions.md` directly. - ## Agent Directive Compliance - Agents are not hard-constrained by directives — they're context-aware guidelines. If an agent violates a directive: - - **Reviewer rejection** — Lead or Tester flags it during review. - **User feedback** — You say "this violates our style rule" and the agent revises. - Directives shape behavior but don't replace code review or linting. - ## Sample Prompts - ``` Always use Prettier with single quotes and no semicolons ``` Creates a coding style directive. All agents will format code accordingly. - ``` Never use `any` type in TypeScript — always define explicit types ``` Establishes a type safety directive. Agents will avoid `any` and use proper types. - ``` From now on, all commit messages must follow Conventional Commits format ``` Sets a workflow directive. Agents will format commits as `feat:`, `fix:`, `docs:`, etc. 
- ``` Remember to update the CHANGELOG.md for every user-facing change ``` Creates a release process directive. Agents will add changelog entries when appropriate. - ``` Make sure all security-related PRs are reviewed by Lead before merging ``` diff --git a/docs/src/content/docs/features/distributed-mesh.md b/docs/src/content/docs/features/distributed-mesh.md index 33b6d7a21..346fa5344 100644 --- a/docs/src/content/docs/features/distributed-mesh.md +++ b/docs/src/content/docs/features/distributed-mesh.md @@ -1,51 +1,31 @@ # Distributed Mesh - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to coordinate squads across machines:** ``` Set up a distributed mesh so my local squad can see the state of our CI squad ``` - **Try this to sync remote squad state:** ``` Run sync-mesh.sh to pull the latest state from all remote squads ``` - The distributed mesh lets squads on different machines coordinate through git and HTTP. Local squads read remote squad state after syncing it locally. - --- - ## What Is the Distributed Mesh? - One sentence: - > **"The filesystem is the mesh, and git is how the mesh crosses machine boundaries."** - Squad agents always read local files. When squads live on different machines, you need to materialize remote state locally before agents can see it. The distributed mesh does this through simple sync scripts — no servers, no federation protocols, no real-time messaging. - --- - ## Three Zones - | Zone | Description | Transport | Complexity | |------|-------------|-----------|------------| | **1 — Local** | Same host/filesystem | Direct file read | Zero | | **2 — Remote-Trusted** | Different host, same org | `git pull` from shared repo | Zero new (git exists) | | **3 — Remote-Opaque** | Different org, no shared auth | `curl` / HTTP fetch | ~15 lines of shell | - **Zone 1 (Local):** `cat ../squad-b/SUMMARY.md` works because the file is on your disk. 
- **Zone 2 (Remote-Trusted):** Squads push their state to a shared git repo. You pull from that repo to materialize their state locally. - **Zone 3 (Remote-Opaque):** A remote organization publishes their squad's `SUMMARY.md` at an HTTPS URL. You curl it to materialize locally. - --- - ## How It Works - ### Agent Lifecycle with Sync - ``` Agent wakes up │ @@ -55,29 +35,19 @@ Agent wakes up ├─ WRITE: update own billboard, log, drops └─ PUBLISH: git push ``` - Two new steps (SYNC, PUBLISH). Both are transport only — they move files, not change them. - ### What Doesn't Change - - Agents read local files - Write partitioning (each squad owns its directory) - Pull-based coordination - Eventual consistency - LLMs as the relevance engine - ### What Changes - Remote files need to arrive locally before agents can read them. - --- - ## Configuration - ### The `mesh.json` File - One JSON file lists where to find each squad: - ```json { "squads": { @@ -95,47 +65,31 @@ One JSON file lists where to find each squad: } } ``` - ### Sync Scripts - **Bash (requires `jq` and `git`):** - ```bash ./sync-mesh.sh # reads mesh.json, materializes remote state ``` - **PowerShell (requires `git` only):** - ```powershell .\sync-mesh.ps1 # default: reads mesh.json .\sync-mesh.ps1 -MeshJson custom.json # custom config path ``` - Both scripts read `mesh.json`, pull from remote-trusted repos, curl from remote-opaque URLs, and materialize everything into `.mesh/remotes/`. - --- - ## Getting Started - ### Prerequisites - - Git (with SSH or HTTPS auth configured) - A shell (bash/zsh) or PowerShell - `jq` ([github.com/jqlang/jq](https://github.com/jqlang/jq)) for the bash sync script (PowerShell script has no external dependencies) - ### 1. Create the Mesh State Repo - The **mesh state repo** is a shared git repository where squads publish their current state. Nothing more — no code, no automation, no agents. 
- ```bash git clone git@github.com:our-org/squad-mesh-state.git cd squad-mesh-state ``` - ### 2. Directory Structure - One directory per squad, each with a `SUMMARY.md`: - ``` squad-mesh-state/ ├── README.md # What this repo is, who participates @@ -146,21 +100,15 @@ squad-mesh-state/ └── data-squad/ └── SUMMARY.md # Data squad's current state ``` - ### 3. Register Your Squad - Create your directory, write initial state, push: - ```bash mkdir my-squad echo "# my-squad — active" > my-squad/SUMMARY.md git add . && git commit -m "register my-squad" && git push ``` - ### 4. Configure `mesh.json` - Point at the shared repo: - ```json { "squads": { @@ -172,22 +120,15 @@ Point at the shared repo: } } ``` - ### 5. Run Your First Sync - ```bash ./sync-mesh.sh # reads mesh.json, materializes remote state ls .mesh/remotes/ # should show directories per remote squad ``` - > **Does the mesh state repo need its own Squad?** No. It's a shared data directory — a dumb pipe. No agents, no `.squad/` folder, no automation. Each squad pushes its own state via write partitioning. The repo is just a git-based rendezvous point. If you later want a "mesh observer" that monitors all squads, THAT would be its own Squad project — but it's not required and shouldn't be the state repo itself. - --- - ## Cross-Org Setup (Zone 3) - Remote org publishes `SUMMARY.md` at a URL. Add an HTTP entry to `mesh.json`: - ```json "partner-squad": { "zone": "remote-opaque", @@ -195,54 +136,30 @@ Remote org publishes `SUMMARY.md` at a URL. Add an HTTP entry to `mesh.json`: "sync_to": ".mesh/remotes/partner-squad" } ``` - --- - ## How This Relates to Other Features - ### SubSquads (Streams) - **SubSquads** partition work **within a single repo** using GitHub labels (e.g., `team:ui`, `team:backend`). Each SubSquad runs in its own Codespace but shares the same git repository. 
- **Distributed mesh** coordinates **across repos and machines** — different organizations, different git repos, potentially no shared authentication. - SubSquads solve "one repo, many teams." Distributed mesh solves "many repos, many machines, crossing org boundaries." - See [SubSquads](./streams.md) for within-repo partitioning. - ### Export & Import - **Export/import** is a **snapshot-based** knowledge transfer. You export a trained squad from one repo and import it into another. It's a one-time copy. - **Distributed mesh** is **continuous coordination**. Remote squads keep working; you sync their latest state every time your agents wake up. - Use export/import when you want to **clone a team**. Use distributed mesh when you want **live coordination**. - See [Multiple Squads scenario](../scenarios/multiple-squads.md) for when to use each approach. - --- - ## Upstream inheritance - The **upstream module** and the **distributed mesh** serve different coordination needs. They're complementary, not competing. - ### Upstream: top-down inheritance - The `upstream/` module (configured in `upstream.json`) is for **hierarchical inheritance**. An organization-level or team-level squad pushes skills, decisions, wisdom, casting policy, and routing rules **down** to project squads. The consuming squad treats upstream content as **read-only** — it inherits conventions but doesn't write back. - ### Mesh: peer coordination - The distributed mesh (configured in `mesh.json`) is for **peer-to-peer coordination**. Squads on equal footing share their **current state** with each other. Each squad **publishes** its own state (SUMMARY.md, billboards) and **reads** everyone else's. It's read-write for each squad's own directory. - ### Use them together - A squad can have **both** an upstream (inheriting org conventions) **and** mesh peers (coordinating with sibling squads). 
For example: - - Your project squad inherits security policies and routing rules from the org-level squad via `upstream.json` - The same squad coordinates with other project squads (auth, ci, data) via `mesh.json` - ### Comparison - | | Upstream | Mesh | |---|---|---| | **Direction** | Top-down (parent → child) | Peer-to-peer (squad ↔ squad) | @@ -251,35 +168,22 @@ A squad can have **both** an upstream (inheriting org conventions) **and** mesh | **Config file** | `upstream.json` | `mesh.json` | | **Transport** | Local path / git clone / export JSON | Local path / git pull / HTTP curl | | **Use case** | Org policies flowing into team projects | Sibling squads keeping each other informed | - ### What neither does - Neither upstream nor mesh is about **agent-to-agent communication within a single squad**. That's the drop-box pattern — agents write to `decisions/inbox/`, read from `history.md`, and coordinate asynchronously within one `.squad/` directory. - --- - ## Skill scope - When you ask an agent to set up a distributed mesh, the skill produces three things: - 1. **`mesh.json` config file** — defines squads, zones, and sync sources 2. **A decision entry** — records why you configured the mesh this way 3. **Sync scripts** — copies pre-built `sync-mesh.sh` and `sync-mesh.ps1` from the skill's bundled resources - The skill does **not** generate: - - ❌ Code (validators, helpers, utilities) - ❌ Tests (the sync scripts are pre-tested templates) - ❌ Custom sync scripts (bundled scripts are copied, not regenerated) - **Why this matters:** Deterministic skills give you consistent results. The sync scripts are bundled with the distributed-mesh skill. Agents shouldn't waste time generating validators or rewriting sync logic from scratch — they should copy the bundled scripts and configure your `mesh.json`. - If you need to customize the sync behavior, edit the copied scripts in your project root. The mesh skill's job ends at configuration. 
- --- - ## What We're NOT Building - - ❌ Federation protocol (git push/pull IS federation) - ❌ Discovery service (mesh.json IS discovery) - ❌ Auth system (git auth IS the auth system) @@ -288,31 +192,21 @@ If you need to customize the sync behavior, edit the copied scripts in your proj - ❌ Real-time sync (agents are async; eventual consistency is correct) - ❌ Message queues (agents aren't persistent; nobody's listening) - ❌ CRDTs/conflict resolution (write partitioning; no conflicts possible) - --- - ## Sample Prompts - ``` configure a distributed mesh with our CI squad on GitHub ``` - Creates a `mesh.json` entry for a remote-trusted squad and runs the first sync. - ``` sync remote squad state before starting work ``` - Runs the sync script to materialize the latest state from all configured remote squads. - ``` add a partner squad from https://partner.dev/squad-contracts/SUMMARY.md ``` - Adds a remote-opaque Zone 3 entry to `mesh.json` for cross-org coordination. - ``` show me what remote squads are configured ``` - Lists all squads in `mesh.json` and their zones. diff --git a/docs/src/content/docs/features/dual-mode-deployment.md b/docs/src/content/docs/features/dual-mode-deployment.md index ca31f2752..06538b55a 100644 --- a/docs/src/content/docs/features/dual-mode-deployment.md +++ b/docs/src/content/docs/features/dual-mode-deployment.md @@ -2,96 +2,62 @@ title: Dual-Mode Deployment — Pod-Aware Capabilities description: Run Squad in either agent-per-node or squad-per-pod deployment modes with pod-specific machine capability manifests, controlled by SQUAD_POD_ID and SQUAD_DEPLOYMENT_MODE env vars. --- - # Dual-Mode Deployment — Pod-Aware Capabilities - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- Dual-mode deployment extends [Capability Routing](/squad/docs/features/capability-routing/) to support both classic single-machine setups and modern containerized/Kubernetes deployments where multiple Squad pods may share an organization's workload — each with potentially different machine capabilities. - It introduces two environment variables and a pod-specific manifest lookup pattern so the same Squad config can run identically in either deployment shape. - --- - ## The two deployment modes - | Mode | What it means | Capability manifest | |------|---------------|---------------------| | **`agent-per-node`** (default) | One Squad instance per machine; the machine's capabilities are the squad's capabilities | `.squad/machine-capabilities.json` (shared) | | **`squad-per-pod`** | Multiple Squad pods may run on different machines/containers, each with potentially different capabilities | `.squad/machine-capabilities-{podId}.json` (pod-specific) with fallback chain | - Choose the mode via the `SQUAD_DEPLOYMENT_MODE` environment variable: - ```bash # Classic single-machine setup (default) export SQUAD_DEPLOYMENT_MODE=agent-per-node - # Kubernetes / multi-pod setup export SQUAD_DEPLOYMENT_MODE=squad-per-pod export SQUAD_POD_ID=worker-1 ``` - If neither is set, the SDK defaults to `agent-per-node` for backward compatibility. - --- - ## Environment variables - ### `SQUAD_DEPLOYMENT_MODE` - | Value | Behavior | |-------|----------| | `agent-per-node` | Single shared `machine-capabilities.json` | | `squad-per-pod` | Pod-specific manifests with fallback chain | | (unset) | Same as `agent-per-node` | - ### `SQUAD_POD_ID` - Pod identifier used to construct the pod-specific manifest path. Required when `SQUAD_DEPLOYMENT_MODE=squad-per-pod`; ignored otherwise. 
- ```bash SQUAD_POD_ID=worker-1 # → .squad/machine-capabilities-worker-1.json SQUAD_POD_ID=gpu-pool-node-3 # → .squad/machine-capabilities-gpu-pool-node-3.json ``` - --- - ## The fallback chain (squad-per-pod mode) - When `SQUAD_DEPLOYMENT_MODE=squad-per-pod` AND `SQUAD_POD_ID` is set, the SDK looks up capabilities in this order: - 1. **`.squad/machine-capabilities-{podId}.json`** — pod-specific (highest priority) 2. **`.squad/machine-capabilities.json`** — shared fallback for capabilities that apply to all pods 3. **`~/.squad/machine-capabilities.json`** — user-home fallback (rarely useful in container deployments) 4. **`null`** — opt-out; capability routing falls back to label-only routing - The first manifest that exists is loaded; the search stops there (no merging). If you need different pods to see different capability sets, give each its own pod-specific file. If you need a shared baseline plus pod-specific additions, merge at the deployment-config level (Helm, Kustomize, etc.) — the SDK doesn't merge automatically. - --- - ## SDK programmatic access - The new exports from `@bradygaster/squad-sdk/ralph/capabilities`: - ```typescript import { getDeploymentMode, getPodId, type DeploymentMode, } from '@bradygaster/squad-sdk/ralph/capabilities'; - const mode: DeploymentMode = getDeploymentMode(); // 'agent-per-node' | 'squad-per-pod' const podId: string | undefined = getPodId(); // e.g. 'worker-1', or undefined ``` - These are pure env-var readers. They don't cache or memoize — each call reads `process.env` directly so changes between reads are visible. 
- --- - ## Typical Kubernetes deployment shape - In a KEDA-scaled deployment (see [KEDA Scaling](/squad/docs/features/keda-scaling/)), each scaled pod gets a unique `SQUAD_POD_ID` from the pod's name or hash: - ```yaml # Deployment env block env: @@ -102,9 +68,7 @@ env: fieldRef: fieldPath: metadata.name ``` - The pod's mounted volume contains per-pod manifests baked in by the image build or pulled from a ConfigMap, e.g.: - ``` /app/.squad/ ├── machine-capabilities.json # shared baseline (CPU, memory) @@ -112,21 +76,14 @@ The pod's mounted volume contains per-pod manifests baked in by the image build ├── machine-capabilities-gpu-pool-node-2.json # same shape └── machine-capabilities-cpu-pool-node-1.json # no GPU declaration ``` - Pods scheduled onto GPU nodes load a manifest declaring GPU capability; pods on CPU-only nodes get a manifest without GPU. Ralph's issue dispatcher routes `needs:gpu`-labeled work only to pods with the GPU capability. - --- - ## Limitations - - **No automatic pod discovery.** The SDK reads env vars to know who it is; it doesn't enumerate sibling pods or coordinate work distribution. That's the deployment orchestrator's job (KEDA, scheduler). - **No central capability registry.** Pods don't publish their capabilities back to anything; each pod evaluates issues against its own loaded manifest independently. If you need a central view, your orchestrator must aggregate. - **Manifest changes require redeploy or restart.** The fallback lookup happens on capability resolution; manifest content is read from disk each time but the manifest *path* is decided by env vars set at process start. 
- --- - ## See also - - [Capability Routing](/squad/docs/features/capability-routing/) — the broader machine-capability system - [KEDA Scaling](/squad/docs/features/keda-scaling/) — autoscaling Squad pods on demand - [Labels](/squad/docs/features/labels/) — `needs:*` label conventions used for capability matching diff --git a/docs/src/content/docs/features/error-recovery.md b/docs/src/content/docs/features/error-recovery.md index e9ebea5ce..f275627d3 100644 --- a/docs/src/content/docs/features/error-recovery.md +++ b/docs/src/content/docs/features/error-recovery.md @@ -2,94 +2,60 @@ title: Error Recovery — Standard Failure Patterns description: Built-in skill teaching agents to adapt when things fail — retry with backoff, fallback alternatives, diagnose-and-fix, and escalation patterns. --- - # Error Recovery — Standard Failure Patterns - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - The `error-recovery` skill teaches every squad agent to **adapt** when something fails, not just report the failure. It ships as a built-in skill at `.copilot/skills/error-recovery/SKILL.md` and is available to every spawned agent. - Without this skill, agents tend to encounter a failure (CI test red, API timeout, missing dependency) and stop. With it, they apply standard patterns to diagnose, retry, or escalate the right way. - --- - ## The five recovery patterns - ### 1. Retry with Backoff - **When:** Transient failures — API timeouts, rate limits, network errors, temporary service unavailability. - **Pattern:** 1. Wait briefly, then retry (start at 2s, double each attempt) 2. Maximum 3 retries before escalating 3. Log each attempt with the error received - **Example:** API call returns `429 Too Many Requests` → wait 2s → retry → wait 4s → retry → wait 8s → retry → escalate if still failing. - ### 2. Fallback Alternatives - **When:** Primary tool or approach fails and an alternative exists. - **Pattern:** 1. 
Attempt primary approach 2. On failure, identify alternative tool/method 3. Try the alternative with the same intent 4. Document which alternative was used and why - **Example:** Primary CLI tool fails → fall back to direct API call for the same operation. Or: `gh pr comment` rate-limited → fall back to `gh api -X POST .../issues/{n}/comments`. - ### 3. Diagnose-and-Fix - **When:** Build failures, test failures, linting errors — structured errors with actionable output. - **Pattern:** 1. Read the full error output carefully (not just the last line) 2. Identify the root cause from error messages 3. Attempt a targeted fix 4. Re-run to verify the fix 5. If 3 fix attempts fail, escalate with a diagnostic summary - **Example:** TypeScript build fails with `Cannot find module '@x/y'` → check `package.json`, run `npm install`, re-run build. - ### 4. Reframe-and-Retry - **When:** The approach itself is wrong (not just the implementation). User feedback like *"that won't work because..."* or *"try a different way"*. - **Pattern:** 1. Stop the current approach immediately 2. Re-read the original task description 3. Identify what assumption was wrong 4. Propose 2 alternative approaches before picking one 5. Get user confirmation if the cost of being wrong again is high - ### 5. Escalation - **When:** Three attempts have failed, OR the failure is outside the agent's domain, OR fixing it would violate a team decision. - **Pattern:** 1. Stop attempting fixes 2. Summarize: what was tried, what failed, what's known 3. Surface to coordinator with a clear ask (*"need lead's call on architecture"* vs. *"need human approval"* vs. *"need access to X system"*) 4. Document the escalation in `decisions/inbox/` if it's a recurring pattern - --- - ## When NOT to apply these patterns - - **Don't retry on user-input errors.** If the user typed `gh repo create my-typo`, don't retry with `my-typoo`. Surface and ask. 
- **Don't fall back silently on security-sensitive operations.** If `git push origin main` fails because of branch protection, do NOT fall back to `--force`. - **Don't escalate without context.** *"It failed"* isn't an escalation; *"three attempts, each with `EACCES`, suggests user lacks write to `.squad/`, recommend chmod or different storage path"* is. - --- - ## Integration with Reviewer Rejection Protocol - When the failure is a Reviewer rejection (a Reviewer agent rejects an artifact), the [Reviewer Rejection Protocol](/squad/docs/features/reviewer-protocol/) takes precedence. The original author is locked out and a different agent must own the revision. Error-recovery patterns apply within that constraint — the revision agent can use retry/fallback/diagnose patterns freely. - --- - ## See also - - [Reflect](/squad/docs/features/reflect/) — learning from corrections - [Reviewer Protocol](/squad/docs/features/reviewer-protocol/) — when a Reviewer rejects work - [Skills](/squad/docs/features/skills/) — how built-in skills work diff --git a/docs/src/content/docs/features/export-import.md b/docs/src/content/docs/features/export-import.md index 789cdf316..df631e8ee 100644 --- a/docs/src/content/docs/features/export-import.md +++ b/docs/src/content/docs/features/export-import.md @@ -1,60 +1,39 @@ # Export & Import - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to make your team portable:** ``` Export my team to a file — I want to use them on another project ``` - **Try this to bring a trained team to a new repo:** ``` Import the team from squad-export.json ``` - Squad teams are portable. Export your trained agents, casting state, skills, and decisions to a single JSON file. Import them into any repo and they bring all their knowledge with them. 
- --- - ## Export - ```bash squad export ``` - Creates `squad-export.json` in the current directory — a portable snapshot of your entire team: agents, casting state, skills, and decisions. - ### Custom output path - ```bash squad export --out ./backups/my-team.json ``` - ### Push directly to a GitHub repository - Instead of writing to a local file, you can push the export straight to a GitHub repo via the GitHub Contents API. This is the easiest way to back up your team to a private repo or share it with collaborators without sending a file. - ```bash # Export to a GitHub repo (uses default branch) squad export --repo myorg/squad-backups - # Export to a specific branch squad export --repo myorg/squad-backups --branch nightly ``` - Requirements: - GitHub CLI (`gh`) installed and authenticated with permission to push to the target repo - The repo must exist (the export does NOT create it) - The export lands at the repo root as `squad-export.json` by default. Combine with `--out` to control the filename inside the repo: - ```bash squad export --repo myorg/squad-backups --out my-team-2026-06-11.json ``` - ### What's included - | Data | Included | |------|----------| | Agent charters | ✅ | @@ -62,106 +41,70 @@ squad export --repo myorg/squad-backups --out my-team-2026-06-11.json | Casting state | ✅ | | **Skills** | ✅ **All earned skills export with the team** | | Decisions | ✅ | - > **Skills are portable**: When you export a team, all earned skills from `.copilot/skills/` are included in the JSON manifest. After importing, skills are immediately available to all agents — no loss of knowledge. - --- - ## Import - ```bash squad import squad-export.json ``` - Imports the snapshot into the current repo's `.squad/` directory. 
- ### Pull directly from a GitHub repository - You can import a snapshot directly from a GitHub repo without downloading the file first: - ```bash # Import from default branch of a repo squad import --repo myorg/squad-backups - # Import a specific filename or branch squad import --repo myorg/squad-backups --branch nightly squad import --repo myorg/squad-backups --out my-team-2026-06-11.json ``` - Requirements: - GitHub CLI (`gh`) installed and authenticated with read access to the source repo - The export file must exist at the named path in the repo (default: `squad-export.json` at repo root) - Use `--force` together with `--repo` for the same archive-then-replace behavior as the file-based import. - ### Collision detection - If `.squad/` already exists, Squad warns you and stops. To archive the existing team and replace it: - ```bash squad import squad-export.json --force ``` - The `--force` flag moves your current team to an archive before importing. Nothing is deleted. - ### History splitting - During import, agent histories are split into two categories: - - **Portable knowledge** — general learnings, conventions, and patterns that transfer across projects - **Project-specific learnings** — context-tagged entries tied to the original repo - Imported agents bring their skills and general knowledge without assuming your project works the same way. - --- - ## Use Cases - | Scenario | Command | |----------|---------| | Back up before a major refactor | `squad export --out ./backup.json` | | Share a trained team with a colleague | Export, send the JSON, they import — **skills included** | | Move a team to a different repo | Export from old repo, import into new repo — **skills travel with agents** | | Reset and start fresh | Export as backup, delete `.squad/`, re-init | - --- - ## Tips - - Export before running `upgrade` if you want a rollback point. - The export file is JSON — you can inspect it to see exactly what your team knows. 
- Imported agents retain their names and universe. They won't be renamed. - Commit your `.squad/` directory after importing so the team is available to everyone who clones the repo. - **Skills are fully portable** — all earned skills export and import with perfect fidelity. No manual copying needed. - ## Sample Prompts - ``` export the current team ``` - Creates a `squad-export.json` snapshot of the entire team in the current directory. - ``` import squad-export.json into this repo ``` - Imports a team snapshot into the current project's `.squad/` directory. - ``` what was included in that export? ``` - Shows a summary of what data was captured in the most recent export file. - ``` export just the team state, not the full history ``` - Creates a lightweight export with agent charters and skills but minimal history. - ``` import with --force and archive the current team ``` - Overwrites the existing `.squad/` directory after archiving it as a backup. diff --git a/docs/src/content/docs/features/external-state.md b/docs/src/content/docs/features/external-state.md index 925b0e9a4..70af21075 100644 --- a/docs/src/content/docs/features/external-state.md +++ b/docs/src/content/docs/features/external-state.md @@ -1,31 +1,20 @@ # External State Storage - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to move state outside the working tree:** ```bash squad externalize ``` - **Try this to move state back:** ```bash squad internalize ``` - **Try this to check current state location:** ```bash cat .squad/config.json | grep stateLocation ``` - Squad can store `.squad/` state outside the working tree in a platform-specific global directory — solving branch-switch data loss and PR pollution. - --- - ## The Problem - By default, `.squad/` lives in the working tree alongside your code: - ``` my-repo/ .squad/ @@ -34,166 +23,125 @@ my-repo/ team.md routing.md ``` - This creates two problems: - ### 1. 
Branch-Switch Data Loss - When you switch Git branches, `.squad/` is destroyed: - ```bash git checkout feature-branch # .squad/ exists git checkout main # .squad/ GONE (if not on main) ``` - Your decisions, skills, earned knowledge — all lost. - ### 2. PR Pollution - If you commit `.squad/` to preserve it, every branch includes squad state in PRs: - ```diff + .squad/decisions/log.md + .copilot/skills/ci-setup/SKILL.md + .squad/team.md ``` - Reviewers see squad metadata mixed with your actual code changes. - --- - ## The Solution: External State - -`squad externalize` moves `.squad/` to a platform-specific global directory **outside the working tree**: - +`squad externalize` moves `.squad/` to a platform-specific Squad data root **outside the working tree**: **Platform paths:** - -| OS | Path | -|----|------| -| **Windows** | `%APPDATA%\squad\projects\{repo-name}\` | -| **macOS** | `~/Library/Application Support/squad/projects/{repo-name}/` | -| **Linux** | `~/.config/squad/projects/{repo-name}/` | - +Squad stores externalized state under your platform's standard application-data location, in a repo-specific `projects/{repo-name}/` folder. **Result:** - Squad state persists across branch switches - PRs never contain `.squad/` files - State is isolated per repository (based on repo name) - --- - ## Usage - ### Externalize - Move `.squad/` to external storage: - ```bash squad externalize ``` - **What happens:** -1. Resolves platform-specific global path (e.g., `~/Library/Application Support/squad/projects/my-repo/`) -2. Moves `.squad/` contents to global path -3. Creates thin marker file `.squad/config.json` in working tree: +1. Resolves the platform-specific Squad data root for this repository +2. Moves everything under `.squad/` **except** local-only bootstrap files (`config.json`, `manifest.json`, `workstreams.json`, `upstream.json`, `squad-registry.json`, and `_upstream_repos/`) +3. 
Writes or updates `.squad/config.json` in the working tree with the external-state marker and project key: ```json { + "version": 1, + "teamRoot": ".", + "projectKey": "my-repo", "stateLocation": "external" } ``` -4. Adds `.squad/` to `.gitignore` (if not already present) - +4. Ensures `.squad/config.json` is listed in `.gitignore` **After externalization:** -- Working tree has only `.squad/config.json` (gitignored marker) -- All squad state lives in global directory -- Branch switches don't affect squad data - +- Mutable squad state lives in the external Squad data root +- `.squad/config.json` stays in the repo as the machine-local marker file +- Other local-only resolver files under `.squad/` can also remain in the working tree +- Branch switches no longer affect the externalized state --- - ### Internalize - Move state back to working tree: - ```bash squad internalize ``` - **What happens:** -1. Reads marker file to find external state location -2. Moves state from global directory back to `.squad/` -3. Removes marker file -4. Removes `.squad/` from `.gitignore` - +1. Reads `.squad/config.json` to find the external project key +2. Copies the externalized entries back into `.squad/` +3. Removes the external-state fields from `.squad/config.json` +4. 
Deletes `.squad/config.json` only if no other meaningful config remains **After internalization:** -- `.squad/` lives in working tree again -- Can commit squad state if desired -- Vulnerable to branch-switch data loss again - +- Mutable state lives in the working tree again +- Any unrelated `.squad/config.json` settings are preserved +- The command does **not** edit `.gitignore`; the `config.json` ignore entry is left in place --- - ## Configuration - -The thin marker file `.squad/config.json` tracks state location: - +The marker file `.squad/config.json` is the source of truth for externalized state: ```json { + "version": 1, + "teamRoot": ".", + "projectKey": "my-repo", "stateLocation": "external" } ``` - -| Value | Meaning | +| Field | Meaning | |-------|---------| -| `"internal"` | State lives in working tree (`.squad/` in repo) | -| `"external"` | State lives in global directory (platform-specific path) | - +| `"projectKey"` | Stable key used to choose the external directory | +| `"stateLocation": "external"` | This repo should resolve mutable state from the platform-specific external directory | +| `"teamRoot": "."` | Resolver hint preserved in config | **Notes:** -- Marker file is created by `squad externalize` -- Marker file is gitignored — not committed to repo -- Marker file is removed by `squad internalize` - +- `squad externalize` writes these fields while preserving unrelated config keys +- `.squad/config.json` is gitignored because it is machine-local +- `squad internalize` removes the external-state fields, then deletes the file only if nothing meaningful remains --- - ## Global Directory Structure - ``` -~/Library/Application Support/squad/projects/ - my-repo/ - decisions/ - log.md - inbox/ - skills/ - ci-setup/SKILL.md - team.md - routing.md - other-repo/ - decisions/ - skills/ +<squad-data-root>/ + projects/ + my-repo/ + decisions/ + log.md + inbox/ + skills/ + ci-setup/SKILL.md + team.md + routing.md + other-repo/ + decisions/ + skills/ + team.md ``` - Each repo
gets its own isolated directory based on repository name. State is never shared across repos. - --- - ## When to Use External State - **Use `squad externalize` when:** - You switch branches frequently - You want squad state isolated from code PRs - You work on feature branches where `.squad/` isn't committed to base branch - You want squad state to persist across `git clean -fdx` - **Keep internal state when:** - You want squad state committed to the repo (e.g., decisions, skills travel with code) - You rarely switch branches - You want squad state versioned alongside code - --- - ## Multi-Repo Workflows - External state is **isolated per repository** — each repo gets its own global directory. If you work on multiple repos, each maintains separate squad state: - ``` -~/Library/Application Support/squad/projects/ frontend/ decisions/ skills/ @@ -203,93 +151,63 @@ External state is **isolated per repository** — each repo gets its own global skills/ team.md ``` - No cross-repo state pollution. - --- - ## Git Integration - -After externalization, `.squad/` is gitignored. Only the thin marker file exists in the working tree: - -```bash -$ git status -On branch feature-branch -Untracked files: - .squad/config.json # gitignored marker — not committed -``` - -This means: -- PRs never show squad state changes -- Branch switches don't affect squad data -- `git clean -fdx` doesn't delete squad state - +Externalization only adds **`.squad/config.json`** to `.gitignore`. +It does **not** add the whole `.squad/` directory, and `squad internalize` does not remove that entry afterward. 
+That means: +- The machine-local marker file stays out of commits +- Other local `.squad/` files such as `manifest.json` or `workstreams.json` still follow normal Git rules +- Externalized mutable state stays out of PRs because it no longer lives in the working tree +- `git clean -fdx` does not delete the external directory --- - ## Migration - ### From Internal to External - ```bash # Before: .squad/ in working tree ls .squad/ # decisions/ skills/ team.md routing.md - squad externalize - -# After: only marker file in working tree +# After: config.json remains, and some local-only bootstrap files may remain too ls .squad/ -# config.json - +# config.json manifest.json workstreams.json ... # State moved to global directory ls ~/Library/Application\ Support/squad/projects/my-repo/ # decisions/ skills/ team.md routing.md ``` - ### From External to Internal - ```bash squad internalize - # State moved back to working tree ls .squad/ -# decisions/ skills/ team.md routing.md config.json +# decisions/ skills/ team.md routing.md ... +# config.json only remains if it still has other settings ``` - --- - ## Notes - - External state is **opt-in** — default is internal (working tree) - External state is **platform-aware** — uses OS-specific global directories - External state is **isolated per repo** — no cross-repo pollution -- Marker file is **gitignored** — never committed +- `.squad/config.json` is **gitignored** — never committed +- Local resolver/bootstrap files are intentionally left in the working tree +- `squad internalize` does not clean up the `.gitignore` entry for `config.json` - `squad upgrade` respects current state location (doesn't force internal/external) - --- - ## Sample Prompts - ``` squad externalize ``` - Moves squad state to global directory. - ``` squad internalize ``` - Moves squad state back to working tree. - ``` Where is my squad state stored? ``` - Reports current state location (internal vs external). 
- ``` Show me the external state path ``` - Prints the platform-specific global directory path. diff --git a/docs/src/content/docs/features/fleet-dispatch.md b/docs/src/content/docs/features/fleet-dispatch.md index cd197d75d..5a7f4cdbb 100644 --- a/docs/src/content/docs/features/fleet-dispatch.md +++ b/docs/src/content/docs/features/fleet-dispatch.md @@ -2,54 +2,34 @@ title: Fleet Dispatch — Parallel Issue Triage description: Hybrid dispatch mode for squad watch that batches read-heavy issues into a single Copilot /fleet session for 2.9x faster parallel analysis. --- - # Fleet Dispatch — Parallel Issue Triage - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this for parallel read-heavy issue triage:** ```bash squad watch --execute --dispatch-mode fleet ``` - **Try this for mixed read + write workloads:** ```bash squad watch --execute --dispatch-mode hybrid ``` - Fleet Dispatch enables `squad watch --execute` to batch **read-heavy issues** (research, review, audit, triage) into a single Copilot CLI `/fleet` session that analyzes them in parallel tracks. The published measurement: **2.9× faster** than sequential dispatch for read-heavy workloads. - It's a `WatchCapability` that runs in the `post-execute` phase of the watch loop, so it composes with the existing per-issue dispatch logic rather than replacing it. 
- --- - ## Three dispatch modes - | Mode | What gets parallelized | Best for | |------|------------------------|----------| | **`sequential`** (default) | One issue at a time, full agent spawn each | Mixed workloads, debugging | | **`fleet`** | All issues batched into one `/fleet` Copilot session, parallel analysis tracks | Pure triage/review rounds where all issues are read-only | | **`hybrid`** | Read-heavy issues go to fleet; write-heavy issues go sequential | Realistic backlogs with both kinds | - `hybrid` is the recommended mode for most teams — it gets the speedup on the analysis-heavy issues without trying to fleet-dispatch issues that need to write code or modify state. - --- - ## What counts as "read-heavy" - The fleet-dispatch capability classifies issues using the same `classifyIssue` logic used elsewhere in `squad watch`. Read-heavy classification is based on labels and title keywords: - - **Labels:** `triage`, `review`, `audit`, `analyze`, `research`, `investigate`, `discuss`, `question` - **Title keywords:** *"review"*, *"audit"*, *"analyze"*, *"investigate"*, *"why does"*, *"how does"* - Anything that touches code, files, or external systems is **write-heavy** and stays in sequential dispatch — even in `hybrid` mode. - --- - ## How a fleet round works - When `squad watch` decides to dispatch (work items present, no rate-limit hold), and `dispatchMode` is `fleet` or `hybrid`: - 1. Watch's executor calls `findExecutableIssues` to get the work batch 2. FleetDispatch capability runs in `post-execute` phase 3. Read-heavy issues are filtered out of the sequential queue @@ -63,40 +43,26 @@ When `squad watch` decides to dispatch (work items present, no rate-limit hold), 6. The prompt is sent as a single `copilot --fleet` invocation 7. Copilot runs all tracks in parallel, posts comments per issue, exits 8. 
Watch logs the fleet dispatch result and continues to its next round - A typical fleet prompt looks like: - ``` /fleet Execute these 6 read-only analysis tracks in parallel: - Track 1 (PAO): Issue #421: Triage user-reported bug in login flow Read the issue body. Analyze, assess urgency (P0/P1/P2), recommend next step. Write findings as an issue comment. Do NOT create branches or modify files. - Track 2 (FIDO): Issue #428: Review PR #427's test coverage ... - Rules: All tracks READ-ONLY. Write findings as issue comments. Run in parallel. ``` - --- - ## Measurement methodology - The 2.9× speedup citation comes from comparing 6 read-heavy issues: - - Sequential mode: 6 separate `copilot --agent {role}` invocations → ~18 minutes total (each ~3 min for cold-start + analysis) - Fleet mode: 1 `copilot --fleet` invocation with 6 tracks → ~6 minutes total (one cold-start, parallel analysis tracks) - Speedup is dominated by avoiding 5 cold-starts. It does NOT extend to write-heavy issues because Copilot's `/fleet` doesn't currently support parallel write operations safely (commits would conflict). - --- - ## Configuration - Set the dispatch mode in `.squad/watch-config.json`: - ```json { "execute": true, @@ -105,28 +71,20 @@ Set the dispatch mode in `.squad/watch-config.json`: "copilotFlags": "--allow-all-tools --no-color" } ``` - Or via CLI flag (overrides config): - ```bash squad watch --execute --dispatch-mode fleet squad watch --execute --dispatch-mode hybrid squad watch --execute --dispatch-mode sequential ``` - --- - ## Limitations - - **Read-only only.** Fleet tracks must not modify files or create branches. The capability builds prompts that explicitly forbid this; if your team needs parallel write workflows, sequential dispatch remains the safer choice. - **One track per issue.** No batching of multiple issues into one track — each issue gets its own analysis context. - **Track count limit.** Copilot CLI `/fleet` has its own track-count ceiling. 
For backlogs with >10 read-heavy issues per round, the capability splits across multiple fleet calls. - **Classification is conservative.** If `classifyIssue` is unsure, it defaults to write-heavy (sequential). Better to lose the speedup than to fleet-dispatch a write-heavy issue accidentally. - --- - ## See also - - [Ralph](/squad/docs/features/ralph/) — the watch loop's broader behavior - [Capability Routing](/squad/docs/features/capability-routing/) — how watch matches work to agents - [Rate Limiting](/squad/docs/features/rate-limiting/) — cooperative rate limiting (composes with fleet dispatch) diff --git a/docs/src/content/docs/features/github-issues.md b/docs/src/content/docs/features/github-issues.md index 16ac2fa18..a6409c229 100644 --- a/docs/src/content/docs/features/github-issues.md +++ b/docs/src/content/docs/features/github-issues.md @@ -1,117 +1,74 @@ # GitHub Issues Mode - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to connect to your backlog:** ``` Show me the open issues for this repo ``` - **Try this to start work on a specific issue:** ``` Work on issue #42 ``` - **Try this to handle PR review feedback:** ``` There's review feedback on PR #24 ``` - Squad connects to your GitHub repository, fetches issues from the backlog, routes work to the right agents, creates branches, implements changes, and opens PRs — all from natural language requests. - --- - ## Requirements - - **`gh` CLI** installed and authenticated (`gh auth status` to check) - A GitHub repository with issues - --- - ## Connect to a Repository - ``` > Connect to myorg/myrepo ``` - Squad stores the issue source in `team.md`. You only need to do this once per project. 
- --- - ## View the Backlog - ``` > Show the backlog ``` - Squad fetches open issues and displays them in a table: - ``` # Title Labels Assignee 12 Add user authentication backend — 15 Fix responsive nav frontend — 18 Write API integration tests testing — ``` - --- - ## Work on Issues - ### Single issue - ``` > Work on #12 ``` - The coordinator routes the issue to the best-fit agent. That agent: - 1. Creates a branch (e.g., `feature/12-add-user-authentication`) 2. Implements the work 3. Opens a PR linked to the issue - ### Multiple issues - ``` > Work on #12 and #15 ``` - Agents work in parallel — each issue gets its own branch and PR. - --- - ## Handle PR Review Feedback - ``` > There's review feedback on PR #24 ``` - The agent who opened the PR reads the review comments and addresses them. Commits are pushed to the existing branch. - --- - ## Merge Completed Work - ``` > Merge PR #24 ``` - Squad squash-merges the PR, deletes the branch, and closes the linked issue. - --- - ## Check Remaining Work - ``` > What's left? ``` - Squad refreshes the backlog and shows remaining open issues. - --- - ## Workflow Summary - | You say | What happens | |---------|-------------| | `"Connect to myorg/myrepo"` | Stores issue source | @@ -121,43 +78,29 @@ Squad refreshes the backlog and shows remaining open issues. | `"There's review feedback on PR #24"` | Agent addresses review comments | | `"Merge PR #24"` | Squash merge, delete branch, close issue | | `"What's left?"` | Refreshes and shows remaining issues | - --- - ## Tips - - You don't need to assign issues to specific agents — Squad routes based on domain expertise. - If `gh` isn't authenticated, Squad will tell you. Run `gh auth login` first. - For detailed GitHub workflow, see [GitHub Workflow](../concepts/github-workflow.md). - ## Sample Prompts - ``` connect to bradygaster/squad ``` - Links Squad to a GitHub repository for issue-driven development. 
- ``` show the backlog ``` - Fetches and displays all open issues from the connected repository. - ``` work on issue #23 ``` - Routes the issue to the appropriate agent who creates a branch, implements, and opens a PR. - ``` work on all issues labeled "bug" ``` - Processes multiple issues in parallel based on label filtering. - ``` what's left in the backlog? ``` - Refreshes the issue list and shows remaining open work items. diff --git a/docs/src/content/docs/features/human-team-members.md b/docs/src/content/docs/features/human-team-members.md index e6ce15798..d4076af92 100644 --- a/docs/src/content/docs/features/human-team-members.md +++ b/docs/src/content/docs/features/human-team-members.md @@ -1,34 +1,21 @@ # Human Team Members - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to add a human specialist:** ``` Add Sarah (sarah@example.com) as a frontend developer to the team ``` - **Try this to add a human reviewer:** ``` Add Jordan as design reviewer ``` - Not every team member needs to be AI. Add real people to the roster for decisions that require a human — design sign-off, security review, product approval. - --- - ## Adding a Human - ``` > Add Sarah as design reviewer ``` - Sarah appears in the team roster with a 👤 Human badge, distinct from AI agents. - --- - ## How Humans Differ from AI Agents - | | AI Agent | Human Member | |---|----------|-------------| | Badge | Role-specific emoji | 👤 Human | @@ -36,51 +23,31 @@ Sarah appears in the team roster with a 👤 Human badge, distinct from AI agent | History | ✅ | ❌ | | Spawned as sub-agent | ✅ | ❌ | | Can review work | ✅ | ✅ | - Human team members have no charter, no history file, and are never spawned as sub-agents. They exist on the roster as routing targets. - --- - ## What Happens When Work Routes to a Human - When the coordinator determines that a task should go to a human team member: - 1. 
**Squad pauses** and tells you that a human needs to act 2. You relay the task to the person outside of Squad 3. When they respond, you tell Squad what happened - If the human hasn't responded after a while, Squad sends **stale reminders** prompting you to follow up. - --- - ## Humans as Reviewers - Human team members can serve as reviewers in the [reviewer protocol](../concepts/your-team.md#reviewer-protocol). This is useful when you want a real person to sign off before work is considered done. - ``` > Add Jordan as security reviewer ``` - When work requires security review, Squad routes it to Jordan and waits. - --- - ## Removing a Human - Same as removing any team member — they move to alumni: - ``` > Remove Sarah from the team ``` - Their entry moves to `.squad/agents/_alumni/`. They can be re-added later. - --- - ## When to add a human member - Not every collaborator needs a roster entry. Use this table to decide: - | Scenario | Add to roster? | Why | |----------|---------------|-----| | Approves architecture decisions before implementation | ✅ Yes | Decision gate — agents route and wait | @@ -88,47 +55,31 @@ Not every collaborator needs a roster entry. Use this table to decide: | Makes the final ship/no-ship call | ✅ Yes | Approval gate | | Occasionally reviews PRs when tagged | ❌ No | Use @mention on the PR instead | | Files issues and contributes code | ❌ No | Normal GitHub collaboration | - **Litmus test:** If you want agents to *stop and wait* for someone's input before proceeding, add them. If they review asynchronously through normal GitHub flows, a roster entry adds no value. - **You don't need to add yourself.** Squad reads `git config user.name` every session, so the team always knows who's driving. Adding yourself to the roster is optional — it formalizes routing and review tracking but isn't required for day-to-day interaction. - --- - ## Tips - - Use human members for approval gates — design review, compliance, final sign-off. 
- Human members work well alongside [ceremonies](ceremonies.md) — add a human as a required participant in a design review ceremony. - You're the relay. Squad can't message humans directly — it tells you, and you coordinate. - ## Sample Prompts - ``` add Maria as security reviewer ``` - Adds a human team member with a specific review responsibility. - ``` route this auth work to Jordan for approval ``` - Assigns a task to a human team member for external handling. - ``` Jordan approved the design — we can proceed ``` - Unblocks work that was waiting on human input. - ``` who's on the roster? ``` - Shows all team members including both AI agents and human members. - ``` remove Sarah from the team ``` - Moves a human team member to the alumni list. diff --git a/docs/src/content/docs/features/issue-templates.md b/docs/src/content/docs/features/issue-templates.md index adf5a04ad..3b3a03deb 100644 --- a/docs/src/content/docs/features/issue-templates.md +++ b/docs/src/content/docs/features/issue-templates.md @@ -1,348 +1,276 @@ -# Issue Templates for Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -**Try this after setting up templates:** -``` -Ralph, show me untriaged issues -``` - -**Then watch Ralph auto-triage based on labels.** - -When GitHub Issues are your work queue, creating tasks should be frictionless. Issue templates pre-fill labels, structure task descriptions, and work beautifully on mobile — making it possible to add tasks in 10 seconds from anywhere. - ---- - -## Why Issue Templates Matter for Squad - -GitHub provides Issue Templates — a platform feature that pre-fills labels, fields, and structure when creating new issues. This guide shows how to configure templates that work smoothly with Squad's label-based routing. - -Squad operates best when work is captured as GitHub Issues. 
But creating an issue from scratch takes time: you need to remember the right labels, format the description consistently, and ensure the structure matches what agents expect. - -Issue templates solve this: - -- **Pre-filled labels** — `squad` label applied automatically -- **Structured format** — Task description, acceptance criteria, priority fields -- **Mobile-friendly** — Works in the GitHub mobile app -- **Fast task creation** — Add work while walking the dog, waiting for coffee, or during a meeting - -With templates, creating a Squad task takes 10 seconds instead of 2 minutes. - ---- - -## Basic Squad Task Template - -Create `.github/ISSUE_TEMPLATE/squad-task.yml` in your repository: - -```yaml -name: Squad Task -description: Create a task for the Squad team -title: "[Task]: " -labels: ["squad"] -body: - - type: markdown - attributes: - value: | - Thanks for creating a Squad task! Fill in the details below. - - - type: textarea - id: description - attributes: - label: Task Description - description: What needs to be done? - placeholder: | - Add dark mode support to the settings page. - - Current behavior: Settings page uses light theme only. - Expected behavior: Theme switcher in settings, respects system preference. - validations: - required: true - - - type: textarea - id: acceptance-criteria - attributes: - label: Acceptance Criteria - description: How will we know this is complete? - placeholder: | - - [ ] Theme switcher toggle added to settings - - [ ] Dark mode CSS applied when enabled - - [ ] Preference saved to localStorage - - [ ] System theme preference detected on first load - validations: - required: false - - - type: dropdown - id: priority - attributes: - label: Priority - description: How urgent is this task? 
- options: - - Low - - Medium - - High - - Critical - validations: - required: false -``` - -### What This Template Does - -- **Applies `squad` label** — Ralph sees it in the untriaged queue -- **Structured sections** — Description, acceptance criteria, priority -- **Markdown support** — Use checklists, code blocks, links -- **Works on mobile** — GitHub app renders forms beautifully - ---- - -## Custom Labels for Routing - -Ralph uses `.squad/routing.md` to route work to agents. Add `squad:{member}` labels to your template for pre-triaging: - -```yaml -name: Documentation Task -description: Create a docs task (auto-routed to PAO) -title: "[Docs]: " -labels: ["squad", "squad:pao"] -body: - - type: textarea - id: description - attributes: - label: What needs documenting? - placeholder: | - Add a guide for setting up Ralph in production. -``` - -When Ralph scans the board, this issue is already labeled `squad:pao` — no triage needed, work goes straight to PAO. - -### Setting up labels for Squad routing - -Create labels in your repository for each squad member: - -```bash -# Using gh CLI -gh label create "squad:pao" --description "DevRel tasks" --color "1d76db" -gh label create "squad:flight" --description "Architecture and planning" --color "d73a4a" -gh label create "squad:fido" --description "Testing and quality" --color "0e8a16" -``` - -Or use the [label sync workflow](../features/labels.md) to automate label management across repositories. - ---- - -## Template Variants - -Different work types need different structures: - -### Bug Report Template - -`.github/ISSUE_TEMPLATE/bug-report.yml`: - -```yaml -name: Bug Report -description: Report a bug for Squad to fix -title: "[Bug]: " -labels: ["squad", "bug"] -body: - - type: textarea - id: description - attributes: - label: Bug Description - description: What went wrong? - validations: - required: true - - - type: textarea - id: repro-steps - attributes: - label: Steps to Reproduce - placeholder: | - 1. 
Run `squad init` - 2. Create a team with 3 agents - 3. Try to export the configuration - 4. See error: "Cannot read property 'name' of undefined" - validations: - required: true - - - type: textarea - id: expected - attributes: - label: Expected Behavior - description: What should have happened? - validations: - required: false - - - type: input - id: version - attributes: - label: Squad Version - placeholder: "0.8.24" - validations: - required: false -``` - -### Feature Request Template - -`.github/ISSUE_TEMPLATE/feature-request.yml`: - -```yaml -name: Feature Request -description: Suggest a new feature for Squad -title: "[Feature]: " -labels: ["squad", "enhancement"] -body: - - type: textarea - id: problem - attributes: - label: Problem Statement - description: What problem does this feature solve? - placeholder: "As a solo developer, I want to track time spent on tasks so I can invoice clients accurately." - validations: - required: true - - - type: textarea - id: solution - attributes: - label: Proposed Solution - description: How should this feature work? - validations: - required: false - - - type: textarea - id: alternatives - attributes: - label: Alternatives Considered - description: What other approaches did you think about? - validations: - required: false -``` - -### Doc Update Template - -`.github/ISSUE_TEMPLATE/doc-update.yml`: - -```yaml -name: Documentation Update -description: Suggest a docs improvement -title: "[Docs]: " -labels: ["squad", "squad:pao", "documentation"] -body: - - type: textarea - id: what - attributes: - label: What needs updating? - placeholder: "The Ralph deployment guide doesn't mention log rotation." - validations: - required: true - - - type: input - id: page - attributes: - label: Page URL or Path - placeholder: "docs/scenarios/ralph-operations.md" - validations: - required: false -``` - ---- - -## Mobile Workflow - -GitHub Issues + templates work from anywhere: - -**On your phone:** -1. Open GitHub app -2. 
Navigate to repository -3. Tap **Issues** → **New Issue** -4. Select template -5. Fill form (voice-to-text works!) -6. Tap **Submit new issue** - -**10 seconds later:** -- Issue created with `squad` label -- Ralph sees it in the next scan -- Agent picks it up autonomously - -This workflow enables "capture anywhere, process later" — add tasks while commuting, exercising, or in meetings without context-switching to a laptop. - ---- - -## Template Configuration - -GitHub supports multiple templates. Create a config file to customize the issue creation experience: - -`.github/ISSUE_TEMPLATE/config.yml`: - -```yaml -blank_issues_enabled: false -contact_links: - - name: Squad Community Discussions - url: https://github.com/bradygaster/squad/discussions - about: Ask questions or share ideas in Discussions - - name: Squad Documentation - url: https://squad.dev - about: Read the full Squad documentation -``` - -This disables blank issues (forcing template use) and provides helpful links when users click "New Issue." - ---- - -## Template Best Practices - -- **Keep templates short** — Long forms reduce completion rates -- **Make most fields optional** — Only require what's absolutely necessary -- **Use placeholders** — Show examples of good descriptions -- **Pre-fill smart defaults** — Priority: Medium, Type: Task -- **Test on mobile** — Ensure forms render well in the GitHub app -- **Use dropdown for enums** — Priority, Type, Severity (reduces typos) -- **Add markdown help** — Link to GitHub markdown guide in template - ---- - -## Integration with Ralph - -Ralph's heartbeat workflow (`.github/workflows/squad-heartbeat.yml`) scans for untriaged issues: - -1. Issue created with `squad` label (from template) -2. Heartbeat workflow runs (every 30 min or on issue create) -3. Ralph reads `.squad/routing.md` to determine agent -4. Ralph adds `squad:{member}` label -5. 
Next heartbeat run (or in-session Ralph) assigns agent - -If your template pre-fills `squad:{member}`, Ralph skips triage and goes straight to assignment. - ---- - -## Sample Prompts - -``` -Show me untriaged squad issues -``` - -Lists all issues with `squad` label but no `squad:{member}` assignment. - -``` -Ralph, triage and assign the backlog -``` - -Ralph reads routing rules, applies member labels, and prepares work for agents. - ---- - -## Notes - -- Templates don't prevent manual issue creation — users can still click "Open a blank issue" -- Templates are stored in `.github/ISSUE_TEMPLATE/` (note the underscore, not dash) -- Use `.yml` or `.yaml` extension (both work) -- Test templates by creating issues yourself before announcing to the team -- Mobile workflow requires GitHub app (iOS or Android) — works on tablets too - ---- - -## See Also - -- [GitHub Issues Mode](./github-issues.md) — Issue-driven development workflow -- [Ralph — Work Monitor](./ralph.md) — Ralph's work monitoring behavior -- [Labels](./labels.md) — Label management and sync workflow -- [Routing](./routing.md) — How Ralph triages work to agents +# Issue Templates for Squad +**Try this after setting up templates:** +``` +Ralph, show me untriaged issues +``` +**Then watch Ralph auto-triage based on labels.** +When GitHub Issues are your work queue, creating tasks should be frictionless. Issue templates pre-fill labels, structure task descriptions, and work beautifully on mobile — making it possible to add tasks in 10 seconds from anywhere. +--- +## Why Issue Templates Matter for Squad +GitHub provides Issue Templates — a platform feature that pre-fills labels, fields, and structure when creating new issues. This guide shows how to configure templates that work smoothly with Squad's label-based routing. +Squad operates best when work is captured as GitHub Issues. 
But creating an issue from scratch takes time: you need to remember the right labels, format the description consistently, and ensure the structure matches what agents expect. +Issue templates solve this: +- **Pre-filled labels** — `squad` label applied automatically +- **Structured format** — Task description, acceptance criteria, priority fields +- **Mobile-friendly** — Works in the GitHub mobile app +- **Fast task creation** — Add work while walking the dog, waiting for coffee, or during a meeting +With templates, creating a Squad task takes 10 seconds instead of 2 minutes. +--- +## Basic Squad Task Template +Create `.github/ISSUE_TEMPLATE/squad-task.yml` in your repository: +```yaml +name: Squad Task +description: Create a task for the Squad team +title: "[Task]: " +labels: ["squad"] +body: + - type: markdown + attributes: + value: | + Thanks for creating a Squad task! Fill in the details below. + + - type: textarea + id: description + attributes: + label: Task Description + description: What needs to be done? + placeholder: | + Add dark mode support to the settings page. + + Current behavior: Settings page uses light theme only. + Expected behavior: Theme switcher in settings, respects system preference. + validations: + required: true + + - type: textarea + id: acceptance-criteria + attributes: + label: Acceptance Criteria + description: How will we know this is complete? + placeholder: | + - [ ] Theme switcher toggle added to settings + - [ ] Dark mode CSS applied when enabled + - [ ] Preference saved to localStorage + - [ ] System theme preference detected on first load + validations: + required: false + + - type: dropdown + id: priority + attributes: + label: Priority + description: How urgent is this task? 
+ options: + - Low + - Medium + - High + - Critical + validations: + required: false +``` +### What This Template Does +- **Applies `squad` label** — Ralph sees it in the untriaged queue +- **Structured sections** — Description, acceptance criteria, priority +- **Markdown support** — Use checklists, code blocks, links +- **Works on mobile** — GitHub app renders forms beautifully +--- +## Custom Labels for Routing +Ralph uses `.squad/routing.md` to route work to agents. Add `squad:{member}` labels to your template for pre-triaging: +```yaml +name: Documentation Task +description: Create a docs task (auto-routed to PAO) +title: "[Docs]: " +labels: ["squad", "squad:pao"] +body: + - type: textarea + id: description + attributes: + label: What needs documenting? + placeholder: | + Add a guide for setting up Ralph in production. +``` +When Ralph scans the board, this issue is already labeled `squad:pao` — no triage needed, work goes straight to PAO. +### Setting up labels for Squad routing +Create labels in your repository for each squad member: +```bash +# Using gh CLI +gh label create "squad:pao" --description "DevRel tasks" --color "1d76db" +gh label create "squad:flight" --description "Architecture and planning" --color "d73a4a" +gh label create "squad:fido" --description "Testing and quality" --color "0e8a16" +``` +Or use the [label sync workflow](../features/labels.md) to automate label management across repositories. +--- +## Template Variants +Different work types need different structures: +### Bug Report Template +`.github/ISSUE_TEMPLATE/bug-report.yml`: +```yaml +name: Bug Report +description: Report a bug for Squad to fix +title: "[Bug]: " +labels: ["squad", "bug"] +body: + - type: textarea + id: description + attributes: + label: Bug Description + description: What went wrong? + validations: + required: true + + - type: textarea + id: repro-steps + attributes: + label: Steps to Reproduce + placeholder: | + 1. Run `squad init` + 2. 
Create a team with 3 agents + 3. Try to export the configuration + 4. See error: "Cannot read property 'name' of undefined" + validations: + required: true + + - type: textarea + id: expected + attributes: + label: Expected Behavior + description: What should have happened? + validations: + required: false + + - type: input + id: version + attributes: + label: Squad Version + placeholder: "0.8.24" + validations: + required: false +``` +### Feature Request Template +`.github/ISSUE_TEMPLATE/feature-request.yml`: +```yaml +name: Feature Request +description: Suggest a new feature for Squad +title: "[Feature]: " +labels: ["squad", "enhancement"] +body: + - type: textarea + id: problem + attributes: + label: Problem Statement + description: What problem does this feature solve? + placeholder: "As a solo developer, I want to track time spent on tasks so I can invoice clients accurately." + validations: + required: true + + - type: textarea + id: solution + attributes: + label: Proposed Solution + description: How should this feature work? + validations: + required: false + + - type: textarea + id: alternatives + attributes: + label: Alternatives Considered + description: What other approaches did you think about? + validations: + required: false +``` +### Doc Update Template +`.github/ISSUE_TEMPLATE/doc-update.yml`: +```yaml +name: Documentation Update +description: Suggest a docs improvement +title: "[Docs]: " +labels: ["squad", "squad:pao", "documentation"] +body: + - type: textarea + id: what + attributes: + label: What needs updating? + placeholder: "The Ralph deployment guide doesn't mention log rotation." + validations: + required: true + + - type: input + id: page + attributes: + label: Page URL or Path + placeholder: "docs/scenarios/ralph-operations.md" + validations: + required: false +``` +--- +## Mobile Workflow +GitHub Issues + templates work from anywhere: +**On your phone:** +1. Open GitHub app +2. Navigate to repository +3. 
Tap **Issues** → **New Issue** +4. Select template +5. Fill form (voice-to-text works!) +6. Tap **Submit new issue** +**10 seconds later:** +- Issue created with `squad` label +- Ralph sees it in the next scan +- Agent picks it up in the background +This workflow enables "capture anywhere, process later" — add tasks while commuting, exercising, or in meetings without context-switching to a laptop. +--- +## Template Configuration +GitHub supports multiple templates. Create a config file to customize the issue creation experience: +`.github/ISSUE_TEMPLATE/config.yml`: +```yaml +blank_issues_enabled: false +contact_links: + - name: Squad Community Discussions + url: https://github.com/bradygaster/squad/discussions + about: Ask questions or share ideas in Discussions + - name: Squad Documentation + url: https://squad.dev + about: Read the full Squad documentation +``` +This disables blank issues (forcing template use) and provides helpful links when users click "New Issue." +--- +## Template Best Practices +- **Keep templates short** — Long forms reduce completion rates +- **Make most fields optional** — Only require what's absolutely necessary +- **Use placeholders** — Show examples of good descriptions +- **Pre-fill smart defaults** — Priority: Medium, Type: Task +- **Test on mobile** — Ensure forms render well in the GitHub app +- **Use dropdown for enums** — Priority, Type, Severity (reduces typos) +- **Add markdown help** — Link to GitHub markdown guide in template +--- +## Integration with Ralph +Ralph's heartbeat workflow (`.github/workflows/squad-heartbeat.yml`) scans for untriaged issues: +1. Issue created with `squad` label (from template) +2. Heartbeat workflow runs (every 30 min or on issue create) +3. Ralph reads `.squad/routing.md` to determine agent +4. Ralph adds `squad:{member}` label +5. Next heartbeat run (or in-session Ralph) assigns agent +If your template pre-fills `squad:{member}`, Ralph skips triage and goes straight to assignment. 
+--- +## Sample Prompts +``` +Show me untriaged squad issues +``` +Lists all issues with `squad` label but no `squad:{member}` assignment. +``` +Ralph, triage and assign the backlog +``` +Ralph reads routing rules, applies member labels, and prepares work for agents. +--- +## Notes +- Templates alone don't prevent manual issue creation — unless `blank_issues_enabled: false` is set in `config.yml`, users can still click "Open a blank issue" +- Templates are stored in `.github/ISSUE_TEMPLATE/` (note the underscore, not dash) +- Use `.yml` or `.yaml` extension (both work) +- Test templates by creating issues yourself before announcing to the team +- Mobile workflow requires GitHub app (iOS or Android) — works on tablets too +--- +## See Also +- [GitHub Issues Mode](./github-issues.md) — Issue-driven development workflow +- [Ralph — Work Monitor](./ralph.md) — Ralph's work monitoring behavior +- [Labels](./labels.md) — Label management and sync workflow +- [Routing](./routing.md) — How Ralph triages work to agents diff --git a/docs/src/content/docs/features/keda-scaling.md b/docs/src/content/docs/features/keda-scaling.md index 06fb5b1be..b59cd5800 100644 --- a/docs/src/content/docs/features/keda-scaling.md +++ b/docs/src/content/docs/features/keda-scaling.md @@ -1,83 +1,64 @@ ---- -title: KEDA Autoscaling -description: Autoscale Squad agents based on GitHub issue queue depth using the KEDA external scaler template. -order: 38 ---- - -# KEDA Autoscaling - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -**Try this to understand your scaling needs:** -``` -How many issues are currently queued for Squad agents? -``` - -KEDA (Kubernetes Event-Driven Autoscaling) is an open-source component that scales Kubernetes workloads based on external event sources. Squad ships an external scaler template that scales agent pods up and down based on the depth of your GitHub issue queue.
- ---- - -## When to Use This - -Use KEDA autoscaling when: - -- Squad agents run as Kubernetes pods (not local machines) -- Issue volume is unpredictable — bursts of work should spawn more agents automatically -- You want zero-agent idle cost when there is no work - -## Prerequisites - -- A Kubernetes cluster with KEDA installed ([keda.sh](https://keda.sh)) -- Squad agents packaged as container images and deployed as a `Deployment` -- A GitHub token with `repo` scope for issue queue polling - -## Setup - -1. Install KEDA on your cluster: - ```bash - helm repo add kedacore https://kedacore.github.io/charts - helm install keda kedacore/keda --namespace keda --create-namespace - ``` - -2. Apply the Squad KEDA `ScaledObject` template from `templates/keda/scaled-object.yaml`: - ```yaml - apiVersion: keda.sh/v1alpha1 - kind: ScaledObject - metadata: - name: squad-agents - spec: - scaleTargetRef: - name: squad-agent-deployment - minReplicaCount: 0 - maxReplicaCount: 10 - triggers: - - type: external - metadata: - scalerAddress: squad-external-scaler:8080 - owner: your-org - repo: your-repo - labels: "squad:ready" - targetQueueLength: "5" - authenticationRef: - name: github-token-secret - ``` - -3. 
Create the GitHub token secret: - ```bash - kubectl create secret generic github-token-secret \ - --from-literal=personalAccessToken= - ``` - -## Configuration Reference - -| Field | Description | -|-------|-------------| -| `minReplicaCount` | Agents to keep running when idle (use `0` for zero-cost idle) | -| `maxReplicaCount` | Hard ceiling on agent pods | -| `targetQueueLength` | Issues per agent pod (tune for task duration) | -| `labels` | Issue labels to count as "queued work" | - -## See Also - -- [Capability Routing](capability-routing.md) — route specific issues to specific agent types -- [Ralph — Work Monitor](ralph.md) — how Ralph picks up queued issues +--- +title: KEDA Autoscaling +description: Autoscale Squad agents based on GitHub issue queue depth using the KEDA external scaler template. +order: 38 +--- +# KEDA Autoscaling +**Try this to understand your scaling needs:** +``` +How many issues are currently queued for Squad agents? +``` +KEDA (Kubernetes Event-Driven Autoscaling) is an open-source component that scales Kubernetes workloads based on external event sources. Squad ships an external scaler template that scales agent pods up and down based on the depth of your GitHub issue queue. +--- +## When to Use This +Use KEDA autoscaling when: +- Squad agents run as Kubernetes pods (not local machines) +- Issue volume is unpredictable — bursts of work should spawn more agents automatically +- You want zero-agent idle cost when there is no work +## Prerequisites +- A Kubernetes cluster with KEDA installed ([keda.sh](https://keda.sh)) +- Squad agents packaged as container images and deployed as a `Deployment` +- A GitHub token with `repo` scope for issue queue polling +## Setup +1. Install KEDA on your cluster: + ```bash + helm repo add kedacore https://kedacore.github.io/charts + helm install keda kedacore/keda --namespace keda --create-namespace + ``` +2. 
Apply the Squad KEDA `ScaledObject` template from `templates/keda/scaled-object.yaml`: + ```yaml + apiVersion: keda.sh/v1alpha1 + kind: ScaledObject + metadata: + name: squad-agents + spec: + scaleTargetRef: + name: squad-agent-deployment + minReplicaCount: 0 + maxReplicaCount: 10 + triggers: + - type: external + metadata: + scalerAddress: squad-external-scaler:8080 + owner: your-org + repo: your-repo + labels: "squad:ready" + targetQueueLength: "5" + authenticationRef: + name: github-token-secret + ``` +3. Create the GitHub token secret: + ```bash + kubectl create secret generic github-token-secret \ + --from-literal=personalAccessToken=<your-github-token> + ``` +## Configuration Reference +| Field | Description | +|-------|-------------| +| `minReplicaCount` | Agents to keep running when idle (use `0` for zero-cost idle) | +| `maxReplicaCount` | Hard ceiling on agent pods | +| `targetQueueLength` | Issues per agent pod (tune for task duration) | +| `labels` | Issue labels to count as "queued work" | +## See Also +- [Capability Routing](capability-routing.md) — route specific issues to specific agent types +- [Ralph — Work Monitor](ralph.md) — how Ralph picks up queued issues diff --git a/docs/src/content/docs/features/labels.md b/docs/src/content/docs/features/labels.md index 0f9691675..8fc55e39c 100644 --- a/docs/src/content/docs/features/labels.md +++ b/docs/src/content/docs/features/labels.md @@ -1,29 +1,19 @@ # Label Taxonomy - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to apply workflow labels:** ``` Apply the go:yes label to issue #42 and target it for v0.5.0 ``` - **Try this to filter by priority:** ``` Show me all issues with priority:p0 ``` - **Try this to route work to a specific agent:** ``` Add squad:fenster to issue #23 ``` - Squad uses structured, namespaced labels as the state machine. Labels drive workflow automation — not just tags.
Five namespaces control lifecycle, priority, ownership, and release targeting. - --- - ## The Five Namespaces - | Namespace | Purpose | Values | Mutual Exclusivity | |-----------|---------|--------|-------------------| | `go:` | Verdict — yes/no/needs-research | `go:yes`, `go:no`, `go:needs-research` | ✅ One per issue | @@ -31,49 +21,33 @@ Squad uses structured, namespaced labels as the state machine. Labels drive work | `type:` | Issue category | `type:feature`, `type:bug`, `type:spike`, `type:docs`, `type:chore`, `type:epic` | ✅ One per issue | | `priority:` | Urgency level | `priority:p0`, `priority:p1`, `priority:p2` | ✅ One per issue | | `squad:{member}` | Agent assignment | `squad:fenster`, `squad:mcmanus`, `squad:hockney` | ❌ Can have multiple (pair work) | - ## Mutual Exclusivity Rules - Within `go:`, `release:`, `type:`, and `priority:` namespaces, **only one label is allowed**. Applying a second label in the same namespace auto-removes the first. - Example: - Issue has `go:needs-research` - You apply `go:yes` - Result: `go:needs-research` removed, `go:yes` applied - The `squad:{member}` namespace allows **multiple labels** for collaborative work: - `squad:fenster` + `squad:hockney` = pair programming or handoff - ## Workflow Automation - Labels drive four automation layers: - ### 1. Enforcement (Mutual Exclusivity) - GitHub Actions workflow `label-enforcement.yml` watches for label changes. If multiple labels from the same namespace are applied, it removes all but the most recent. - ### 2. Sync (Cross-Namespace Consistency) - Some label changes trigger cascading updates: - `go:no` applied → auto-adds `release:backlog`, removes other release targets - `priority:p0` applied → ensures `go:yes` is set (p0 implies approved) - ### 3. 
Triage (Auto-Assignment) - Ralph (work monitor) uses labels to route work: - `squad:fenster` → Fenster picks it up - No `squad:*` + `type:bug` → Routes to Tester or Lead based on routing.md - `go:needs-research` → Routes to Lead for investigation - ### 4. Heartbeat (Periodic Check) - The `squad-heartbeat.yml` workflow runs every 30 minutes and: - Finds issues with `squad` label but no `squad:{member}` → auto-triages - Finds `go:yes` + `squad:{member}` but no assignee → spawns agent - Finds stale `go:needs-research` (>7 days) → escalates to Lead - ## State Machine Flow - ``` New issue → squad label → Triage ↓ @@ -87,21 +61,14 @@ New issue → squad label → Triage ↓ Merge → Issue closed ``` - ## Adding Labels - Labels are created automatically during `init` or `upgrade`. To add custom labels: - ```bash gh label create "squad:designer" --color "0366d6" --description "Work assigned to Designer" ``` - Or via the GitHub UI: Issues → Labels → New label - ## Label Colors - Squad uses a consistent color scheme: - | Namespace | Color | Hex | |-----------|-------|-----| | `go:` | Green (yes), Red (no), Yellow (research) | `#0e8a16`, `#d73a4a`, `#fbca04` | @@ -109,42 +76,32 @@ Squad uses a consistent color scheme: | `type:` | Purple | `#6f42c1` | | `priority:` | Orange (p0), Yellow (p1), Gray (p2) | `#d93f0b`, `#fbca04`, `#d4c5f9` | | `squad:{member}` | Teal | `#008672` | - ## Querying by Label - ```bash # All approved features for v0.4.0 gh issue list --label "go:yes,release:v0.4.0,type:feature" - # All p0 bugs assigned to Fenster gh issue list --label "priority:p0,type:bug,squad:fenster" - # All issues needing research gh issue list --label "go:needs-research" ``` - ## Sample Prompts - ``` Mark issue #42 as approved for v0.4.0 ``` Applies `go:yes` and `release:v0.4.0` labels. Removes any conflicting labels. - ``` Change issue #15 from needs-research to no ``` Updates verdict: removes `go:needs-research`, applies `go:no`, adds `release:backlog`. 
- ``` Assign issue #28 to Fenster and Hockney for pair work ``` Applies `squad:fenster` and `squad:hockney` labels. Both agents can pick it up. - ``` List all p0 features approved for the next release ``` Queries: `priority:p0 + type:feature + go:yes + release:{current milestone}`. - ``` Show me all issues in the backlog ``` diff --git a/docs/src/content/docs/features/loop.md b/docs/src/content/docs/features/loop.md index de2829e25..f14503184 100644 --- a/docs/src/content/docs/features/loop.md +++ b/docs/src/content/docs/features/loop.md @@ -1,73 +1,48 @@ # Loop — Prompt-driven work loop - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to initialize a loop:** ``` squad loop --init ``` - **Try this to run your loop:** ``` squad loop ``` - **Try this with monitoring:** ``` squad loop --monitor-email --monitor-teams ``` - Loop reads a `loop.md` prompt file and runs it as a continuous work loop. No GitHub issues needed — the prompt is the work driver. Each cycle, Loop sends the prompt to Copilot, collects the work, and loops again at your chosen interval. - --- - ## What loop does - Loop is a prompt-driven work engine. Unlike Ralph (which routes GitHub issues to team members), Loop takes a **single `.md` file** with a prompt and your work directives, then runs that prompt continuously. - The `loop.md` file contains: - - **Frontmatter** — configuration (how often to loop, timeout, whether you've set up the loop) - **Prompt** — the actual work driver (what Copilot should do each cycle) - When you run `squad loop`, it: - 1. Reads `loop.md` 2. Checks that frontmatter is marked `configured: true` 3. Sends the prompt to Copilot 4. Collects the output (work done, decisions made, artifacts created) 5. Waits for the interval 6. 
Repeats - **Why use Loop instead of Ralph?** - - **No issue queue** — You drive the work with a prompt, not GitHub labels - **Continuous** — One cycle after another, forever (or until you stop it) - **Lightweight** — One file to maintain, no routing rules or complex setup - **Flexible** — Mixed modes (team work queue + monitoring), repeated tasks (watch a folder, check metrics, cleanup jobs) - ## Prerequisites - By default, Loop requires: - - **GitHub CLI (`gh`)** — Loop uses `gh` for its default agent integration - **GitHub Copilot CLI extension (`gh copilot`)** — Loop uses this by default to send prompts to Copilot - **A `loop.md` file** — the prompt file that drives your work - If you don't want to use `gh copilot`, pass `--agent-cmd` to provide an alternative agent command. In that case, `gh` and the Copilot extension are not required for the agent step. - > **MCP auto-injection:** When using the default Copilot agent, `squad loop` automatically injects `--yolo --additional-mcp-config @.mcp.json` into every Copilot invocation. This ensures MCP tools are available in non-interactive (`-p`) mode. See [Copilot CLI MCP Trust Gate](./copilot-mcp-trust.md). - ## Getting started - ### Step 1: Initialize your loop - ```bash squad loop --init ``` - This creates a starter `loop.md` file in your project root: - ```markdown --- configured: false @@ -75,22 +50,15 @@ interval: 10 timeout: 30 description: "My work loop" --- - # Work Loop Prompt - You are a team member on this squad. Each cycle, you will: - 1. Check for pending work 2. Complete what you can within the timeout 3. Document your results - Start with small, focused tasks. Expand the scope once you're confident the loop is working. ``` - ### Step 2: Edit `loop.md` - Update the prompt to describe the work you want done each cycle: - ```markdown --- configured: false @@ -98,24 +66,17 @@ interval: 10 timeout: 20 description: "Monitor and fix failing CI" --- - # CI Monitoring Loop - Each cycle, you will: - 1. 
Check GitHub Actions workflows for failures in the main branch 2. If any workflow failed in the last 10 minutes, investigate the failure 3. If it's a flaky test, flag it 4. If it's a real issue, create a PR with a fix 5. Report findings (failures found, fixes created, flaky tests) - Keep runs focused — 20 minutes max per cycle. ``` - ### Step 3: Enable the loop - Set `configured: true` in the frontmatter to unlock the loop: - ```markdown --- configured: true @@ -124,28 +85,20 @@ timeout: 20 description: "Monitor and fix failing CI" --- ``` - ### Step 4: Run the loop - ```bash squad loop ``` - Loop will run your prompt every 10 minutes until you press Ctrl+C. - ## Frontmatter reference - The YAML frontmatter at the top of `loop.md` controls Loop's behavior: - | Field | Type | Required | Default | Description | |-------|------|:--------:|:-------:|-------------| | `configured` | boolean | Yes | `false` | Safety check — must be `true` to run. Prevents accidental execution of incomplete loops. | | `interval` | number | No | `10` | Minutes between cycles. Loop will wait this long after each cycle completes before running again. | | `timeout` | number | No | `30` | Max runtime in minutes for each cycle. If Copilot doesn't finish within this time, the cycle is marked incomplete and the next cycle starts. | | `description` | string | No | `"Squad Loop"` | Human-readable description of what this loop does. Shown in logs and status; the default is used when `description` is omitted. 
| - Example: - ```markdown --- configured: true @@ -154,18 +107,13 @@ timeout: 45 description: "Process inbox and clean up stale branches" --- ``` - ## Writing a good loop prompt - A good loop prompt is: - - **Specific** — Clear about what work to do each cycle - **Bounded** — Doesn't try to do everything at once; respects the timeout - **Observable** — Reports what it did so you can track progress - **Idempotent** — Safe to run repeatedly; doesn't duplicate work or corrupt state - ### Example: Team work queue - ```markdown --- configured: true @@ -173,21 +121,15 @@ interval: 5 timeout: 15 description: "Process team work queue from Teams" --- - # Team Work Queue - Each cycle: - 1. Query our Teams channel for pending action items (messages with 🎯 emoji) 2. For each item, create a GitHub issue with label `teams:pending` 3. Triage to the right team member based on topic 4. Report how many items were added - Keep this quick — 15 minutes per cycle, process at most 3 items. ``` - ### Example: Monitoring and alerting - ```markdown --- configured: true @@ -195,21 +137,15 @@ interval: 30 timeout: 20 description: "Monitor uptime and alert team" --- - # Uptime Monitor - Each cycle: - 1. Check status.io for incidents on our services 2. Query monitoring dashboard for error rates 3. If any error rate > 5%, post alert to Teams #alerts channel 4. Report findings (status, error rates, alerts sent) - This is passive — no fixing, just reporting. ``` - ### Example: Mixed mode (queue + monitoring) - ```markdown --- configured: true @@ -217,47 +153,32 @@ interval: 10 timeout: 30 description: "Work queue + monitoring + cleanup" --- - # Daily Squad Loop - Each cycle, in order: - 1. **Monitor** — Check for CI failures, Dependabot alerts 2. **Triage** — Create issues for alerts 3. **Work** — Claim the next issue from the backlog 4. **Cleanup** — Delete stale feature branches older than 30 days 5. **Report** — Summary of work, alerts, deletions - Budget: 30 minutes per cycle. 
Start with most urgent work, drop to less urgent if running tight on time. ``` - ## Composing with capabilities - Loop works with Squad's monitoring and bridge capabilities. Add flags to extend what Loop can see and do: - ```bash # Monitor email for actionable items each cycle squad loop --monitor-email - # Monitor Teams for action items each cycle squad loop --monitor-teams - # Both email and Teams squad loop --monitor-email --monitor-teams - # Enable self-pull (fetch latest code before each cycle) squad loop --self-pull - # Combine multiple capabilities squad loop --monitor-email --monitor-teams --self-pull ``` - When enabled, these capabilities are available inside your loop prompt as context. For example, with `--monitor-email`, your prompt can reference email alerts and action items. - ## CLI reference - All `squad loop` flags: - | Flag | Type | Description | Example | |------|------|-------------|---------| | `--init` | boolean | Create a starter `loop.md` file | `squad loop --init` | @@ -269,41 +190,31 @@ All `squad loop` flags: | `--monitor-email` | boolean | Scan email for alerts each cycle | `squad loop --monitor-email` | | `--monitor-teams` | boolean | Scan Teams for action items each cycle | `squad loop --monitor-teams` | | `--self-pull` | boolean | Run `git fetch && git pull` before each cycle | `squad loop --self-pull` | - ### Examples - **Basic loop:** ```bash squad loop ``` - **Custom loop file:** ```bash squad loop --file scripts/cleanup.md ``` - **Faster interval:** ```bash squad loop --interval 3 --timeout 15 ``` - **With monitoring:** ```bash squad loop --monitor-email --monitor-teams --self-pull ``` - **Override frontmatter with CLI:** ```bash squad loop --interval 2 --timeout 45 ``` - CLI flags override frontmatter values. If your `loop.md` says `interval: 10` but you run `squad loop --interval 3`, Loop uses 3 minutes. - > **Note:** Loop configuration is currently set via frontmatter in `loop.md` and CLI flags. 
`.squad/config.json` support is planned for a future release. - ## Notes - - Loop is session-scoped — it runs in your terminal and stops when you press Ctrl+C - Each cycle gets its own Copilot session; state is not preserved between cycles unless your prompt explicitly handles it - Loop respects `.squad/` team context: charters, routing, decisions, and directives are all available to the prompt -- For fully unattended monitoring, use `squad watch` instead — it's designed for running in a separate terminal 24/7 +- For event-driven monitoring, use `squad watch` instead — it's designed for running in a separate terminal 24/7 diff --git a/docs/src/content/docs/features/marketplace.md b/docs/src/content/docs/features/marketplace.md index 25355c58c..42d19e71d 100644 --- a/docs/src/content/docs/features/marketplace.md +++ b/docs/src/content/docs/features/marketplace.md @@ -1,30 +1,18 @@ # Marketplace Guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Issue:** #39 (M5-16) - --- - ## Overview - The Squad marketplace lets teams export, import, browse, and install agent configurations. This guide covers the full lifecycle: packaging, publishing, discovery, installation, versioning, caching, and security. - ## Export / Import - Export your Squad configuration as a portable bundle: - ```typescript import { exportSquadConfig, importSquadConfig } from '@squad/sdk'; - // Export const bundle = await exportSquadConfig(config, { includeHistory: false, anonymize: true, format: 'json', }); - // Import into another project const result = await importSquadConfig(bundle, targetDir, { merge: true, @@ -32,69 +20,46 @@ const result = await importSquadConfig(bundle, targetDir, { }); console.log(`Applied ${result.changes.length} changes`); ``` - `ExportBundle` contains config, agents, skills, routing rules, and metadata. `splitHistory()` separates shareable history from private data. 
`detectConflicts()` identifies merge conflicts; `resolveConflicts()` applies resolution strategies (`keep-existing`, `use-incoming`, `merge`, `manual`). - ## Agent Repositories - Pin agents to specific versions for reproducible teams: - ```typescript import { pinAgentVersion, getAgentVersion, configureAgentRepo } from '@squad/sdk'; - await pinAgentVersion({ agentId: 'backend', sha: 'abc123', source: 'github' }); const pin = await getAgentVersion('backend'); // { agentId: 'backend', sha: 'abc123', timestamp: ..., source: 'github' } ``` - `configureAgentRepo()` validates GitHub repository config. `AgentRepoOperations` provides push/pull for agent definitions. - ## Versioning & Caching - `AgentCache` provides TTL-based caching for remote agent definitions: - - Agent definitions: 1-hour TTL (`DEFAULT_AGENT_TTL`) - Skills: 5-minute TTL (`DEFAULT_SKILL_TTL`) - `CacheStats` tracks hits, misses, evictions, and size - `parseSemVer()` and `compareSemVer()` handle version comparison. `bumpVersion()` supports major/minor/patch/prerelease increments. - ## Security - 7 security rules (`SECURITY_RULES`) validate remote agents before installation: - ```typescript import { validateRemoteAgent, generateSecurityReport } from '@squad/sdk'; - const report = await validateRemoteAgent(agentDefinition); if (report.blocked.length > 0) { console.error('Agent blocked:', report.blocked); const sanitized = quarantineAgent(agentDefinition); } ``` - `SecurityReport` includes pass/fail per rule, warnings, blocked items, and a `riskScore`. `quarantineAgent()` strips injection attempts and caps tool permissions. Rules check for: prompt injection, excessive permissions, suspicious tool patterns, and more. 
- ## Marketplace Browse & Install - `MarketplaceBrowser` provides CLI-based discovery: - ```typescript import { MarketplaceBrowser } from '@squad/sdk'; - const browser = new MarketplaceBrowser(fetcher); const results = await browser.search({ text: 'backend API', category: 'Development', sort: 'downloads', }); - // Install an entry const installResult = await browser.install(results.entries[0], targetDir); ``` - `formatEntryList()` and `formatEntryDetails()` render entries for terminal output. `MarketplaceBackend` provides the reference API. `packageForMarketplace()` bundles a project for publishing; `validatePackageContents()` checks the package before upload. - ## Extensions - `ExtensionAdapter` bridges Squad to the Copilot Extensions API. `toExtensionConfig()` converts Squad config to extension format. `registerExtension()` handles marketplace registration. diff --git a/docs/src/content/docs/features/mcp-frontmatter.md b/docs/src/content/docs/features/mcp-frontmatter.md index ae2c41122..f99b3c29b 100644 --- a/docs/src/content/docs/features/mcp-frontmatter.md +++ b/docs/src/content/docs/features/mcp-frontmatter.md @@ -2,41 +2,27 @@ title: MCP Frontmatter — squad init --mcp-frontmatter description: Write MCP server configuration directly into the Squad agent file's frontmatter instead of .copilot/mcp-config.json, for harnesses that read agent-level MCP config. --- - # MCP Frontmatter — `squad init --mcp-frontmatter` - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- **Try this when your agent harness reads frontmatter-level MCP config:** ```bash squad init --mcp-frontmatter ``` - By default, `squad init` writes MCP server configuration to two places: - `.copilot/mcp-config.json` (workspace-level for Copilot CLI) - `~/.copilot/mcp-config.json` (user-level, ensures `copilot -p` non-interactive mode also sees the MCP — see [#1247](https://github.com/bradygaster/squad/issues/1247)) - The `--mcp-frontmatter` flag changes this: instead of writing JSON config files, MCP server declarations go directly into the YAML frontmatter of `.github/agents/squad.agent.md` (or `.github/agents/squad.md` if you've exported with [Coordinator-as-Agent Export](/squad/docs/features/coordinator-as-agent-export/)). - --- - ## When to use it - | Your setup | Use `--mcp-frontmatter`? | |------------|-------------------------| | Standard Copilot CLI users | ❌ No — default config files work fine | | Custom agent harness that reads MCP from agent frontmatter | ✅ Yes | | Building or distributing a Squad agent as a self-contained file | ✅ Yes — keeps MCP config inline with the agent | | Some VS Code extensions / custom IDE plugins that prefer per-agent MCP declarations | ✅ Yes | - If you're not sure, you don't need this flag. It's specifically for environments where the agent file itself is the source of truth for MCP configuration. - --- - ## What the output looks like - Without `--mcp-frontmatter` (default), the agent file frontmatter is: - ```yaml --- name: squad @@ -45,9 +31,7 @@ model: claude-opus-4.5 tools: ["*"] --- ``` - And `.copilot/mcp-config.json` separately contains: - ```json { "mcpServers": { @@ -59,9 +43,7 @@ And `.copilot/mcp-config.json` separately contains: } } ``` - With `--mcp-frontmatter`, the MCP server moves into the frontmatter: - ```yaml --- name: squad @@ -75,28 +57,17 @@ mcpServers: tools: ["*"] --- ``` - And the standalone `.copilot/mcp-config.json` is not written (or contains only non-squad servers). 
- --- - ## Effect on `squad upgrade` - `squad upgrade` detects which mode the project is using (looks for the `mcpServers` key in agent frontmatter vs. presence of `.copilot/mcp-config.json`) and preserves the choice. You don't need to re-pass `--mcp-frontmatter` on every upgrade. - To switch modes after init, re-run `squad init --mcp-frontmatter` (or run `squad init` without the flag to switch back). The previous MCP config is migrated. - --- - ## Limitations - - **Less robust for `copilot -p` non-interactive mode.** Standard mode pins MCP at user level too, which solves the workspace-only loading gap (PR [#1251](https://github.com/bradygaster/squad/pull/1251)). Frontmatter mode skips that user-level write — so `copilot -p` may not see the squad MCP unless the harness reads frontmatter directly. - **No second-layer fallback.** If the harness that reads frontmatter MCP fails to load it correctly, there's no `.copilot/mcp-config.json` to fall back to. Test in your specific harness before adopting. - **Schema is harness-specific.** The frontmatter `mcpServers` key follows the Copilot CLI convention, but other harnesses may expect different key names (`mcp_servers`, `mcp.servers`, etc.). Check your harness's spec. - --- - ## See also - - [MCP Integration](/squad/docs/features/mcp/) — the broader MCP system - [Coordinator-as-Agent Export](/squad/docs/features/coordinator-as-agent-export/) — bundling MCP config into a self-contained agent file diff --git a/docs/src/content/docs/features/mcp.md b/docs/src/content/docs/features/mcp.md index 55a7c14f9..1f6058c38 100644 --- a/docs/src/content/docs/features/mcp.md +++ b/docs/src/content/docs/features/mcp.md @@ -1,66 +1,41 @@ # MCP Setup Guide for Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - **Try this to discover available integrations:** ``` Show me which MCP servers are available ``` - **Try this to enable a specific service:** ``` Configure the GitHub MCP server ``` - MCP (Model Context Protocol) servers extend Squad with external services — GitHub, notifications, deployments, Trello, and more. Agents discover and use MCP tools automatically. - --- - ## What MCP Means for Squad - MCP bridges Squad agents and external services. Agents use MCP tools to send notifications, query GitHub, monitor deployments, integrate with Trello, and more. You define which services are available; agents discover and use them automatically. - --- - ## MCP Configuration Files - There are two places to configure MCP, depending on your platform: - | Platform | Config File | How to Edit | Startup | |----------|------------|-----------|---------| | **Copilot CLI** | `.copilot/mcp-config.json` | Text editor | Add to shell initialization (`~/.bashrc`, `~/.zshrc`, etc.) | | **VS Code** | `.vscode/settings.json` | VS Code Settings GUI or JSON editor | Built-in; restarts Copilot extension | - This guide covers both. Pick the one that matches your workflow. - --- - ## Step-by-Step: CLI Setup - ### Step 1: Create the `.copilot` directory and config file - Open your terminal: - ```bash mkdir -p ~/.copilot touch ~/.copilot/mcp-config.json ``` - ### Step 2: Add your first MCP server - Open `~/.copilot/mcp-config.json` in your editor: - ```bash # macOS/Linux nano ~/.copilot/mcp-config.json - # Windows (PowerShell) notepad $PROFILE\..\mcp-config.json ``` - Paste this base structure: - ```json { "mcpServers": { @@ -74,67 +49,43 @@ Paste this base structure: } } ``` - Replace `/absolute/path/to/github-mcp.js` with the actual path to your MCP server script. The `env` object passes environment variables to the server. 
- ### Step 3: Add your GitHub token - If you already ran `gh auth login`, your token lives in `~/.config/gh/hosts.yml` (macOS/Linux) or `%APPDATA%\GitHub CLI\hosts.yml` (Windows). - Instead of pasting your token directly into the config file, **use an environment variable**: - ```bash # macOS/Linux: Add to ~/.bashrc or ~/.zshrc export GITHUB_TOKEN=$(gh auth token) - # Windows PowerShell: Add to your profile $env:GITHUB_TOKEN = $(gh auth token) ``` - Then reference it in your config: - ```json "env": { "GITHUB_TOKEN": "$GITHUB_TOKEN" } ``` - ### Step 4: Restart Copilot and verify - ```bash # Restart the CLI copilot ``` - In your Squad session, ask: - ``` > Show me available MCP tools ``` - If configured correctly, you'll see your GitHub server and its available tools (e.g., `github.list_issues`, `github.get_commit`). - --- - ## Step-by-Step: VS Code Setup - ### Step 1: Open VS Code Settings - - **macOS:** Code → Preferences → Settings - **Windows:** File → Preferences → Settings - Or use the keyboard shortcut: `Cmd+,` (macOS) or `Ctrl+,` (Windows). - ### Step 2: Search for "MCP" - In the settings search box, type `MCP` to find Copilot extension settings. - ### Step 3: Add an MCP server - Look for the **"Copilot MCP Servers"** section (or similar — naming varies by Copilot version). - Click **"Edit in settings.json"** to see the raw configuration: - ```json "copilot.mcp.servers": { "github": { @@ -146,31 +97,19 @@ Click **"Edit in settings.json"** to see the raw configuration: } } ``` - The `${env:GITHUB_TOKEN}` syntax reads from your shell environment. Replace `/absolute/path/to/github-mcp.js` with the actual path to your MCP server script. - ### Step 4: Add environment variables - Open your VS Code integrated terminal (`` Ctrl+` `` or `` Cmd+` ``): - ```bash export GITHUB_TOKEN=$(gh auth token) ``` - This sets the token for the current terminal session. To make it permanent, add it to your shell profile (see CLI Step 3 above). 
- ### Step 5: Reload VS Code - Press `Cmd+Shift+P` (macOS) or `Ctrl+Shift+P` (Windows) and select **"Copilot: Reload Copilot Servers"**. - --- - ## Example: GitHub MCP (Already Included) - Most Squad installs come with GitHub MCP pre-configured. Here's what it looks like: - ### CLI: `.copilot/mcp-config.json` - ```json { "mcpServers": { @@ -184,11 +123,8 @@ Most Squad installs come with GitHub MCP pre-configured. Here's what it looks li } } ``` - Replace `/absolute/path/to/github-mcp.js` with the actual path to your GitHub MCP server script. - ### VS Code: `.vscode/settings.json` - ```json { "copilot.mcp.servers": { @@ -202,38 +138,27 @@ Replace `/absolute/path/to/github-mcp.js` with the actual path to your GitHub MC } } ``` - Replace `/absolute/path/to/github-mcp.js` with the actual path to your GitHub MCP server script. - **What it does:** - List issues, PRs, and branches - Create, update, and search issues - Fetch commit history and diffs - Post and edit PR comments - --- - ## Example: Trello MCP - Trello MCP lets agents interact with your Trello boards — create cards, move them between lists, and update descriptions. - ### Step 1: Get your Trello API key and token - 1. Visit https://trello.com/app-key 2. Copy your **API Key** 3. Click "Tokens" and generate a new token (grant read/write permissions) 4. 
Copy the **Token** - ### Step 2: Add to your MCP config - Add the Trello server configuration (see [MCP Configuration Files](#mcp-configuration-files) for CLI vs VS Code): - | Variable | How to Get It | |----------|---------------| | `TRELLO_API_KEY` | Visit https://trello.com/app-key | | `TRELLO_TOKEN` | Click "Tokens" on the API key page, generate with read/write permissions | | `TRELLO_BOARD_ID` | Open the board, get ID from URL: `trello.com/b/{{BOARD_ID}}/{{BOARD_NAME}}` | - **Config template:** ```json { @@ -250,27 +175,17 @@ Add the Trello server configuration (see [MCP Configuration Files](#mcp-configur } } ``` - Replace `/absolute/path/to/trello-mcp.js` with the actual path to your Trello MCP server script. - Find your **board ID** by opening the board on Trello and looking at the URL: `trello.com/b/{{BOARD_ID}}/{{BOARD_NAME}}`. - ### Step 3: Test it - In your Squad session: - ``` > Keaton, create a Trello card for the auth refactor ``` - Agents will now automatically propose Trello tasks for tracking work items. - --- - ## Example: Aspire Dashboard MCP (Deployment Monitoring) - For Aspire projects, configure the Aspire Dashboard MCP for deployment monitoring: - **Config template:** ```json { @@ -286,54 +201,36 @@ For Aspire projects, configure the Aspire Dashboard MCP for deployment monitorin } } ``` - Replace `/absolute/path/to/aspire-mcp.js` with the actual path to your Aspire MCP server script. - ### Step 3: Use it - You can now ask: - ``` > Squad, check the Aspire dashboard — any service errors? ``` - The monitoring agent (or any agent) pulls live deployment status and alerts you to issues. - --- - ## How Agents Discover and Use MCP Tools - Agents don't need special setup to discover tools. Here's the flow: - 1. **At spawn time**, the agent receives the MCP configuration 2. **Agent lists available tools** — it reads what's configured and knows what's available 3. 
**Agent uses tools naturally** — when working, if a tool matches the task (e.g., "create a GitHub issue"), agents call it automatically 4. **Tools return results** — the agent receives structured data back (e.g., issue ID, status, etc.) and continues working - **See also:** [Skills System](./skills.md) — how agents learn reusable patterns for complex MCP workflows. - --- - ## Troubleshooting - ### MCP Server Not Starting - **Symptom:** "MCP server failed to start" error in Copilot logs. - **Fix:** - 1. **Verify the command path:** ```bash ls -la /absolute/path/to/mcp-server.js ``` The file must exist and be executable. Replace `/absolute/path/to/mcp-server.js` with your actual MCP server path. - 2. **Verify Node.js is installed:** ```bash node --version ``` Must be Node 18+. - 3. **Check environment variables:** ```bash echo $GITHUB_TOKEN @@ -342,76 +239,57 @@ Agents don't need special setup to discover tools. Here's the flow: ```bash export GITHUB_TOKEN=$(gh auth token) ``` - 4. **Restart Copilot:** ```bash copilot quit copilot ``` - ### Tools Not Appearing in Agent Responses - **Symptom:** Agent says "I don't have access to GitHub tools" even though you configured MCP. - **Fix:** - 1. **Verify config syntax:** ```bash # CLI cat ~/.copilot/mcp-config.json | jq . # Should be valid JSON; if not, `jq` will error ``` - 2. **Restart Copilot to reload config:** ```bash copilot quit copilot ``` - 3. **Test the MCP server directly:** ```bash node /absolute/path/to/mcp-server.js ``` It should start without errors. If it crashes, there's a server-side issue. Replace `/absolute/path/to/mcp-server.js` with your actual MCP server path. - ### Authentication Errors - **Symptom:** "Authentication failed" or "401 Unauthorized" when an agent tries to use a tool. - **Fix:** - 1. **Verify the token is valid:** ```bash # For GitHub gh auth status ``` Should show "Logged in as {username}". - 2. 
**For other services (Trello, Discord, etc.):** - Manually test the API key by calling the service: ```bash curl -H "Authorization: Bearer YOUR_TOKEN" https://api.service.com/test ``` - 3. **Check token expiration:** Some services (Discord, Slack) rotate tokens. Regenerate if old. - 4. **Update the config with the new token:** ```bash export TRELLO_TOKEN="new-token" copilot quit copilot ``` - ### Too Many MCP Servers = Startup Lag - **Symptom:** Copilot takes 30+ seconds to start after adding 5+ MCP servers. - **Fix:** - 1. **Only configure servers you actually use:** Remove unused MCP servers from your config. - 2. **Use lazy loading (if your MCP framework supports it):** ```json { @@ -425,55 +303,35 @@ Agents don't need special setup to discover tools. Here's the flow: } ``` This starts the server only when its first tool is called. Replace `/absolute/path/to/github-mcp.js` with your actual MCP server path. - --- - ## Sample Prompts - ### Setting up notifications - ``` I want to get pinged on Teams when agents need input. Walk me through it. ``` - Squad will point you to the [Notifications Guide](./notifications.md#quick-start-teams-simplest-path), where you configure a Teams webhook and an MCP notification server. - ### Adding Trello integration - ``` Connect my Trello board so agents can create cards. My board is at https://trello.com/b/YOUR_BOARD_ID ``` - Agents will ask for your API key and set up the Trello MCP server. - ### Checking MCP health - ``` Show me all configured MCP servers and which ones are working. ``` - Agents will test each server and report status. - ### Using GitHub data in work - ``` Before building the feature, check GitHub for related open issues and PRs. ``` - Agents automatically use the GitHub MCP to search and report findings. - ### Monitoring deployments (Aspire) - ``` Aspire dashboard is running at localhost:18888. Set up monitoring so you can tell me about deployment issues. 
``` - Agents configure Aspire MCP and start checking service health automatically. - --- - ## See Also - - [Notifications Guide](./notifications.md) — set up agent notifications via MCP - [Skills System](./skills.md) — how agents learn complex MCP workflows - [GitHub Issues Integration](./github-issues.md) — already configured GitHub MCP in action diff --git a/docs/src/content/docs/features/memory.md b/docs/src/content/docs/features/memory.md index f84d14e05..b4ccc4098 100644 --- a/docs/src/content/docs/features/memory.md +++ b/docs/src/content/docs/features/memory.md @@ -1,46 +1,32 @@ # Memory System - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to query team decisions:** ``` What decisions has the team made about testing strategy? ``` - **Try this to establish a new rule:** ``` Always use single quotes in TypeScript ``` - **Try this to check agent knowledge:** ``` What does Kane remember about the authentication system? ``` - Squad remembers the durable things that help future work — decisions, conventions, architecture patterns, and individual agent learnings. It should not retain secrets, raw logs, transient CI/PR status, or other data that is unsafe or too short-lived to become memory. - --- - ## Memory Layers - Squad's memory is layered. Each layer serves a different purpose, and knowledge grows with every session. - By default, Squad memory is local worktree memory stored in `.squad/` files. Future provider work keeps that local model as the default while adding a governance layer for classification, safety, and optional semantic durable memory such as Copilot Memory. External semantic memory is opt-in; it is not required for `squad init` or Copilot custom agents using the prompt-only `.squad/` fallback. - `squad init` and `squad upgrade` scaffold a local-only governance policy at `.squad/memory/config.json`. 
The default provider is `local`, Copilot Memory is disabled, and audit records are written to `.squad/memory/audit.jsonl` without storing memory content. - Tool-backed runtimes can use governed operations: - ```text memory.classify memory.write @@ -49,22 +35,17 @@ memory.promote memory.delete memory.audit ``` - Governed memory records include load-guidance metadata so prompts and providers can choose what to load without weakening safety gates: - | Tag | Meaning | | --- | --- | | `[ALWAYS]` | Durable policies and decisions that should be loaded eagerly. | | `[ON-DEMAND]` | Stable local or semantic facts retrieved when relevant to a query. | | `[ARCHIVE]` | Superseded/deleted entries and tombstones kept for audit/history, not active prompt loading. | | `[NEVER]` | Forbidden or transient content that must not be persisted or loaded. | - When an entry is promoted or superseded, the previous index entry is marked `[ARCHIVE]` and records `supersededBy` so tooling can follow the forward link to the active successor. - The CLI exposes the same local bridge: - ```bash squad memory classify "Always run tests before merge" squad memory write --content "Use Vitest for SDK regression tests" --class DECISION --author scribe @@ -72,11 +53,9 @@ squad memory search --query "Vitest" squad memory audit squad memory provider ``` - Use `--log-level none|error|info|debug` (or `--verbose`) when troubleshooting memory command activity. For persistent project-level diagnostics, set the same level in `.squad/config.json`: - ```json { "memory": { @@ -84,109 +63,71 @@ command activity. For persistent project-level diagnostics, set the same level i } } ``` - Precedence is: explicit CLI switch, then `SQUAD_MEMORY_LOG_LEVEL`, then `.squad/config.json` `memory.logLevel`, then the default `none`. Diagnostics are written to stderr and include safe metadata such as the command, provider, load-guidance, path, result counts, and timing. They do not print raw memory content or search text. 
- Prompt-only Copilot custom agents still fall back to direct `.squad/` file edits. That fallback is intentionally local: it does not claim provider-backed semantic memory, external indexing, policy enforcement, or remote deletion unless a CLI/MCP/tool bridge is installed and used. - Real `provider=copilot` support is unavailable unless a concrete callable Copilot Memory API exists in the installed SDK/tooling. Squad does not invent endpoints or fake a remote memory service. The only current bridge is explicitly named `hostInjectedCopilotAdapter`; it is opt-in and only works when a host supplies a client. Otherwise provider-backed writes fail closed after auditing the rejected attempt. Forbidden content is classified and rejected before any provider call. - The installed Copilot SDK/CLI currently exposes memory as an agent capability/permission concept, not as a documented SDK storage client for write/search/delete. Config files may contain `defaultProvider: "copilot"` for forward compatibility, but status reports it as configured and unavailable, and governed reads/writes fail closed until a real callable API exists. - --- - ## Personal Memory: `history.md` - Each agent has its own history file at `.squad/agents/{name}/history.md`. After every session, agents append what they learned — architecture decisions, conventions, file paths, user preferences. - **Only that agent reads its own history.** This means each team member builds specialized knowledge about their domain. - After a few sessions, agents stop asking questions they've already answered. - ### Progressive summarization - Histories grow over time. When an agent's `history.md` exceeds ~12KB, older entries are archived into a summary section. Recent entries stay detailed; older entries are condensed. This keeps the file within a useful context budget without losing accumulated knowledge. - --- - ## Shared Memory: `decisions.md` - Team-wide decisions live in `.squad/decisions.md`. 
**Every agent reads this before working.** This is the team's shared brain. - Decisions are captured three ways: - ### 1. From agent work - When an agent makes a decision during a task, it writes to the inbox: - ``` .squad/decisions/inbox/{agent-name}-{slug}.md ``` - ### 2. From user directives - When you say "always..." or "never...", it's captured as a directive: - ``` > Always use single quotes in TypeScript > Never use inline styles > Prefer named exports over default exports ``` - These go directly into `decisions.md`. - ### 3. Scribe merges - The Scribe agent (a silent team member) periodically: - 1. Reads all entries from `.squad/decisions/inbox/` 2. Merges them into the canonical `decisions.md` 3. Deduplicates overlapping decisions 4. Propagates updates to affected agents - ### Decision archiving - As your project grows, `decisions.md` accumulates hundreds of blocks. Stale sprint artifacts, completed analysis docs, and one-time planning fragments consume context window space without adding value. When this happens, old decisions are archived to `.squad/decisions-archive.md` — preserved for reference but no longer loaded into agent context. - Active decisions (ongoing policies, user preferences, current architecture) stay in `decisions.md`. Agents always read the lean, current shared brain. - --- - ## Skills - Reusable knowledge files at `.copilot/skills/{skill-name}/SKILL.md`. See [Skills System](skills.md) for details. - Skills differ from decisions — decisions are project policies ("use PostgreSQL"), while skills are transferable techniques ("how to set up CI with GitHub Actions"). 
- --- - ## How Memory Compounds - | Stage | What agents know | |-------|-----------------| | 🌱 First session | Project description, tech stack, your name | | 🌿 After a few sessions | Conventions, component patterns, API design, test strategies | | 🌳 Mature project | Full architecture, tech debt map, regression patterns, performance conventions | - --- - ## Memory Architecture - ``` .squad/ ├── decisions.md # Shared — all agents read this @@ -204,11 +145,8 @@ Skills differ from decisions — decisions are project policies ("use PostgreSQL ├── squad-conventions/SKILL.md # Starter skill └── ci-github-actions/SKILL.md # Earned skill ``` - --- - ## Tips - - **Commit intentional `.squad/` state** — anyone who clones the repo gets the team with its accumulated decisions and skills. Never store secrets, credentials, raw logs, or private customer data in Squad memory. @@ -216,35 +154,24 @@ Skills differ from decisions — decisions are project policies ("use PostgreSQL - If an agent keeps making the same mistake, check `decisions.md` — the relevant convention might be missing. - You can edit `decisions.md` and `history.md` files directly. They're plain Markdown. - The first session is always the least capable. Give the team a few sessions to build up context. - ## Sample Prompts - ``` what does Kane remember about the authentication system? ``` - Queries a specific agent's personal history for relevant context. - ``` show me the team decisions about API design ``` - Searches the shared decisions.md file for a particular topic. - ``` what happened in the last session? ``` - Reviews session history and recent agent learnings. - ``` always use single quotes in TypeScript ``` - Adds a directive to the shared decisions that all agents will follow. - ``` search past decisions for database choices ``` - Finds historical decisions related to a specific topic or keyword. 
diff --git a/docs/src/content/docs/features/model-selection.md b/docs/src/content/docs/features/model-selection.md index 52a481480..059cbe35e 100644 --- a/docs/src/content/docs/features/model-selection.md +++ b/docs/src/content/docs/features/model-selection.md @@ -3,67 +3,46 @@ title: Per-Agent Model Selection description: Route each agent to the right model based on task type, with persistent overrides and economy mode. order: 34 --- - # Per-Agent Model Selection - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to set a persistent preference (survives across sessions):** ``` Always use Opus ``` - **Try this to prioritize quality for the session only:** ``` Have all agents use Opus for the rest of this session ``` - **Try this to optimize costs:** ``` Switch to Haiku — I'm trying to save costs ``` - **Try this to balance quality and budget:** ``` Use Sonnet for code, Haiku for everything else ``` - **Try this to go back to automatic selection:** ``` Switch back to automatic model selection ``` - Squad adjusts model selection based on your directive. Agents writing code get quality models (Sonnet/Opus), agents doing docs/logs get cost-optimized models (Haiku). You can override anytime — and persistent overrides survive across sessions. - --- - ## How It Works - Squad routes each agent to the right model based on what they're doing — not a one-size-fits-all default. The governing principle: **cost first, unless code is being written** — but your preferences always take priority. - ## 5-Layer Model Resolution - Model selection uses a layered system. First match wins: - 1. **Persistent Config** (`.squad/config.json`) — You said "always use opus"? It's saved to disk. Every session, every agent, until you change it. Per-agent overrides (`agentModelOverrides`) take priority over the global `defaultModel`. 2. **Session Directive** — You said "use opus for this session"? Done. 
Applies until the session ends. 3. **Charter Preference** — The agent's charter specifies a `## Model` section with a preferred model. 4. **Task-Aware Auto-Selection** — The coordinator checks what the agent is actually doing: - | Task Output | Model | Tier | |-------------|-------|------| | Writing code (implementation, refactoring, tests, bug fixes) | `claude-sonnet-4.6` | Standard | | Writing prompts or agent designs | `claude-sonnet-4.6` | Standard | | Non-code work (docs, planning, triage, changelogs) | `claude-haiku-4.5` | Fast | | Visual/design work requiring image analysis | `claude-opus-4.6` | Premium | - 5. **Default** — If nothing matched, `claude-haiku-4.5`. Cost wins when in doubt. - ## Persistent Model Preferences - Squad stores your model preferences in `.squad/config.json`: - ```json { "version": 1, @@ -74,13 +53,10 @@ Squad stores your model preferences in `.squad/config.json`: } } ``` - - **`defaultModel`** — applies to ALL agents unless overridden. Set with "always use X". - **`agentModelOverrides`** — per-agent overrides. Set with "use X for {agent}". - **Clear with** "switch back to automatic" — removes `defaultModel`, returns to auto-selection. 
- ## Role-to-Model Mapping - | Role | Default Model | Why | |------|--------------|-----| | Core Dev / Backend / Frontend | `claude-sonnet-4.6` | Writes code — quality first | @@ -91,61 +67,42 @@ Squad stores your model preferences in `.squad/config.json`: | Scribe / Logger | `claude-haiku-4.5` | Mechanical file ops | | Git / Release | `claude-haiku-4.5` | Changelogs, tags, version bumps | | Designer / Visual | `claude-opus-4.6` | Vision capability required | - ## 18-Model Catalog - Squad supports 18 models across three tiers: - - **Premium:** claude-opus-4.6, claude-opus-4.6-fast, claude-opus-4.5 - **Standard:** claude-sonnet-4.6, gpt-5.4, gpt-5.3-codex, gpt-5.2-codex, claude-sonnet-4, gpt-5.2, gpt-5.1-codex, gpt-5.1, gpt-5, gemini-3-pro-preview - **Fast/Cheap:** claude-haiku-4.5, gpt-5.1-codex-mini, gpt-4.1, gpt-5-mini - ## Fallback Chains - If a model is unavailable (plan restriction, rate limit, deprecation), Squad silently retries with the next in chain: - ``` Premium: claude-opus-4.6 → claude-opus-4.6-fast → claude-opus-4.5 → claude-sonnet-4.6 Standard: claude-sonnet-4.6 → gpt-5.3-codex → gpt-5.4 → claude-sonnet-4 → gpt-5.2 Fast: claude-haiku-4.5 → gpt-5.1-codex-mini → gpt-4.1 → gpt-5-mini ``` - Never falls back UP in tier — a fast task won't land on a premium model. - ## User Overrides - Tell the coordinator what you want: - - `"use opus for this"` — one-off premium for current task - `"always use opus"` — **persistent** preference saved to `.squad/config.json` (survives sessions) - `"use gpt-5.2-codex for Fenster"` — **persistent** per-agent override - `"switch back to automatic"` — clears persistent preference - ## Economy Mode - Economy mode automatically falls back to cheaper models when rate limits are approaching or when you want to cap spend. It is opt-in — enable it per session or persistently. 
- **Enable economy mode:** ``` Switch to economy mode ``` - **Disable economy mode:** ``` Turn off economy mode ``` - When economy mode is active, Squad remaps models using the `ECONOMY_MODEL_MAP`: - | Normal Tier | Economy Model | |-------------|--------------| | Standard (Sonnet) | `gpt-4.1` | | Fast (Haiku) | `gpt-4.1` | - **Fallback chains in economy mode** run the same logic as normal fallback chains, but start one tier lower. A code task that would normally use `claude-sonnet-4.6` uses `claude-haiku-4.5` instead. - **Cost tradeoffs:** Economy mode trades output quality for lower cost and reduced rate limit pressure. Use it for bulk triage, log analysis, or changelog generation — not for architecture work or complex refactors where quality matters. - **Persistent economy mode** saves to `.squad/config.json`: ```json { @@ -153,37 +110,25 @@ When economy mode is active, Squad remaps models using the `ECONOMY_MODEL_MAP`: "economyMode": true } ``` - Economy mode is also triggered automatically by the [rate limiting](rate-limiting.md) system when headroom drops to Amber state — you do not have to enable it manually for rate limit protection. - ## Sample Prompts - ``` use opus for this architecture work ``` - Override to premium model for a single high-stakes task. - ``` always use haiku to save costs ``` - Set a persistent preference for the cheapest model tier (saved to `.squad/config.json`). - ``` what model did Kane use for that last task? ``` - Check which model was actually used for a completed task. - ``` use gpt-5.2-codex for all backend work ``` - Set a specific model for tasks in a particular domain. - ``` switch back to automatic model selection ``` - Clear the persistent model preference and return to task-aware auto-selection.
diff --git a/docs/src/content/docs/features/notification-level.md b/docs/src/content/docs/features/notification-level.md index d5341722e..9670afb07 100644 --- a/docs/src/content/docs/features/notification-level.md +++ b/docs/src/content/docs/features/notification-level.md @@ -1,61 +1,42 @@ # Notification Level - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to silence empty rounds:** ``` squad watch --notify-level important ``` - **Try this to see everything (debugging):** ``` squad watch --notify-level all ``` - **Try this in config for persistent setting:** ```json { "watch": { "notifyLevel": "important" } } ``` - When `squad watch` runs continuously, it prints a board report after every round. In production setups where output is forwarded to Teams, Slack, or email, this creates noise — hundreds of "Round N" messages with no useful content when the board is clear. - --- - ## Notify Levels - | Level | Behavior | When to use | |-------|----------|-------------| | `important` | Only print rounds with actual work items | **Default.** Best for production/Teams channels | | `all` | Print every round, including empty ones | Debugging. Old behavior before this feature | | `none` | Suppress all round output | Headless/CI — only errors matter | - ## Machine and Repo Attribution - Every round header now includes the machine hostname and repo name: - ``` 🔄 Ralph — Round 5 (DEVBOX-01 · my-project) ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 🔴 Untriaged: 2 🟢 Ready to merge: 1 ``` - This tells you **where** the message came from — which machine and which repo — so when multiple watch instances report to the same Teams channel, you can distinguish them. 
- ## Configuration - ### CLI flag (per-run) - ```bash squad watch --notify-level important # default squad watch --notify-level all # old behavior squad watch --notify-level none # silent ``` - ### Config file (persistent) - In `.squad/config.json`: - ```json { "watch": { @@ -65,13 +46,9 @@ In `.squad/config.json`: } } ``` - Config file settings are overridden by CLI flags when both are present. - ## What Counts as "Important" - A round is considered important (and reported) when **any** board counter is non-zero: - - Untriaged issues - Assigned but unstarted work - Draft PRs @@ -80,10 +57,7 @@ A round is considered important (and reported) when **any** board counter is non - PRs needing review - PRs ready to merge - Issues executed this round - If all counters are zero, the round is silent in `important` mode. - ## See Also - - [Ralph — Work Monitor](/docs/features/ralph) — full Ralph documentation - [Watch capabilities](/docs/features/ralph#watch-mode) — how squad watch works diff --git a/docs/src/content/docs/features/notifications.md b/docs/src/content/docs/features/notifications.md index d3044e8d2..03031710b 100644 --- a/docs/src/content/docs/features/notifications.md +++ b/docs/src/content/docs/features/notifications.md @@ -1,61 +1,38 @@ # Squad pings you - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to get notified on completion:** ``` Notify me when the build finishes ``` - **Try this to stay in the loop:** ``` Ping me on Teams when you need my input ``` - Your squad sends you instant messages when they need input, hit an error, or complete work. Works with Teams, Discord, Slack, webhooks — whatever you configure. - --- - ## How It Works - Your squad can send you instant messages when they need your input. Leave your terminal, get pinged on your phone. - --- - ## How It Works - Squad ships zero notification infrastructure. 
Instead, it uses **skills** — reusable knowledge files — to teach agents when and how to ping you. You bring your own notification delivery by configuring an MCP notification server in your Copilot environment. - The flow: 1. **Skill** (`human-notification`) tells agents when to ping — blocked waiting for input, decision needed, error hit, work complete 2. **Agent** calls the skill, which invokes your configured MCP server 3. **Your MCP server** (Teams, iMessage, Discord, webhook, etc.) sends the actual message to your device - This means Squad works with any notification service. Pick your favorite messaging platform, configure it once, and your squad has a direct line to you. - --- - ## Quick Start: Teams (Simplest Path) - ### What you need to know - Squad doesn't ship a Teams MCP server. You bring your own — either a community implementation or one you build yourself. Squad agents discover the configured MCP server at spawn time and call it automatically when they need to notify you. - ### Teams Workflows webhook - Teams Workflows (Power Automate) webhooks are the recommended approach. Office 365 Connectors were [retired by Microsoft](https://devblogs.microsoft.com/microsoft365dev/retirement-of-office-365-connectors-within-microsoft-teams/) — use Workflows instead. - 1. **Create a channel for your squad:** - Create a new Team called "My Squads" (or reuse an existing one) - Add a channel, e.g., `#squad-myproject` - 2. **Create a Workflows webhook:** - Open the channel, select the **+** (add a tab) or go to the **Workflows** app in Teams - Choose **"Post to a channel when a webhook request is received"** - Follow the prompts to name the workflow and select your channel - Copy the generated webhook URL (it starts with `https://prod-...logic.azure.com/...`) - 3. **Get a Teams webhook MCP server:** You need an MCP server that can POST to your webhook URL. Options: @@ -63,7 +40,6 @@ Teams Workflows (Power Automate) webhooks are the recommended approach. 
Office 3 - **Community reference:** [benleane83's teams-webhook-mcp.js](https://gist.github.com/benleane83/f37b5bc1ed3d00e320ba48886109b82a) — a working implementation that sends MessageCard payloads (compatible with Workflows webhooks) - **Build your own:** Use the community reference as a starting point - **Search the MCP marketplace:** Look for Teams-compatible servers at https://mcpmarket.com - 4. **Configure Squad:** Create or edit `.vscode/mcp.json` in your workspace: @@ -82,26 +58,19 @@ Teams Workflows (Power Automate) webhooks are the recommended approach. Office 3 ``` Replace `/absolute/path/to/teams-webhook-mcp.js` with the path to your downloaded or created MCP server script. Replace the `TEAMS_WEBHOOK_URL` value with the URL from step 2. - 5. **Use it:** - Start a Squad session with `copilot squad` - When an agent needs input, your Teams channel lights up - --- - ## Quick Start: iMessage (Mac Only) - iMessage is built into macOS. If you're on a Mac, this is the fastest personal setup. - 1. **Check requirements:** - macOS with Messages.app - Copilot running on the same Mac - System allows Copilot to control Messages (grant permission when prompted) - 2. **Install the iMessage MCP server:** - Search https://mcpmarket.com for "imessage" or compatible MCP servers - Follow its setup steps - 3. **Configure Squad:** - Edit `.vscode/mcp.json`: ```json @@ -119,73 +88,48 @@ iMessage is built into macOS. If you're on a Mac, this is the fastest personal s ``` Replace `/absolute/path/to/imessage-mcp.js` with the actual path to your downloaded MCP server script. - 4. **Test:** - Start a Squad session - When agents need input, it appears in Messages on your phone - **Limitation:** iMessage only works on Mac. If you use Windows, Linux, or CI environments, use Teams or webhook instead. - --- - ## Quick Start: Discord - Discord is flexible and works everywhere (web, mobile, desktop). 
- ### Option A: Using mcp-notifications (Simplest) - https://www.npmjs.com/package/mcp-notifications supports Discord, Slack, Teams, and custom webhooks. - 1. Install mcp-notifications - ```bash npm install -g mcp-notifications ``` - 1. **Get your Discord webhook:** - In Discord, right-click a channel → "Edit channel" → "Integrations" → "Webhooks" - "New Webhook" → name it "Squad" - Copy the webhook URL - #### Add MCP Server for GitHub Copilot CLI - 1. **Configure Squad from GitHub Copilot CLI:** - ```bash /mcp add notifications ``` - * Server Type: [2] stdio * Command: `npx -y mcp-notifications` * Environment Variables: `{ "WEBHOOK_URL": "https://discord.com/api/webhooks/...", "WEBHOOK_TYPE": "discord" }` - #### Add MCP Server in VSCode - 1. From the command palette, search for MCP: Add Server 1. When you run MCP: Add Server, enter the following information - * Type: Command (stdio) * Command: `npx -y mcp-notifications` * Server Id: notifications * Configuration target: Global * When the mcp.json file in your user profile opens, add the following to the mcp server configuration - ```bash "env": { "WEBHOOK_URL": "https://discord.com/api/webhooks/...", "WEBHOOK_TYPE": "discord" } ``` - ### Option B: Using Discord Official MCP - For more advanced Discord integrations, search Discord's MCP marketplace. - --- - ## Quick Start: Custom Webhook - For any HTTP endpoint (custom service, Zapier, IFTTT, etc.): - 1. **Get your webhook URL** from your service - 2. **Use mcp-notifications or build a thin wrapper:** ```json { @@ -202,7 +146,6 @@ For any HTTP endpoint (custom service, Zapier, IFTTT, etc.): ``` Replace `/absolute/path/to/webhook-mcp.js` with the actual path to your MCP server script. - 3. 
**Your endpoint receives POST:** ```json { @@ -215,13 +158,9 @@ For any HTTP endpoint (custom service, Zapier, IFTTT, etc.): } } ``` - --- - ## What Triggers a Notification - Agents ping you when: - | Trigger | Example | |---------|---------| | **Blocked on input** | "Keaton needs your decision on which API approach to use (Issue #42)" | @@ -229,50 +168,32 @@ Agents ping you when: | **Error hit** | "McManus got an authentication error and needs credentials for the staging API" | | **Work complete** | "Fenster finished the test suite — 142 tests passing, 3 flaky (check the logs)" | | **Review feedback** | "Your PR review on #78 needs a response before Keaton can merge" | - You control which triggers send notifications (see Configuration below). - --- - ## Notification Format - Notifications are **agent-branded, context-rich, and actionable.** - Example notification message: - ``` 🏗️ Keaton needs your input - Blocked: Design decision required for API error handling strategy. Follow the conversation in Issue #42. - → Review issue: github.com/myorg/myrepo/issues/42 ``` - Another example: - ``` ✅ Fenster finished the test suite - 142 tests passing. 3 marked as flaky — review them in the terminal output. - Session still running. Come back to the terminal to decide next steps. ``` - **Anatomy:** - **Agent emoji + name** — who pinged you (matches your squad's cast) - **Context** — why (decision, blocked, complete, etc.) - **What to do** — specific action (check issue, review logs, come back to terminal) - **Link** — clickable GitHub issue, PR, or breadcrumb to your session - --- - ## Configuration - ### Choosing What Triggers Notifications - By default, agents ping on all triggers. To be selective, set environment variables: - ```json { "mcpServers": { @@ -287,13 +208,9 @@ By default, agents ping on all triggers. To be selective, set environment variab } } ``` - This is useful if you only care about being pinged when blocked (not for every decision or completion). 
- ### Quiet Hours (Optional) - If your MCP server supports it, configure quiet hours to suppress notifications during off-hours: - ```json { "env": { @@ -303,82 +220,53 @@ If your MCP server supports it, configure quiet hours to suppress notifications } } ``` - During quiet hours, notifications queue locally and are batched into a morning digest instead of waking you up. - ### Testing Your Setup - To test without running a full Squad session: - ```bash # Once your MCP server is configured, trigger a test notification: copilot squad test-notification --agent Keaton --reason blocked ``` - This fires a sample notification through your configured server so you can verify delivery and formatting. - --- - ## Troubleshooting - ### Notifications aren't arriving - 1. **Verify the MCP server is running:** - Check your `.vscode/mcp.json` syntax - Restart Copilot - 2. **Check the webhook URL:** - Paste the URL in your browser (or `curl`). If it 404s, the webhook is invalid or expired. - For Teams/Discord webhooks, regenerate them if they're old - 3. **Verify environment variables:** - Ensure all secrets (API keys, webhook URLs) are set in your shell before starting Copilot - Copilot reads `.vscode/mcp.json` at startup — changes require a restart - 4. **Check agent logs:** - In your Squad session, ask agents to log the notification call: `check the human-notification skill logs` - This surfaces any errors from the MCP server - ### Notifications are too frequent - Use the `NOTIFY_*` environment variables (see Configuration above) to disable notifications for non-critical triggers like `NOTIFY_COMPLETE` or `NOTIFY_DECISION`. - ### Wrong channel or user receiving notifications - - **Teams webhook:** Ensure the webhook points to the correct channel - **iMessage:** Verify the `IMESSAGE_TARGET` phone number or email matches your device - **Discord:** Double-check the webhook URL points to your intended channel - ### "MCP server failed to start" - 1. 
Ensure the MCP server command in `.vscode/mcp.json` points to a valid executable 2. Check that all `env` variables are set and accessible 3. Review the Copilot startup logs for the actual error - --- - ## Architecture Notes - The `human-notification` skill lives in `.copilot/skills/squad-human-notification/SKILL.md`. Agents read it before working and decide whether to ping you. You can edit the skill directly if you want to: - - Add custom notification logic for your team - Change when agents decide to ping (e.g., always notify on errors) - Add metadata to notifications (e.g., priority levels) - For advanced use cases, you can also: - - Create a custom MCP server that combines multiple notification channels (Teams + Slack) - Route notifications based on agent and trigger type (errors to you, completions to your manager) - Add intelligent rate limiting (don't ping for 30 minutes if already pinged once) - --- - ## Sample MCP Configs - Below are complete, copy-pasteable `.copilot/mcp-config.json` examples for each notification platform. Pick the one that matches your setup and copy the entire `mcpServers` block into your config file. - ### Teams Webhook (Simplest) - ```json { "mcpServers": { @@ -392,16 +280,12 @@ Below are complete, copy-pasteable `.copilot/mcp-config.json` examples for each } } ``` - **Setup:** 1. Create a Workflows webhook in your Teams channel (Workflows app → "Post to a channel when a webhook request is received") 2. Download a Teams webhook MCP server (see [community reference implementation](https://gist.github.com/benleane83/f37b5bc1ed3d00e320ba48886109b82a)) 3. Replace `/absolute/path/to/teams-webhook-mcp.js` with the actual path to your MCP server script - --- - ### iMessage (Mac Only) - ```json { "mcpServers": { @@ -415,16 +299,12 @@ Below are complete, copy-pasteable `.copilot/mcp-config.json` examples for each } } ``` - **Setup:** 1. Download an iMessage MCP server from https://mcpmarket.com 2. 
Replace `/absolute/path/to/imessage-mcp.js` with the actual path to your MCP server script 3. Replace `+1234567890` with your phone number or email address registered in iCloud - --- - ### Discord Webhook - ```json { "mcpServers": { @@ -438,16 +318,12 @@ Below are complete, copy-pasteable `.copilot/mcp-config.json` examples for each } } ``` - **Setup:** 1. In Discord, right-click channel → Edit Channel → Integrations → Webhooks → New Webhook → copy the URL 2. Download or create a Discord webhook MCP server (see mcp-notifications package or build your own) 3. Replace `/absolute/path/to/discord-webhook-mcp.js` with the actual path to your MCP server script - --- - ### Generic Webhook (Zapier, Custom Endpoint, etc.) - ```json { "mcpServers": { @@ -463,49 +339,34 @@ Below are complete, copy-pasteable `.copilot/mcp-config.json` examples for each } } ``` - **Setup:** 1. Create or download a generic webhook MCP server 2. Replace `/absolute/path/to/webhook-mcp.js` with the actual path to your MCP server script 3. Your endpoint receives POST requests with agent name, message, and context - --- - ## See Also - - [MCP Setup Guide](./mcp.md) — detailed MCP configuration walkthrough - [Skills System](./skills.md) — learn how skills encode reusable knowledge - [MCP Documentation](./mcp.md) — how to configure Model Context Protocol - [Model Selection](./model-selection.md) — customize agent behavior per role - ## Sample Prompts - ``` configure Teams webhook for notifications ``` - Guides you through setting up Microsoft Teams as the notification channel. - ``` test my notification setup ``` - Sends a sample notification to verify your MCP server configuration is working. - ``` disable completion notifications ``` - Configures the notification system to only ping on blocks and errors, not completions. - ``` what's my current notification status? ``` - Shows which notification triggers are enabled and what channel is configured. 
- ``` set quiet hours from 6pm to 9am ``` - Configures the notification system to queue messages during off-hours instead of sending immediately. diff --git a/docs/src/content/docs/features/parallel-execution.md b/docs/src/content/docs/features/parallel-execution.md index 4cb0d3c71..2738a699d 100644 --- a/docs/src/content/docs/features/parallel-execution.md +++ b/docs/src/content/docs/features/parallel-execution.md @@ -1,64 +1,42 @@ # Parallel Execution - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to launch concurrent work streams:** ``` Have three agents work on this in parallel: UI mockups, API spec, and database schema ``` - **Try this to work multiple issues simultaneously:** ``` Work on issues #12, #15, and #18 at the same time ``` - **Try this to control concurrency for cost savings:** ``` Run at most 2 agents at once to save costs ``` - Squad launches independent work in parallel by default — multiple agents work simultaneously, no waiting. You control concurrency limits and can force sequential execution when needed. - --- - ## How Parallel Execution Works - Squad runs agents in parallel whenever possible. The fan-out pattern launches all independent agents simultaneously, waits for results, then proceeds — no sequential bottlenecks unless data dependencies or reviewer gates require them. - ## How Parallel Execution Works - When the coordinator receives work: - 1. **Dependency Analysis** — Check if tasks have data dependencies (A needs output from B). 2. **Fan-Out** — Launch all independent agents in parallel using `mode: "background"`. 3. **Wait** — Coordinator polls agent status until all complete. 4. **Collect** — Aggregate results, check for errors, route to next step. 
- ### Example: Feature Implementation - > "Implement user authentication: API endpoints, frontend form, tests, and documentation" - Coordinator spawns **4 agents in parallel**: - Backend → API endpoints - Frontend → Login/signup form - Tester → Integration tests - DevRel → Auth documentation - All work simultaneously. No agent waits for another unless there's a code dependency. - ## Background vs Sync Mode - | Mode | When to Use | Behavior | |------|-------------|----------| | `background` | Independent work, no data dependencies | Agent runs in parallel, coordinator polls for completion | | `sync` | Data dependency (one agent needs output from another) | Agent runs sequentially, coordinator waits | | `sync` | Reviewer gate (Lead must approve before continuing) | Agent runs, coordinator waits for review decision | - ### Background Mode - Used for **fan-out parallelism**: - ``` Coordinator → [Agent1, Agent2, Agent3] (background) ↓ ↓ ↓ @@ -66,13 +44,9 @@ Coordinator → [Agent1, Agent2, Agent3] (background) ↓ ↓ ↓ Coordinator collects all ``` - Agents don't see each other's output until the coordinator collects and synthesizes. - ### Sync Mode - Used for **dependencies and gates**: - ``` Coordinator → Agent1 (sync) → Result1 ↓ @@ -80,32 +54,20 @@ Coordinator → Agent1 (sync) → Result1 ↓ Coordinator → Reviewer (sync, gates next step) ``` - Each step blocks until the previous completes. - ## Eager Execution Philosophy - Squad's default is **eager parallelism** — launch everything that can run, let the coordinator handle synchronization. Benefits: - - **Faster throughput** — No artificial sequencing. - **Better resource utilization** — Multiple agents saturate available compute. - **Resilient to blocking** — If one agent stalls, others keep working. - Trade-off: Increased API cost (multiple agents running simultaneously). If cost is a concern, tell the coordinator: - > "Work sequentially to save costs" - Coordinator switches to sync mode for all agents. 
- ## Deadlock Avoidance - When agents have circular dependencies: - - **Agent A** needs output from **Agent B** - **Agent B** needs output from **Agent A** - The coordinator detects the cycle during dependency analysis and prompts: - ``` ⚠️ Circular dependency detected: A ↔ B Choose resolution: @@ -113,22 +75,15 @@ Choose resolution: 2. Run B first, then A 3. Redesign to remove dependency ``` - ## Reviewer Gates - Some tasks require **sequential review**: - 1. Agent writes code → Draft PR 2. Lead reviews → Approves or rejects 3. If approved → Merge and close 4. If rejected → Reassign or escalate (agent is **locked out**) - This is a **sync gate** — the next step cannot proceed until the reviewer completes. - ## Parallel Execution Logs - The coordinator logs parallel execution in `.squad/orchestration-log/`: - ``` [2024-01-15 14:30:00] FAN-OUT: Spawning 4 agents (Backend, Frontend, Tester, DevRel) [2024-01-15 14:30:15] AGENT: Backend started (background) @@ -141,36 +96,27 @@ The coordinator logs parallel execution in `.squad/orchestration-log/`: [2024-01-15 14:38:20] COLLECT: Tester completed (success) [2024-01-15 14:38:21] FAN-IN: All agents complete ``` - ## Parallel Limits - The coordinator respects concurrency limits to avoid rate limits or resource exhaustion: - - **Default:** 5 agents in parallel - **Adjustable:** `"Run at most 3 agents at once"` → Coordinator batches work in groups of 3 - ## Sample Prompts - ``` Build the new dashboard feature — everyone work in parallel ``` Coordinator spawns all relevant agents (Frontend, Backend, Tester, DevRel) simultaneously. - ``` Implement the API first, then write tests — do it sequentially ``` Forces sync mode: Backend runs, completes, then Tester starts. - ``` Work on issues #12, #15, and #18 at the same time ``` Spawns 3 agents in parallel, one per issue. Assumes no dependencies between issues. - ``` Run at most 2 agents at once to save costs ``` Sets concurrency limit. 
Coordinator batches work: runs 2, waits for completion, runs next 2. - ``` Why is Tester waiting? Show me the dependency graph. ``` diff --git a/docs/src/content/docs/features/plugins.md b/docs/src/content/docs/features/plugins.md index 13c6bbd28..ecce90267 100644 --- a/docs/src/content/docs/features/plugins.md +++ b/docs/src/content/docs/features/plugins.md @@ -1,29 +1,17 @@ # Plugin Marketplace Guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Plugins package reusable Squad capabilities: agents, knowledge packs, workflows, ceremonies, memory providers, routing guidance, decisions, hook metadata, adapter metadata, typed provider contracts, and generated knowledge artifacts. - The everyday flow is simple: - ```bash squad plugin install ./my-plugin squad plugin enable my-plugin squad plugin refresh my-plugin ``` - After that, spawned agents can receive the plugin's enabled context. For a knowledge plugin such as Graphify, refresh can generate approved artifacts under `.squad/knowledge/graphify/` so agents have a relationship map across code, docs, and decisions. - The MVP is declarative-first with a narrow governed runtime for Squad-owned built-in providers. Squad records hook, adapter, and provider metadata, and approved providers can generate static artifacts. It does not execute plugin-supplied code. - Squad plugins do not replace Copilot plugins or Copilot skills. If a Squad plugin depends on Copilot-owned extensibility, declare that under `copilot.requires`; Squad records and surfaces the dependency, but it does not install it or run Copilot plugin commands. - --- - ## Plugin lifecycle - Install, activation, and artifact refresh are separate steps. - 1. `squad plugin validate <path>` checks the manifest and prints structured validation errors. 2. `squad plugin dry-run <path>` prints the files that would be written without changing `.squad/`. 3. 
`squad plugin install <path>` copies declared static files, records hashes in `.squad/plugins/lock.json`, and leaves the plugin disabled. @@ -32,13 +20,9 @@ Install, activation, and artifact refresh are separate steps. 6. `squad plugin refresh <id>` refreshes approved generated artifacts for built-in providers such as Graphify. 7. `squad plugin disable <id>` deactivates a plugin without deleting installed files. 8. `squad plugin uninstall <id>` removes files recorded in the lock and clears the registration. - Use `squad plugin list --json` when another tool needs stable machine-readable state. - --- - ## Local MVP commands - ```bash squad plugin validate ./my-plugin squad plugin dry-run ./my-plugin @@ -53,22 +37,16 @@ squad plugin disable my-plugin squad plugin verify squad plugin uninstall my-plugin ``` - The current MVP supports local plugin directories. Marketplace registration still uses the existing commands: - ```bash squad plugin marketplace add github/awesome-copilot squad plugin marketplace list squad plugin marketplace browse awesome-copilot squad plugin marketplace remove awesome-copilot ``` - --- - ## Manifest format - The MVP manifest file is `plugin.manifest.json`. The validator also accepts legacy local names such as `squad-plugin.json` and `plugin.json` while the schema settles. - ```json { "id": "demo-plugin", @@ -134,13 +112,9 @@ The MVP manifest file is `plugin.manifest.json`. The validator also accepts lega ] } ``` - Supported Squad component keys are `agents`, `ceremonies`, `decisions`, `instructions`, `knowledge`, `memory`, `routing`, `templates`, `workflows`, `hooks`, and `adapters`. Capability roles are derived only from these declared components; arbitrary capability strings are not accepted. - Declared files must be relative paths under approved `.squad/` roots such as `agents/`, `knowledge/`, `memory/`, `routing/`, `decisions/`, `ceremonies/`, `prompts/`, `instructions/`, `templates/`, `workflows/`, or `plugins/`. 
- Copilot plugin dependencies are metadata only: - ```json { "copilot": { @@ -155,58 +129,36 @@ Copilot plugin dependencies are metadata only: } } ``` - Squad validates and records these dependencies so users know what Copilot plugins to install separately. Squad does not fetch, install, execute, or manage Copilot plugins. - External integration metadata is also record-only. Fields such as `repository`, `upstream.installCommand`, and `mcp.installCommand` explain how a human can install external tools separately; Squad never runs those commands. - Provider contracts are the typed extension seam for memory and knowledge systems. A contract declares the provider `type` (`memory`, `knowledge`, `persistence`, `event`, or `policy`), access `mode` (`read`, `write`, or `read-write`), `protocol` (`static-artifact` or `mcp`), optional static artifact binding, optional MCP binding metadata, and capability labels. These fields let spawned agents understand that a plugin represents a memory or knowledge provider without letting the plugin run code. During the MVP, provider contracts are prompt metadata only: Squad does not start MCP servers, call provider tools, query live memory backends, or install provider packages. - --- - ## Runtime behavior - Enabled plugins affect spawned Squad agents through their installed static artifacts and provider contracts. When an agent session is spawned, Squad reads `.squad/plugins/runtime.json`, finds enabled active plugin roles, and injects the installed guidance/metadata files plus provider contract summaries into the agent system context under a `Plugin Context` section. - This is still declarative-first behavior: Squad consumes copied Markdown/metadata from `.squad/`, but it does not install upstream packages, start MCP servers, execute plugin-supplied commands, call provider tools, or query external tools during plugin install or agent spawn. - The MVP also includes a narrow governed runtime for Squad-owned built-in providers. 
`squad plugin refresh ` and `squad plugin run-lifecycle ` can generate artifacts only when the provider name, lifecycle event, and output paths are allowlisted by Squad. The current approved provider is Graphify, which can refresh deterministic knowledge artifacts under `.squad/knowledge/graphify/`. - Disabled plugins do not contribute prompt context, even if their files remain installed on disk. - --- - ## External integration examples - The repository includes local sample plugins that exercise external integration metadata without adding executable provider code: - | Example | Purpose | | --- | --- | | `samples/plugin-knowledge-graphify` | Knowledge graph profile for the real `safishamsi/graphify` project and PyPI package `graphifyy`. It declares a `knowledge` provider contract bound to a static artifact under `.squad/knowledge/graphify/`, and can refresh governed Graphify artifacts with `squad plugin refresh graphify-knowledge`. | | `samples/plugin-knowledge-index-server` | Instruction and knowledge MCP profile for the real `jagilber-org/index-server` project and npm package `@jagilber-org/index-server`. It declares a metadata-only `knowledge` provider contract for the Index Server MCP catalog. | | `samples/plugin-memory-mempalace` | Memory-palace-style provider profile for the real `MemPalace/mempalace` CLI and optional `mempalace-mcp` server. It declares a metadata-only `memory` provider contract for spatial memory. | - Graphify's Copilot support is a separately installed skill/integration, not a Squad memory provider or a Squad-managed Copilot plugin. Index Server is an MCP-governed instruction/knowledge catalog, not a memory provider, although it is adjacent because agents can persist validated knowledge across sessions and repositories. MemPalace is a real memory system, but Squad still does not install its package, start MCP, or configure assistant hooks. 
- --- - ## Runtime state - Squad stores plugin state under `.squad/plugins/`: - | File | Purpose | | --- | --- | | `installed.json` | Installed plugins, versions, enabled state, roles, source path, and deployed files. | | `lock.json` | Manifest hash and per-file SHA-256 hashes for reproducibility and verification. | | `runtime.json` | Active plugin bindings by role plus enabled runtime state. | | `audit.jsonl` | JSON Lines lifecycle audit events for install, verify, enable, switch, disable, and uninstall. | - --- - ## Guardrails - The product goal is simple pluggability: install, enable, refresh, and give agents better context. The guardrails keep that model predictable: - - No plugin scripts, commands, shell snippets, or executable files are allowed. - Lifecycle hooks are limited to Squad-owned, capability-gated providers that generate static artifacts. - No plugin content is evaluated or run by Squad. @@ -217,15 +169,10 @@ The product goal is simple pluggability: install, enable, refresh, and give agen - Plugin file writes are limited to declared relative targets under `.squad/`. - Path traversal, absolute paths, symlinks, and script/executable extensions are rejected. - Governed runtime artifacts are generated only by built-in approved providers and only under approved `.squad/` paths. - See [Plugin security model](../reference/plugin-security.md) for the threat model and the negative checks that gate this feature. - The follow-up roadmap tracks remote marketplace distribution, broader built-in providers, and the trusted executable-provider RFC in [#1102](https://github.com/bradygaster/squad/issues/1102), [#1103](https://github.com/bradygaster/squad/issues/1103), [#1104](https://github.com/bradygaster/squad/issues/1104), and [#1105](https://github.com/bradygaster/squad/issues/1105). - --- - ## See also - - [Building extensions](../guide/building-extensions.md) — how to author a local plugin. 
- [Extensibility guide](../guide/extensibility.md) — how to decide whether an idea belongs in core, a plugin, or team config. - [Skills System](./skills.md) — the existing Squad knowledge layer; plugin manifests should prefer `knowledge` for reusable guidance and use `copilot.requires` for Copilot-owned plugins. diff --git a/docs/src/content/docs/features/prd-mode.md b/docs/src/content/docs/features/prd-mode.md index 42417d95b..367a7b7f9 100644 --- a/docs/src/content/docs/features/prd-mode.md +++ b/docs/src/content/docs/features/prd-mode.md @@ -1,124 +1,68 @@ # PRD Mode - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -**Try this to generate a requirements document:** +**Try this to ingest an existing PRD:** ``` -Write a PRD for a user authentication system with OAuth support +Read the PRD at docs/product-spec.md and summarize the key requirements ``` - -**Try this to break down product specs into work items:** +**Try this to turn an existing spec into next steps:** ``` -Read the PRD at docs/product-spec.md and break it into work items +Read docs/product-spec.md and suggest implementation slices or GitHub issues ``` - -Give Squad a product requirements document and the Lead breaks it into prioritized work items, assigns them to the team, and tracks progress with dependency management. - +Squad can **read an existing PRD or spec** and help you reason about it in chat. Today, this is an ingestion workflow — not a dedicated PRD authoring system or a built-in persistent work-item tracker. --- - -## How to Use - -Give Squad a product requirements document. The Lead agent breaks it into work items, assigns them to the team, and tracks progress. 
- +## What works today +Squad can: +- Read a PRD you paste into chat +- Read a PRD from a file path you provide +- Summarize requirements, risks, and open questions +- Suggest implementation slices, milestones, or issue breakdowns +There is **no special CLI command or router for PRD mode** in the current implementation. The feature works because you can point Squad at an existing document and ask for analysis. --- - -## How to Use - -Paste a PRD directly into the chat: - +## How to use it +Paste the PRD directly into chat: ``` -> Here's what we're building: +> Here's the PRD for the new onboarding flow: > -> [paste your PRD or spec] +> [paste the document] ``` - -Or reference a file: - +Or reference a file that already exists in the repo: ``` -> Read the PRD at docs/product-spec.md and break it into work items +> Read the PRD at docs/product-spec.md and call out the main requirements ``` - --- - -## How It Works - -1. **Lead decomposes the spec** into discrete work items (WI-1, WI-2, WI-3, etc.) -2. **Each work item gets a priority**: P0 (must-have), P1 (important), P2 (nice-to-have) -3. **Work items are assigned** to agents based on domain expertise -4. **Dependencies are tracked** — Squad won't start WI-4 if it depends on WI-2 finishing first -5. 
**Parallel work** happens where possible — independent work items run simultaneously - -### Example output - -``` -📋 PRD decomposed into 8 work items: - -WI-1 [P0] Set up project structure → Kane (Backend) -WI-2 [P0] Design database schema → Kane (Backend) -WI-3 [P0] Build component library → Dallas (Frontend) -WI-4 [P1] Implement auth endpoints → Kane (Backend) [depends: WI-2] -WI-5 [P1] Build login/signup UI → Dallas (Frontend) [depends: WI-3] -WI-6 [P1] Write auth integration tests → Lambert (Tester) [depends: WI-4] -WI-7 [P2] Add social login → Kane (Backend) [depends: WI-4] -WI-8 [P2] Build user profile page → Dallas (Frontend) [depends: WI-5] -``` - +## What to expect +A good PRD-ingestion session usually produces: +1. A summary of the product goals and constraints +2. A list of ambiguities or missing decisions +3. A suggested implementation breakdown you can turn into issues or milestones +4. Follow-up prompts for the lead, reviewers, or implementers +This is best thought of as **spec reading and decomposition assistance**. --- - -## Mid-Project PRD Updates - -Requirements change. When they do, give Squad the updated PRD: - -``` -> The PRD has been updated — re-read docs/product-spec.md -``` - -The Lead agent: - -1. Re-reads the PRD -2. Diffs against the existing work items -3. Adjusts the backlog — adds new items, re-prioritizes, or marks items as obsolete - -Work already completed isn't undone. Only the remaining backlog changes. - +## What it does not do today +Be careful not to over-read the feature: +- It does **not** provide a dedicated “write a PRD for me” product mode +- It does **not** maintain a first-class backlog with stored WI-1 / WI-2 style identifiers +- It does **not** persist dependency graphs or a PRD-specific board you can query later with commands like “show me the work items” +If you want durable tracking, use the decomposition output to create GitHub issues, project-board items, or other repo artifacts explicitly. 
--- - ## Tips - -- P0 work items are tackled first. Use priority levels to control sequencing. -- The Lead handles decomposition — you don't need to break down the spec yourself. -- Dependencies are respected automatically. You won't see an agent start on a dependent task before its prerequisite is done. -- Combine with [GitHub Issues Mode](github-issues.md) to create GitHub issues from work items. - +- Start from a real document — a PRD, spec, RFC, or issue write-up already in the repo. +- Ask for gaps and risks, not just summaries. +- After Squad suggests slices, promote the useful ones into GitHub issues or project-board items. +- Combine with [GitHub Issues Mode](github-issues.md) when you want the decomposition turned into tracked work. ## Sample Prompts - ``` -read the PRD at docs/product-spec.md and break it into work items +read the PRD at docs/product-spec.md and summarize the key requirements ``` - -Ingests a product requirements document and creates a prioritized, dependency-tracked backlog. - +Ingests an existing PRD and produces a concise requirements summary. ``` -show me the work items +read docs/product-spec.md and list open questions, risks, and assumptions ``` - -Displays the current backlog with priorities, assignments, and dependencies. - +Finds the places where the spec is incomplete or needs decisions. ``` -the PRD has been updated — re-read docs/product-spec.md +read docs/product-spec.md and suggest an implementation breakdown ``` - -Re-ingests the PRD and adjusts the backlog based on changes without undoing completed work. - +Proposes slices you can convert into issues, milestones, or workstreams. ``` -start working on approved P0 items +the PRD changed — re-read docs/product-spec.md and tell me what changed materially ``` - -Begins parallel execution of all high-priority work items with no blockers. - -``` -which work items are blocked right now? -``` - -Shows which tasks are waiting on dependencies or other blocking conditions. 
+Re-ingests the updated document and highlights changes in the requirements. diff --git a/docs/src/content/docs/features/preset.md b/docs/src/content/docs/features/preset.md index e39d7d3c8..cb70ca74a 100644 --- a/docs/src/content/docs/features/preset.md +++ b/docs/src/content/docs/features/preset.md @@ -2,31 +2,20 @@ title: Presets — Curated Agent Collections description: Save, share, and apply pre-configured agent rosters across projects with squad preset commands. --- - # Presets — Curated Agent Collections - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Try this to see available presets:** ```bash squad preset list ``` - **Try this to apply a preset to a new project:** ```bash squad init --preset backend-team ``` - Presets are reusable, named bundles of agent charters you can apply to any squad. Built-in presets ship with Squad; you can save your own from any project's current agents and (optionally) sync them across machines via a private GitHub repo. - Each preset is a directory at `~/.squad/presets/<name>/` containing a `preset.json` manifest and `agents/` charter files. The preset name is the directory name. - --- - ## What presets capture - Presets capture **agents only** (charters). For full squad snapshots including casting state, skills, routing rules, and decisions — for example to share a configured squad or publish to an agent toolbox — use [`squad export`](/squad/docs/features/export-import/) instead. - | Captured by preset | NOT captured by preset | |---|---| | Agent charters (role, expertise, prompt style) | Casting state (registry, history) | @@ -35,133 +24,86 @@ Presets capture **agents only** (charters). For full squad snapshots including c | | Skills (`.copilot/skills/`) | | | Ceremonies | | | Memory (`.squad/memory/`) | - This split is intentional. Presets are about the **shape** of a team. Skills, decisions, and history are about the **work** that team did. 
- --- - ## Commands - ### `squad preset list` - Show every preset available in your squad home (`~/.squad/presets/`): - ```bash $ squad preset list - Available Presets (3): - Name Agents Description ─────────────── ────── ──────────────────────────────────────── backend-team 4 Backend-focused squad: lead, API, DB, QA full-stack 6 Full-stack web app: lead, FE, BE, design, QA, devops data-engineering 4 Data pipeline squad: lead, ETL, ML, QA ``` - If no presets directory exists yet, you'll be prompted to run `squad preset init`. - ### `squad preset show <name>` - Inspect a preset's manifest and agents before applying: - ```bash $ squad preset show backend-team - backend-team v1.0.0 Backend-focused squad: lead, API, DB, QA Author: bradygaster Tags: backend, api - Agents (4): • lead (Lead) — owns architecture, scope, and code review • api (Backend Dev) — REST endpoints, validation, error handling • db (Database Engineer) — schema, migrations, query optimization • qa (Tester) — test coverage, edge cases, CI/CD ``` - ### `squad preset apply <name> [--force]` - Install the preset's agents into the current squad. The current directory must be a Squad project (have a `.squad/` directory). - ```bash squad preset apply backend-team ``` - This copies the preset's `agents/` directory into your project's `.squad/agents/`. Existing agents with the same names are **NOT** overwritten unless you pass `--force`. - ### `squad preset save <name> [--force] [--description "..."]` - Save your current project's agents as a new preset: - ```bash squad preset save my-team --description "My favorite roster for greenfield projects" ``` - This snapshots the agents from the current squad's `.squad/agents/` directory into `~/.squad/presets/my-team/` along with a `preset.json` manifest. Pass `--force` to overwrite an existing preset of the same name. - ### `squad preset init [--remote]` - Initialize the presets directory in your squad home (`~/.squad/presets/`). 
- - **Local-only** (`squad preset init`) — creates the directory and seeds the built-in presets - **Remote-synced** (`squad preset init --remote`) — creates the directory, seeds built-ins, AND creates a private GitHub repo (`{your-gh-user}/squad-home`) backing the directory so presets sync across machines - Requirements for `--remote`: - GitHub CLI (`gh`) installed and authenticated (`gh auth login`) - Permission to create private repos - On a second machine, run `squad preset init --remote` again and it will detect and clone your existing `squad-home` repo automatically. - --- - ## Applying a preset at init time - The most common usage — bootstrap a new project with a preset team: - ```bash mkdir my-new-project cd my-new-project git init squad init --preset backend-team ``` - This creates the standard `.squad/` scaffold AND applies the preset's agents. If `~/.squad/presets/` doesn't exist yet, `squad init` auto-runs `squad preset init` first to seed the built-in presets. - --- - ## Cross-machine workflow - Use the remote-backed setup if you want presets to follow you to new machines or shared dev environments: - ```bash # Machine A — initial setup squad preset init --remote squad preset save my-team --description "My standard team" - # Machine B — same user, same GitHub account squad preset init --remote # detects existing squad-home repo, clones it squad preset list # my-team is already here ``` - The remote repo lives at `https://github.com/<your-gh-user>/squad-home` (private by default). - --- - ## Sharing presets between users - Today, the simplest path to share a preset with someone else is: - 1. They run `squad preset init --remote` to set up their own squad home 2. You manually copy the preset directory across (or clone yours, copy the directory in, push) - A formal "publish/install from another user's repo" flow is on the roadmap but not in v0.10. 
- For collaborative team rosters that go beyond just agents (skills, decisions, routing), use [`squad export`](/squad/docs/features/export-import/) instead. - --- - ## What's in a preset directory - ``` ~/.squad/presets/backend-team/ ├── preset.json # manifest (name, version, description, tags, agents list) @@ -175,9 +117,7 @@ For collaborative team rosters that go beyond just agents (skills, decisions, ro └── qa/ └── charter.md ``` - The `preset.json` manifest format: - ```json { "name": "backend-team", @@ -193,13 +133,9 @@ The `preset.json` manifest format: ] } ``` - You can hand-edit this file to refine descriptions or add/remove agents — but the corresponding `agents/<name>/charter.md` files must match. - --- - ## See also - - [Export & Import](/squad/docs/features/export-import/) — full squad snapshots including state - [Skills](/squad/docs/features/skills/) — earned knowledge (separate from presets) - [Plugin Marketplace](/squad/docs/features/plugins/) — community-curated bundles diff --git a/docs/src/content/docs/features/project-boards.md b/docs/src/content/docs/features/project-boards.md index 1970e33dc..49696896b 100644 --- a/docs/src/content/docs/features/project-boards.md +++ b/docs/src/content/docs/features/project-boards.md @@ -1,52 +1,32 @@ # Project Boards - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to visualize workflow tracking:** ``` Create a project board for v0.5.0 with columns for each workflow stage ``` - **Try this to link issues to a board:** ``` Sync issue #42 to the project board ``` - Squad integrates with GitHub Projects V2 for visual workflow tracking. Labels are the source of truth, boards are one-way projections that visualize the state machine. 
- --- - ## Prerequisites - GitHub Projects V2 access requires the `project` scope: - ```bash gh auth refresh -s project ``` - Verify: - ```bash gh auth status ``` - You should see `✓ Token scopes: repo, project, workflow` (or similar). - ## How It Works - Squad treats labels as the state machine and boards as a **read-mostly visualization**: - 1. **Labels drive state** — Issue gets `go:yes` + `squad:fenster` → state changes to "In Progress". 2. **Board updates** — Squad syncs label changes to the project board automatically. 3. **Board changes propagate** — If you drag an issue to "Done" on the board, Squad applies the corresponding label (`status:done`). - Labels are authoritative. Boards reflect labels, not the other way around. - ## Board Structure - Squad's default board has 5 columns matching issue lifecycle: - | Column | Label State | Description | |--------|-------------|-------------| | **Backlog** | `go:no` or `release:backlog` | Not approved or deferred | @@ -54,47 +34,32 @@ Squad's default board has 5 columns matching issue lifecycle: | **Ready** | `go:yes`, no `squad:*` | Approved, awaiting assignment | | **In Progress** | `go:yes` + `squad:{member}` | Agent actively working | | **Done** | Issue closed | Completed and merged | - ## Creating a Board - > "Create a project board for this repository" - Squad runs: - ```bash gh project create --owner {org} --title "Squad Board" --format "Board" ``` - Then adds the 5 default columns and syncs all existing issues based on their labels. - ## Syncing Labels to Board - Squad's `sync-board.yml` workflow runs: - **On label change** — Issue labeled `go:yes` → moves to "Ready" column - **On issue close** → moves to "Done" column - **On PR merge** → linked issue moves to "Done" - ### Manual Sync - ```bash gh project item-list --owner {org} --project {project-id} # For each item, check label state and update column ``` - ## Board-to-Label Sync - When you manually move an issue on the board: - 1. 
**Board webhook triggers** — GitHub sends `projects_v2_item.moved` event 2. **Squad workflow runs** — Reads new column, infers label change 3. **Labels update** — Applies appropriate `go:*`, `squad:*`, or `status:*` label - Example: - Drag issue from "Backlog" to "Ready" → Squad applies `go:yes` - Drag issue from "Ready" to "In Progress" → Squad prompts: "Assign to which member?" then applies `squad:{member}` - ## Board CLI Commands - | Command | What it does | |---------|--------------| | `gh project list --owner {org}` | List all projects in org/repo | @@ -102,55 +67,40 @@ Example: | `gh project item-add {id} --url {issue-url}` | Add issue to board | | `gh project item-delete {id} --item-id {item}` | Remove issue from board | | `gh project field-list {id}` | List custom fields (Status, Priority, etc.) | - **Note:** `gh project` uses GraphQL, not REST. All operations are against the Projects V2 API. - ## Custom Fields - You can add custom fields to the board (Assignee, Priority, Release): - ```bash gh project field-create {id} --name "Priority" --data-type "SINGLE_SELECT" --options "P0,P1,P2" ``` - Squad syncs these from labels: - `priority:p0` → Board "Priority" field = "P0" - `release:v0.4.0` → Board "Release" field = "v0.4.0" - ## Current Status - GitHub Projects V2 integration is **planned for v0.4.0**. Current capabilities: - - ✅ Label-based state machine (fully implemented) - ✅ CLI access via `gh project` (prerequisite met) - 🚧 Automated board sync workflows (in development) - 🚧 Bidirectional sync (board → labels) (in development) - ❌ Custom field mapping (not yet implemented) - You can manually use `gh project` commands now. Full automation arrives in v0.4.0. - ## Sample Prompts - ``` Create a project board for Squad work ``` Initializes a new GitHub Projects V2 board with default columns and syncs existing issues. - ``` Move issue #42 to In Progress ``` Updates board column and applies `squad:{member}` label (prompts for member if not set). 
- ``` Sync all issues to the project board ``` Re-scans all open issues, updates board columns based on current label state. - ``` Add a custom Priority field to the board ``` Creates a custom field on the project board and maps it to `priority:*` labels. - ``` Show me the board status — how many issues in each column? ``` diff --git a/docs/src/content/docs/features/ralph.md b/docs/src/content/docs/features/ralph.md index 3aefacca2..482719ff0 100644 --- a/docs/src/content/docs/features/ralph.md +++ b/docs/src/content/docs/features/ralph.md @@ -1,53 +1,35 @@ # Ralph — Work Monitor - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to see active work:** ``` Ralph, show me what everyone is working on ``` - **Try this to identify blockers:** ``` Ralph, what's blocking progress on issue #42? ``` - **Try this to auto-assign work:** ``` Ralph, assign the next high-priority issue ``` - Ralph tracks the work queue, monitors CI status, and ensures the team never sits idle when there's work to do. He's always on the roster and requires GitHub CLI access. - --- - ## What Ralph Does - Ralph is a built-in squad member whose job is keeping tabs on work. Like Scribe tracks decisions, **Ralph tracks and drives the work queue**. He's always on the roster — not cast from a universe — and has one job: make sure the team never sits idle when there's work to do. - Ralph uses intelligent routing to match work to the right agent. Rather than simple keyword matching against role titles, Ralph reads `.squad/routing.md` — your team's work-type definitions and module ownership — to make smart triage and dispatch decisions. This is the same intelligence the in-session coordinator uses. - ## Prerequisites - Ralph requires access to GitHub Issues and Pull Requests via the `gh` CLI. **A GitHub PAT (Personal Access Token) with Classic scope is required.** - ### Why PAT Classic? 
- The default `GITHUB_TOKEN` provided by Copilot does not have sufficient scopes to read and write GitHub Issues and PRs. Ralph needs to: - List and read issues - Create and update issue labels and assignments - Read and interact with pull requests - Report on CI status - ### Setup - 1. **Create a PAT Classic token:** - Go to https://github.com/settings/tokens - Click "Generate new token (classic)" - Select scopes: `repo` and `project` (full access to repositories and projects) - Copy the token - 2. **Authenticate with `gh`:** ```bash gh auth login @@ -56,71 +38,49 @@ The default `GITHUB_TOKEN` provided by Copilot does not have sufficient scopes t - Select "HTTPS" for protocol - When asked "Authenticate Git with your GitHub credentials?", answer "Yes" - Choose "Paste an authentication token" and paste your PAT Classic token - 3. **Verify authentication:** ```bash gh auth status ``` - Once authenticated, Ralph can monitor your repository's issues and PRs. - ## How It Works - -Once activated, Ralph continuously checks for pending work — open issues, draft PRs, review feedback, CI failures — and keeps the squad moving through the backlog without manual nudges. Ralph's behavior is built on three layers: in-session coordinator, watch mode for local polling, and cloud heartbeat for fully unattended monitoring. - +Once activated, Ralph continuously checks for pending work — open issues, draft PRs, review feedback, CI failures — and keeps the squad moving through the backlog without manual nudges. Ralph's behavior is built on three layers: in-session coordinator, watch mode for local polling, and cloud heartbeat for event-driven monitoring. ### Routing-Aware Triage - Ralph doesn't rely on dumb keyword matching. 
He reads your `.squad/routing.md` file to understand: - **Work types** — categories like "Core runtime", "Docs & messaging", "Tests & quality" - **Agent assignments** — which agent owns each domain - **Module ownership** — which files belong to which agent (e.g., `src/hooks/` → Baer) - When triaging an issue, Ralph uses this priority order: 1. **Module path match** — If the issue mentions a file in `src/hooks/`, it routes to Baer (primary owner) 2. **Routing rule keywords** — If the issue mentions "docs" or "messaging", Ralph looks up those work types and assigns the matching agent (McManus for "Docs & messaging") 3. **Role keywords** — If no module or routing rule matches, Ralph scans the issue for role titles ("test", "security", "performance") 4. **Lead fallback** — If still no match, escalate to the team Lead for manual review This ensures Ralph makes intelligent decisions based on your team's actual structure, not generic heuristics. - ### In-Session (Copilot Chat) - When you're in a Copilot session, Ralph self-chains the coordinator's work loop: - 1. Agents complete a batch of work 2. Ralph checks GitHub for more: untriaged issues, assigned-but-unstarted items, draft PRs, failing CI 3. Work found → triage, assign, spawn agents 4. Results collected → Ralph checks again **immediately** — no pause, no asking permission 5. Board clear → Ralph idles (use `squad watch` for persistent polling) - -**Ralph never stops on his own while work remains.** He keeps cycling through the backlog until every issue is closed, every PR is merged, and CI is green. When the board clears, Ralph idles — run `squad watch` in a separate terminal for persistent polling, or use the cloud heartbeat for fully unattended monitoring. The only things that stop Ralph's active loop: the board is clear, you say "idle"/"stop", or the session ends. 
- +**Ralph never stops on his own while work remains.** He keeps cycling through the backlog until every issue is closed, every PR is merged, and CI is green. When the board clears, Ralph idles — run `squad watch` in a separate terminal for persistent polling, or use the cloud heartbeat for event-driven monitoring. The only things that stop Ralph's active loop: the board is clear, you say "idle"/"stop", or the session ends. ### Between Sessions (GitHub Actions Heartbeat) - When no one is at the keyboard, the `squad-heartbeat.yml` workflow runs on event-based triggers (issue close, PR merge, manual dispatch). It: - - Finds untriaged `squad`-labeled issues - Auto-triages based on your routing.md — matching issues to the right agent by work type and module ownership - Assigns `squad:{member}` labels -- For `@copilot` (if enabled with auto-assign): assigns `copilot-swe-agent[bot]` so the coding agent picks up work autonomously - -This creates a fully autonomous loop for `@copilot` — heartbeat triages → assigns → agent works → issue closed → heartbeat finds next issue → repeat. For continuous periodic monitoring, use `squad watch` locally. - +- For `@copilot` (if enabled with auto-assign): assigns `copilot-swe-agent[bot]` so the coding agent picks up work in the background +This creates a background loop for `@copilot` — heartbeat triages → assigns → agent works → issue closed → heartbeat finds next issue → repeat. For continuous periodic monitoring, use `squad watch` locally. ### Work-in-Progress Monitoring - Ralph doesn't just dispatch work and forget about it. Once an issue is assigned or a PR is created, Ralph **watches the work** — tracking its lifecycle from assigned → PR created → review requested → CI running → approved → merged. 
Each completed step triggers a re-scan: - - **Assigned but no PR**: Ralph checks if the assigned agent has started work - **PR created**: Ralph monitors for review feedback and CI status - **Changes requested**: Ralph routes the feedback back to the author agent - **CI passing**: Ralph marks as ready to merge - **PR merged**: Ralph closes the corresponding issue and picks up the next work item - This continuous watch prevents work from getting stuck in intermediate states — Ralph catches stalled PRs, failed CI, and review bottlenecks automatically. - ### Board State - Ralph maintains an internal view of the work board. Work items flow through these categories: - | Category | Meaning | Label(s) | |----------|---------|----------| | **Untriaged** | Issue has `squad` label but no `squad:{member}` assignment | `squad` only | @@ -131,31 +91,22 @@ Ralph maintains an internal view of the work board. Work items flow through thes | **CI Failure** | PR checks are failing | `squad:{member}` + `ci-failure` | | **Ready to Merge** | PR approved, all checks passing | `squad:{member}` + `approved` | | **Done** | PR merged, issue closed | *(removed from board)* | - Ralph uses these categories internally to decide what action to take next. When you ask for status, Ralph reports the current board state across all these categories. 
- ### What Wakes Ralph Up - Ralph monitors work at three different layers, each with different wake-up triggers: - **In-Session (Copilot Chat):** - Agent completes work → Ralph immediately checks for next item (no delay) - You say "Ralph, go" or "Ralph, status" → Ralph starts active loop - You say "Ralph, idle" → Ralph stops checking - **Watch Mode (`squad watch` CLI):** - Poll interval expires (default 10 min) → Ralph checks GitHub - You press Ctrl+C → Ralph stops - **Cloud Heartbeat (GitHub Actions events):** - Issue close event → Ralph checks for next item - PR merge event → Ralph checks for next item - Manual dispatch via GitHub Actions UI → Ralph checks GitHub - In all three layers, when Ralph wakes up, he scans the board, triages any untriaged items using routing.md, dispatches work to the right agent, watches in-flight items for progress, and reports results. - ## Talking to Ralph - | What you say | What happens | |---|---| | "Ralph, go" / "Ralph, start monitoring" | Activates the work-check loop | @@ -163,9 +114,7 @@ In all three layers, when Ralph wakes up, he scans the board, triages any untria | "Ralph, status" / "What's on the board?" | Runs one check cycle, reports results | | "Ralph, idle" / "Take a break" | Stops the loop | | "Ralph, scope: just issues" | Monitors only issues, skips PRs/CI | - ## What Ralph Monitors - | Category | Signal | Action | |---|---|---| | **Untriaged issues** | `squad` label, no `squad:{member}` label | Lead triages and assigns | @@ -174,54 +123,45 @@ In all three layers, when Ralph wakes up, he scans the board, triages any untria | **Review feedback** | Changes requested on PR | Route to author agent | | **CI failures** | PR checks failing | Notify agent to fix | | **Approved PRs** | Ready to merge | Merge and close issue | - ## Periodic Check-In - Ralph doesn't run silently forever. Every 3-5 rounds, Ralph reports and **keeps going**: - ``` 🔄 Ralph: Round 3 complete. 
✅ 2 issues closed, 1 PR merged 📋 3 items remaining: #42, #45, PR #12 Continuing... (say "Ralph, idle" to stop) ``` - Ralph does **not** ask permission to continue — he keeps working. The only things that stop Ralph: the board is clear, you say "idle"/"stop", or the session ends. - ## Watch Mode (`squad watch`) - Ralph's in-session loop processes work while it exists, then idles. For **persistent polling** when you're away from the keyboard, run the `squad watch` command in a separate terminal. - ### Triage Mode (Default) - Basic usage — triage only, no execution: - ```bash squad watch # polls every 10 minutes (default) squad watch --interval 5 # polls every 5 minutes squad watch --interval 30 # polls every 30 minutes ``` - This runs as a standalone local process (not inside Copilot) that: - Checks GitHub every N minutes for untriaged squad work - Auto-triages issues based on team roles and keywords - Assigns @copilot to `squad:copilot` issues (if auto-assign is enabled) - Runs until Ctrl+C - ### Full Work Monitor Mode (`--execute`) - -Add `--execute` to transform Ralph from a triage bot into a full work monitor that spawns Copilot sessions and actually does the work: - +Add `--execute` to transform Ralph from a triage bot into a full work monitor that spawns Copilot sessions and moves approved work forward: ```bash squad watch --execute # basic work monitor squad watch --execute --interval 15 # check every 15 minutes squad watch --execute --max-concurrent 2 # work on 2 issues in parallel ``` - When `--execute` is enabled, Ralph spawns Copilot CLI sessions for actionable issues (assigned to a squad member, not blocked, not already assigned to a human). Squad automatically injects `--yolo --additional-mcp-config @.mcp.json` into every spawned Copilot invocation so that MCP tools are available in non-interactive (`-p`) mode — see [Copilot CLI MCP Trust Gate](./copilot-mcp-trust.md) for details. 
- +#### Custom instructions with `.squad/ralph-instructions.md` +If `.squad/ralph-instructions.md` exists, `squad watch --execute` tells the spawned Ralph session to read that file first and follow **all** of its sections. This is the escape hatch for repo-specific Ralph behavior. +Use it when you want Ralph to consistently apply local operating rules such as: +- preferred execution order or escalation rules +- custom branch/PR conventions +- repo-specific definitions of “blocked” or “actionable” +If the file is missing, Ralph falls back to the built-in execution prompt. **Example execution output:** - ``` 🔄 Ralph — Round 1 ━━━━━━━━━━━━━━━━━━━━━━━━━━━━ @@ -234,43 +174,31 @@ When `--execute` is enabled, Ralph spawns Copilot CLI sessions for actionable is ▶ [14:25:44] Executing #45 "Add retry logic" → gh copilot --message "Work on issue #45..." ✓ [14:28:20] #45 completed ``` - ### All Watch Flags - All new features are **opt-in** and disabled by default. Existing `squad watch` behavior is unchanged. 
- #### Execution Control - | Flag | Description | Example | |------|-------------|---------| | `--execute` | Enable work execution (spawn Copilot to work on issues) | `squad watch --execute` | | `--max-concurrent N` | Max parallel issues per round (default: 1) | `squad watch --execute --max-concurrent 3` | | `--timeout N` | Per-issue timeout in minutes (default: 30) | `squad watch --execute --timeout 45` | | `--copilot-flags "..."` | Pass extra flags to Copilot CLI | `squad watch --execute --copilot-flags "--model gpt-4"` | - #### Issue Scanning - | Flag | Description | Example | |------|-------------|---------| | `--two-pass` | Lightweight list → hydrate actionable only (saves API quota) | `squad watch --two-pass` | | `--wave-dispatch` | Parallel sub-task execution within issues (dependency-aware) | `squad watch --execute --wave-dispatch` | - #### Communication Bridges - | Flag | Description | Example | |------|-------------|---------| | `--monitor-teams` | Scan Teams for actionable messages each round (requires WorkIQ MCP) | `squad watch --monitor-teams` | | `--monitor-email` | Scan email for alerts and action items each round (requires WorkIQ MCP) | `squad watch --monitor-email` | - #### Project Board Lifecycle - | Flag | Description | Example | |------|-------------|---------| | `--board` | Enable project board lifecycle (In Progress / Done / Blocked + reconciliation) | `squad watch --board` | | `--board-project N` | Project board number (default: 1) | `squad watch --board --board-project 2` | - #### Housekeeping & Governance - | Flag | Description | Example | |------|-------------|---------| | `--notify-level LEVEL` | Control round reporting noise: `important` (default), `all`, `none` | `squad watch --notify-level important` | @@ -278,33 +206,25 @@ All new features are **opt-in** and disabled by default. 
Existing `squad watch` | `--decision-hygiene` | Auto-merge decision inbox when >5 files | `squad watch --decision-hygiene` | | `--cleanup` | Auto-clear scratch files, archive old logs (every 10 rounds) | `squad watch --cleanup` | | `--channel-routing` | Route notifications to specific Teams channels (requires `.squad/teams-channels.json`) | `squad watch --channel-routing` | - ### Common Workflows - **Basic triage + work execution:** ```bash squad watch --execute --interval 10 ``` - **Full monitor with all features:** ```bash squad watch --execute --board --two-pass --monitor-teams --retro --decision-hygiene --max-concurrent 2 --interval 15 ``` - **Cost-conscious (two-pass, lower concurrency):** ```bash squad watch --execute --two-pass --max-concurrent 1 --timeout 20 ``` - **Teams + email bridge only (no issue execution):** ```bash squad watch --monitor-teams --monitor-email --interval 5 ``` - ### Round Cycle (Full Monitor) - When all features are enabled, each round follows this cycle: - 1. **Self-pull**: `git fetch && git pull --ff-only` to stay current 2. **Scan**: Fetch open issues (two-pass if enabled) 3. **Triage**: Label untriaged issues based on routing rules @@ -313,23 +233,15 @@ When all features are enabled, each round follows this cycle: 6. **Monitor**: Scan Teams/email for new actionable items 7. **Housekeep**: Check for retro, merge decision inbox if needed 8. **Report**: Log round summary, sleep until next interval - ### Advanced: `--agent-cmd` (Hidden Flag) - For advanced users who know what they're doing: - ```bash squad watch --execute --agent-cmd "custom-agent-wrapper" ``` - This fully overrides the agent command. The default is `gh copilot --message ""` plus any `--copilot-flags`. Use this to plug in custom agent wrappers or alternative Copilot entry points. - ### Azure DevOps Support - Ralph supports Azure DevOps repos and work items via the SDK's PlatformAdapter. 
When your git remote points to `dev.azure.com` or `visualstudio.com`, Ralph auto-detects ADO — no flag needed. - **Setup:** - 1. Install Azure CLI: `az extension add --name azure-devops` 2. Authenticate: `az login` 3. Add ADO config to `.squad/config.json`: @@ -342,31 +254,24 @@ Ralph supports Azure DevOps repos and work items via the SDK's PlatformAdapter. } } ``` - **Usage:** ```bash squad watch # auto-detects from git remote squad watch --execute # full work monitor (auto-detects platform) ``` - **Key differences from GitHub:** - ADO uses **tags** instead of labels — `squad:data` becomes a tag on the work item - ADO uses `az boards` CLI instead of `gh` — Ralph checks `az` availability - ADO rate limiting is handled differently — the circuit breaker skips quota checks - ADO PRs don't expose `statusCheckRollup` — CI status columns may be empty - ### Three layers of Ralph - | Layer | When | How | |-------|------|-----| | **In-session** | You're at the keyboard | "Ralph, go" — active loop while work exists | | **Local watchdog** | You're away but machine is on | `squad watch --interval 10` (triage) or `squad watch --execute` (full monitor) | -| **Cloud heartbeat** | Fully unattended | `squad-heartbeat.yml` GitHub Actions events (issue close, PR merge, manual dispatch) | - +| **Cloud heartbeat** | Event-driven | `squad-heartbeat.yml` GitHub Actions events (issue close, PR merge, manual dispatch) | ## Ralph's Board View - When you ask for status: - ``` 🔄 Ralph — Work Monitor ━━━━━━━━━━━━━━━━━━━━━━ @@ -376,52 +281,35 @@ When you ask for status: 🟢 Ready: 1 PR approved, awaiting merge ✅ Done: 5 issues closed this session ``` - ## Heartbeat Workflow Setup - The heartbeat workflow (`squad-heartbeat.yml`) is automatically installed during `init` or `upgrade`. 
It runs: - - **On issue close**: Checks for next item in backlog - **On PR merge**: Checks for follow-up work - **On manual dispatch**: Trigger via GitHub Actions UI - For persistent polling when you're away, use `squad watch` locally — it polls at your chosen interval without consuming GitHub Actions minutes. - ## Notes - - Ralph is session-scoped — his state (active/idle, round count, stats) resets each session - Ralph appears on the roster like Scribe: `| Ralph | Work Monitor | — | 🔄 Monitor |` - Ralph is exempt from universe casting — always "Ralph" - The heartbeat workflow is the between-session complement to in-session Ralph - ## Sample Prompts - ``` Ralph, go — start monitoring and process the backlog until it's clear ``` - Activates Ralph's self-chaining work loop to continuously process all pending work. - ``` Ralph, status ``` - Runs a single check cycle and shows the current board state without activating the work loop. - ``` squad watch --interval 5 ``` - Starts persistent local polling — checks GitHub every 5 minutes for new squad work and triages automatically. - ``` Ralph, scope: just issues ``` - Configures Ralph to monitor only issues and skip PRs and CI status checks. - ``` Ralph, idle ``` - Fully stops Ralph's work loop and idle-watch polling until manually reactivated. diff --git a/docs/src/content/docs/features/rate-limiting.md b/docs/src/content/docs/features/rate-limiting.md index 516aef117..106ff90d8 100644 --- a/docs/src/content/docs/features/rate-limiting.md +++ b/docs/src/content/docs/features/rate-limiting.md @@ -1,76 +1,50 @@ ---- -title: Rate Limiting -description: Cooperative rate limiting with a predictive circuit breaker that pauses before hitting API limits. -order: 36 ---- - -# Rate Limiting - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -**Try this to check rate limit status:** -``` -What's our current API rate limit headroom? 
-``` - -**Try this to adjust pacing:** -``` -Slow down — we're hitting rate limits on the LLM -``` - -Squad monitors API rate limit headroom in real time and pauses work before limits are reached — not after. This prevents cascading failures across concurrent agents. - ---- - -## How It Works - -Squad tracks the rate limit headers returned by every API call. Before dispatching the next request, it checks remaining headroom against a configurable threshold. If headroom is below the threshold, it pauses and waits for the window to reset. - -This is cooperative: agents yield voluntarily rather than hammering the API and hitting hard errors. - -## The RAAS Traffic-Light Pattern - -Squad uses a three-state model for rate limit health: - -| State | Meaning | Behavior | -|-------|---------|----------| -| 🟢 Green | Headroom is healthy | Proceed normally | -| 🟡 Amber | Headroom is low (below threshold) | Slow down, reduce concurrency | -| 🔴 Red | At or near limit | Pause all requests, wait for reset | - -The system transitions between states automatically as headroom changes. You do not need to configure thresholds manually — defaults are tuned for typical LLM API quotas. - -## When It Engages - -Rate limiting engages when: - -- Remaining requests in the current window drop below ~20% of the quota -- A `429 Too Many Requests` response is received (reactive fallback) -- Concurrent agent count is high and projected usage exceeds headroom - -## Recovery Behavior - -When the circuit is paused (🔴 Red): - -1. All pending requests queue in memory. -2. Squad polls the rate limit reset timestamp from the API response headers. -3. At reset, Squad resumes from the queue — oldest requests first. -4. State transitions back to 🟢 Green automatically. - -No work is dropped. Queued tasks resume without requiring user intervention. - -## Concurrency and Pacing - -In Amber state, Squad reduces the number of agents dispatching simultaneously. 
This distributes the remaining quota across a longer window rather than exhausting it instantly. - -``` -Green: all agents active, full concurrency -Amber: concurrency capped at 50% of normal -Red: all requests paused until reset -``` - -## See Also - -- [Model Selection](model-selection.md) — economy mode for cost and rate limit management -- [Parallel Execution](parallel-execution.md) — how concurrent agents share API quota -- [Cost Tracking](cost-tracking.md) — monitor spend alongside rate limit usage +--- +title: Rate Limiting +description: Cooperative rate limiting with a predictive circuit breaker that pauses before hitting API limits. +order: 36 +--- +# Rate Limiting +**Try this to check rate limit status:** +``` +What's our current API rate limit headroom? +``` +**Try this to adjust pacing:** +``` +Slow down — we're hitting rate limits on the LLM +``` +Squad monitors API rate limit headroom in real time and pauses work before limits are reached — not after. This prevents cascading failures across concurrent agents. +--- +## How It Works +Squad tracks the rate limit headers returned by every API call. Before dispatching the next request, it checks remaining headroom against a configurable threshold. If headroom is below the threshold, it pauses and waits for the window to reset. +This is cooperative: agents yield voluntarily rather than hammering the API and hitting hard errors. +## The RAAS Traffic-Light Pattern +Squad uses a three-state model for rate limit health: +| State | Meaning | Behavior | +|-------|---------|----------| +| 🟢 Green | Headroom is healthy | Proceed normally | +| 🟡 Amber | Headroom is low (below threshold) | Slow down, reduce concurrency | +| 🔴 Red | At or near limit | Pause all requests, wait for reset | +The system transitions between states automatically as headroom changes. You do not need to configure thresholds manually — defaults are tuned for typical LLM API quotas. 
+## When It Engages +Rate limiting engages when: +- Remaining requests in the current window drop below ~20% of the quota +- A `429 Too Many Requests` response is received (reactive fallback) +- Concurrent agent count is high and projected usage exceeds headroom +## Recovery Behavior +When the circuit is paused (🔴 Red): +1. All pending requests queue in memory. +2. Squad polls the rate limit reset timestamp from the API response headers. +3. At reset, Squad resumes from the queue — oldest requests first. +4. State transitions back to 🟢 Green automatically. +No work is dropped. Queued tasks resume without requiring user intervention. +## Concurrency and Pacing +In Amber state, Squad reduces the number of agents dispatching simultaneously. This distributes the remaining quota across a longer window rather than exhausting it instantly. +``` +Green: all agents active, full concurrency +Amber: concurrency capped at 50% of normal +Red: all requests paused until reset +``` +## See Also +- [Model Selection](model-selection.md) — economy mode for cost and rate limit management +- [Parallel Execution](parallel-execution.md) — how concurrent agents share API quota +- [Cost Tracking](cost-tracking.md) — monitor spend alongside rate limit usage diff --git a/docs/src/content/docs/features/reflect.md b/docs/src/content/docs/features/reflect.md index a23c2e9bc..459604e2b 100644 --- a/docs/src/content/docs/features/reflect.md +++ b/docs/src/content/docs/features/reflect.md @@ -2,69 +2,46 @@ title: Reflect — In-Session Learning Capture description: Built-in skill that extracts HIGH/MED/LOW confidence patterns from conversations to prevent repeating mistakes and reinforce successful patterns. --- - # Reflect — In-Session Learning Capture - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - The `reflect` skill is a built-in capability that turns every user correction into a learning opportunity. 
Agents invoke `reflect` after critical conversation moments — corrections, praise, edge-case discoveries — to capture patterns that prevent repeating mistakes across sessions. - It ships at `.copilot/skills/reflect/SKILL.md` and is automatically available to every spawned agent. The skill complements the existing knowledge layers (`history.md`, `decisions.md`) by capturing **in-flight** learnings that may later graduate to permanent memory. - --- - ## How it fits the memory architecture - Squad has three layers for what agents know: - | Layer | Lifetime | Audience | Reflect's relationship | |-------|----------|----------|------------------------| | `.squad/agents/{name}/history.md` | Permanent | Owner agent + Scribe-propagated cross-updates | Reflect captures candidates; HIGH-confidence ones graduate here | | `.squad/decisions.md` | Permanent | All agents | Reflect surfaces candidates; lead promotes after review | | `reflect` skill | In-flight | Captured during the active session | Working memory for patterns not yet ready to commit | - Workflow: 1. During the session, agents invoke `reflect` to capture learnings 2. At session end, the agent or Scribe reviews captured learnings 3. HIGH-confidence patterns → lead reviews for `decisions.md` promotion 4. 
Agent-specific patterns → `{agent}/history.md` append - --- - ## Triggers — when to invoke reflect - ### 🔴 HIGH Priority (invoke immediately) - | Trigger | Example phrase | Why critical | |---------|---------------|--------------| | User correction | *"no"*, *"wrong"*, *"not like that"*, *"never do"* | Captures mistakes to prevent repetition | | Architectural insight | *"you removed that without understanding why"* | Documents the *why* behind a design (Chesterton's Fence) | | Immediate fixes | *"debug"*, *"root cause"*, *"fix all"* | Learns from errors in real-time | - ### 🟡 MEDIUM Priority (invoke after multiple instances) - | Trigger | Example phrase | Why important | |---------|---------------|--------------| | User praise | *"perfect"*, *"exactly"*, *"great"* | Reinforces successful patterns | | Tool preferences | *"use X instead of Y"*, *"prefer"* | Builds workflow preferences | | Edge cases | *"what if X happens?"*, *"don't forget"*, *"ensure"* | Captures scenarios to handle | - ### 🟢 LOW Priority (invoke at natural breakpoints) - | Trigger | Example phrase | Why useful | |---------|---------------|--------------| | Workflow refinements | *"better if you..."*, *"next time"* | Iterative improvement | | Style preferences | *"prefer this format"*, *"like this approach"* | Personal style learning | - --- - ## Capture format - Reflect produces structured entries the lead or Scribe can review at session end: - ```markdown ## Reflection — 2026-06-11T16:42:00Z - **Trigger:** User correction — "no, never auto-merge without explicit approval" **Confidence:** HIGH **Pattern:** Auto-merge gating @@ -72,20 +49,14 @@ Reflect produces structured entries the lead or Scribe can review at session end **Promote to:** `decisions.md` (team-wide rule) — surface to lead next ceremony **Cited:** Coordinator session 2026-06-11, user message ~16:41 ``` - --- - ## Anti-patterns - - **Don't capture every interaction.** Reflect is for inflection points — corrections, surprises, 
breakthroughs. A capture rate >1 per ~10 messages is too high. - **Don't promote LOW-confidence patterns to decisions.md.** Decisions are binding for the whole team; LOW captures are personal preferences and should live in the agent's `history.md` if anywhere. - **Don't reflect on user instructions you already executed correctly.** That's not learning, that's logging. - **Don't paraphrase the user's words when capturing HIGH-priority items.** Verbatim quotes preserve nuance. - --- - ## See also - - [Memory & Knowledge](/squad/docs/concepts/memory-and-knowledge/) — the three-layer model - [Directives](/squad/docs/features/directives/) — how the coordinator captures explicit team rules - [Error Recovery](/squad/docs/features/error-recovery/) — the companion skill for handling failures diff --git a/docs/src/content/docs/features/remote-control.md b/docs/src/content/docs/features/remote-control.md deleted file mode 100644 index 47ff15d6f..000000000 --- a/docs/src/content/docs/features/remote-control.md +++ /dev/null @@ -1,326 +0,0 @@ -# Squad Remote Control - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -Control Copilot CLI from your phone via a secure WebSocket tunnel. Perfect for demos, pairing on mobile, or monitoring runs from anywhere. - -```bash -squad start --tunnel -# Shows QR code → scan with phone → terminal appears in browser -``` - ---- - -## What It Does - -`squad start` spawns Copilot CLI in a pseudo-terminal (PTY) and mirrors output to your phone in real-time via: - -1. **PTY** — Copilot runs in a full interactive terminal -2. **WebSocket server** — Terminal I/O streams live via WebSocket -3. **devtunnel** — Secure public URL with authentication (optional) -4. 
**Phone browser** — xterm.js terminal renders on your phone - -Architecture diagram: - -``` -[Copilot CLI in PTY] - ↓ (terminal output/input) -[WebSocket Server] - ↓ (bidirectional) -[devtunnel] (optional, provides public URL) - ↓ (HTTPS + private auth) -[Phone Browser (xterm.js)] - ↓ (mobile keyboard shortcuts, replay buffer) -[Your Phone] -``` - ---- - -## Prerequisites - -### Required - -- **devtunnel CLI** (for `--tunnel` mode) - ```bash - # Windows (winget) - winget install Microsoft.devtunnel - - # macOS (Homebrew) - brew install devtunnel - - # Or via GitHub releases - # https://github.com/microsoft/devtunnel/releases - ``` - -- **devtunnel authentication** (required before first use) - ```bash - devtunnel user login - # Browser opens → authenticate → success - ``` - -### Optional - -- **Node.js 18+** (for CLI) -- **Modern browser** on phone (iOS Safari, Chrome, Firefox) - ---- - -## Usage Examples - -### Basic: Local PTY Terminal - -No tunnel, no phone access — just run Copilot in a PTY: - -```bash -squad start -# Output: Started PTY terminal (PID: 12345) -# Copilot running locally -``` - -### With Phone Access (devtunnel + QR) - -Create a tunnel, show QR code, let your phone scan and connect: - -```bash -squad start --tunnel -# Output: Started devtunnel session -# Session ID: abc123xyz -# QR Code: [████████████████] -# URL: https://abc123xyz-dev.devtunnels.ms -# -# Tap or scan QR on your phone → terminal appears -``` - -Scan the QR code with your phone camera. Opens browser → terminal renders with xterm.js. 
- -### Custom Port - -Specify the WebSocket server port: - -```bash -squad start --port 3456 -# Output: WebSocket listening on localhost:3456 -# Access via: ws://localhost:3456 -``` - -### Custom Command - -Run a different shell or program instead of copilot: - -```bash -squad start --tunnel --command powershell -squad start --tunnel --command "python" -squad start --tunnel --command "bash -i" -``` - -### Pass Copilot Flags Through - -All flags after `--tunnel` pass to copilot: - -```bash -squad start --tunnel --yolo -squad start --tunnel --model gpt-4 -squad start --tunnel --no-config -``` - ---- - -## Security Model - -Remote access has **7 layers** of security: - -### 1. **devtunnel Private Auth** -- URL requires `Authorization` header (devtunnel access token) -- Tunnel is private by default — no public discovery - -### 2. **Session Token (UUID, 4-hour TTL)** -- Each session gets a unique token, valid for 4 hours -- Token embedded in QR code or shown as connection string -- Expires automatically - -### 3. **Ticket-Based WebSocket Auth** -- First request exchanges session token for single-use ticket -- Ticket valid 60 seconds, single use only -- Prevents token replay attacks - -### 4. **HTTP Rate Limiting** -- 30 requests per minute per IP address -- Blocks brute-force connection attempts -- Rate limit resets hourly - -### 5. **Environment Variable Blocklist** -- 27 common secret patterns redacted from output -- Blocks: `PASSWORD`, `TOKEN`, `SECRET`, `KEY`, `AWS_`, `GITHUB_`, `API_KEY`, etc. -- ANSI escape sequences cannot bypass redaction - -### 6. **Secret Redaction (27 Patterns + ANSI Bypass Prevention)** -- Secrets matching patterns replaced with `[REDACTED]` -- ANSI codes cannot hide redaction logic -- Example: `Password=mysecret123` → `Password=[REDACTED]` - -### 7. 
**Connection Limits** -- **Global:** Max 5 concurrent phone connections per session -- **Per IP:** Max 2 concurrent connections per IP address -- Excess connections rejected with 429 (Too Many Requests) - ---- - -## Mobile Keyboard - -When your phone connects, a key bar appears below the terminal: - -| Key | Action | -|-----|--------| -| **↑** / **↓** | Scroll history / scroll terminal output | -| **←** / **→** | Move cursor left / right | -| **Tab** | Insert tab character (or autocomplete if supported) | -| **Enter** | Send command / newline | -| **Esc** | Send Escape key (menu mode, cancel) | -| **Ctrl+C** | Send interrupt signal (SIGINT) — kills running command | -| **Space** | Insert space | -| **⌫** | Backspace / delete | - ---- - -## Replay Buffer - -When a new phone joins the session: - -1. **Terminal history is replayed** — joins don't see a blank screen -2. **Replay window** — last 1000 lines of terminal output -3. **Scrollback included** — can scroll to see previous commands - -This means late-joiners see context, not blank canvas. - ---- - -## Session Dashboard - -List and manage active devtunnel sessions: - -```bash -squad start --list-sessions -# Output: -# Session 1: abc123xyz (2 phones connected, 1h 23m running) -# Session 2: def456uvw (0 phones, 2m running) -# Session 3: ghi789rst (1 phone, idle) -``` - -Kill a session: - -```bash -squad start --kill-session abc123xyz -# Output: Session closed. Remaining: 2 -``` - ---- - -## Architecture Notes - -### PTY-Only Mode - -Remote Control runs in **PTY-only mode** — no Copilot ACP (Agent Control Protocol) messages flow through the WebSocket. The terminal is a **mirror**, not a command channel: - -- Terminal I/O (text, control codes) ↔ WebSocket -- ACP protocol stays local to the Copilot process -- No agent instructions flow through the tunnel - -This design keeps the tunnel stateless and reduces surface area. 
- ---- - -## Audit Logging - -All connections, authentication, and security events are logged: - -```bash -~/.cli-tunnel/audit/squad-audit-2025-01-15.jsonl -``` - -Each line is a JSON object: - -```json -{ - "timestamp": "2025-01-15T10:23:45.123Z", - "event": "connection", - "session_id": "abc123xyz", - "phone_ip": "203.0.113.42", - "status": "authenticated" -} -``` - -Events logged: -- `connection` — Phone connected -- `disconnection` — Phone disconnected -- `auth_failure` — Token/ticket validation failed -- `rate_limit` — Rate limit exceeded -- `redaction` — Secret pattern matched and redacted -- `command` — Command executed (summary, no args) - -Rotate daily, keep 30 days by default. - ---- - -## Troubleshooting - -### "devtunnel not found" - -Install devtunnel: - -```bash -winget install Microsoft.devtunnel -``` - -Or check `PATH`: - -```bash -where devtunnel -# Should show path to executable -``` - -### "Not authenticated to devtunnel" - -Log in: - -```bash -devtunnel user login -``` - -### Phone doesn't connect (QR code error) - -1. Check QR code isn't expired (valid for 5 minutes) -2. Verify phone is on same network or has internet -3. Try manual URL instead of QR: - ```bash - # Copy URL from terminal and paste in phone browser - https://abc123xyz-dev.devtunnels.ms - ``` - -### Terminal freezes - -This is typically Copilot waiting for input. Type a command or press Enter: - -``` -squad > [CURSOR BLINKING] -``` - -Press Enter to see the prompt. - -### Audit logs missing - -Ensure `~/.cli-tunnel/` directory exists: - -```bash -mkdir -p ~/.cli-tunnel/audit -``` - -Logs are created on first event. 
- ---- - -## See Also - -- [CLI Reference](../reference/cli.md) — All commands -- [Getting Started](../get-started/installation.md) — Squad setup -- [VS Code Integration](./vscode.md) — Remote Control in VS Code diff --git a/docs/src/content/docs/features/response-modes.md b/docs/src/content/docs/features/response-modes.md index 29efdfd33..9066b6851 100644 --- a/docs/src/content/docs/features/response-modes.md +++ b/docs/src/content/docs/features/response-modes.md @@ -1,132 +1,86 @@ # Response Modes - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to get concise answers:** ``` Respond in terse mode — just the facts ``` - **Try this to speed up simple tasks:** ``` Use lightweight mode for quick fixes ``` - Squad automatically picks the right response mode based on complexity — from instant direct answers (2s) to full multi-agent parallel work (60s). You can override anytime. - --- - ## The Four Modes - Not every request needs the full agent machinery. Squad automatically selects a response mode based on the complexity of your message. - | Mode | Time | What Happens | When Used | |------|------|-------------|-----------| | **Direct** | ~2–3s | Coordinator answers without spawning an agent | Status checks, factual questions | | **Lightweight** | ~8–12s | One agent, minimal prompt — skips charter, history, and decisions | Small fixes, quick follow-ups | | **Standard** | ~25–35s | Full agent spawn with charter, history, and decisions | Normal work requests | | **Full** | ~40–60s | Multiple agents spawn in parallel, each with full context | Complex multi-domain tasks | - --- - ## Direct - The coordinator handles it alone — no sub-agent is spawned. This isn't a response mode in the SDK sense; the coordinator answers the question itself using context it already has (team roster, decisions, history). - ``` > What port does the server run on? > Where are we on the auth work? > Who's on the team? 
``` - Fast answers from context the coordinator already has. If the coordinator responds with `DIRECT:`, no agent session is created. - ## Lightweight - One agent is spawned with a reduced prompt — skips loading charter, history, and decisions to save time. - ``` > Fix the typo in the README > Add that missing import > Update the version number ``` - Good for small, well-defined tasks where full context isn't needed. - ## Standard - Full agent spawn. The agent reads its charter, history, and team decisions before working. - ``` > Build the user profile API endpoint > Refactor the auth middleware > Write tests for the payment module ``` - This is the default mode for most work. - ## Full - Multiple agents spawn in parallel, each with full context. A [design review ceremony](ceremonies.md) may trigger first. - ``` > Team, build the dashboard > Rebuild the authentication system > Implement the search feature end-to-end ``` - Used for complex tasks that span multiple domains (frontend, backend, testing). - --- - ## How Modes Are Selected - The coordinator picks the mode automatically based on: - - **Complexity** of the request - **Number of domains** involved - **Whether context is needed** (history, decisions, skills) - You don't need to specify a mode. When uncertain, the coordinator biases toward upgrading — it's better to spend a few extra seconds loading context than to miss something. - --- - ## Tips - - If a response feels slow for a simple question, it's likely using Standard when Direct would suffice. This is rare — the coordinator is good at picking the right mode. - "Team, ..." prompts typically trigger Full mode. - Direct-named agent prompts ("Kane, ...") typically trigger Standard mode. - Response times depend on the Copilot platform. The numbers above are approximate. - ## Sample Prompts - ``` force lightweight mode for this quick fix ``` - Explicitly requests a reduced-context spawn for a simple task. - ``` what port does the API run on? 
``` - Quick factual question that triggers Direct mode with no agent spawn. - ``` Kane, do a thorough analysis of the auth system ``` - Requests Standard mode with full context load for complex work. - ``` what response mode was used for that last task? ``` - Checks which mode the coordinator selected for the previous request. - ``` Team, rebuild the authentication system end-to-end ``` - Multi-domain prompt that triggers Full mode with parallel agent spawns. diff --git a/docs/src/content/docs/features/reviewer-protocol.md b/docs/src/content/docs/features/reviewer-protocol.md index fa69b5e6b..8eca57259 100644 --- a/docs/src/content/docs/features/reviewer-protocol.md +++ b/docs/src/content/docs/features/reviewer-protocol.md @@ -1,40 +1,26 @@ # Reviewer Rejection Protocol - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to request a code review:** ``` Review the changes in src/auth/ and check for security issues ``` - **Try this to trigger peer review:** ``` Lead, review the PR from Fenster ``` - When a reviewer (Lead, Tester) rejects work, the original agent is locked out from self-revision. This prevents endless fix-retry loops and forces human oversight or reassignment. - --- - ## How It Works - 1. **Agent submits work** — Creates draft PR, requests review from Lead or Tester. 2. **Reviewer evaluates** — Checks code quality, test coverage, adherence to directives. 3. **Reviewer decision:** - **Approve** → PR merges, issue closes, agent unlocked. - **Request changes** → Agent is **locked out**, work routes to another agent or escalates. - ## Strict Lockout - Once a reviewer rejects work, the **original agent cannot revise their own submission**. This is a hard constraint: - - Agent A writes code → Lead rejects - Agent A **cannot** fix and resubmit - Coordinator must **reassign** to Agent B or **escalate** to user - ### Why Lockout? 
- Without lockout: - Agent A writes buggy code - Lead rejects: "This has race conditions" @@ -42,85 +28,59 @@ Without lockout: - Lead rejects again: "Still broken" - Agent A fixes, resubmits - Infinite loop, no progress - With lockout: - Agent A writes buggy code - Lead rejects: "This has race conditions" - Agent A **locked out** - Coordinator assigns Agent B (fresh perspective) or escalates to user - Work gets done or human intervenes - ## Reassign vs. Escalate - When rejection happens, coordinator has two options: - | Option | When to Use | How It Works | |--------|-------------|--------------| | **Reassign** | Another agent has the skill | Route work to different squad member with relevant expertise | | **Escalate** | No other agent fits, or multiple rejections | Notify user, ask for manual intervention or guidance | - ### Reassign Example - 1. Fenster (Frontend) writes a React component → Lead rejects: "Accessibility issues" 2. Fenster locked out 3. Coordinator checks skills: Hockney (Frontend) has accessibility expertise 4. Work reassigned to Hockney 5. Hockney fixes and resubmits - ### Escalate Example - 1. Backend writes API logic → Tester rejects: "Integration tests fail" 2. Backend locked out 3. Coordinator reassigns to Core Dev → Core Dev also fails review 4. Core Dev locked out 5. **All agents exhausted** → Coordinator escalates to user: "Issue #42 rejected twice. Need guidance or manual fix." - ## Lockout Scope and Duration - | Scope | Duration | |-------|----------| | **Task-specific** | Lockout applies to the specific PR/issue, not all work | | **Session-persistent** | Lockout survives session restarts (stored in `.squad/orchestration-log/`) | | **Clearable** | User can manually unlock: "Unlock Fenster for issue #42" | - An agent locked out of issue #42 can still work on issue #43, #44, etc. Lockout is not a global ban. - ## Deadlock Handling - If **all capable agents are locked out**: - 1. Coordinator detects deadlock: no available agents for work. 
2. Coordinator escalates to user: "All agents locked out for issue #42. Options: 1) Manual fix, 2) Unlock an agent and provide guidance, 3) Close as won't-fix." 3. User chooses resolution. - This prevents the team from getting stuck in a state where no one can proceed. - ## Reviewer Authority - Only **designated reviewers** can lock out agents: - | Reviewer | Authority | Scope | |----------|-----------|-------| | **Lead** | Code quality, architecture, security | All code submissions | | **Tester** | Test coverage, correctness | Test-related changes | | **User (you)** | Final arbiter | Can override any decision | - Other agents (Frontend, Backend, DevRel) cannot lock out peers. - ## Unlocking an Agent - > "Unlock Fenster for issue #42" - Coordinator clears the lockout. Fenster can now revise the PR. Use this when: - - Reviewer feedback was unclear, you've provided better guidance - Agent legitimately misunderstood requirements - External factors (API change, dependency update) invalidated the original rejection - ## Lockout Logs - Lockouts are recorded in `.squad/orchestration-log/`: - ``` [2024-01-15 15:45:30] REVIEW: Lead rejected PR #12 (author: Fenster) [2024-01-15 15:45:31] LOCKOUT: Fenster locked out for issue #42 @@ -128,69 +88,45 @@ Lockouts are recorded in `.squad/orchestration-log/`: [2024-01-15 16:20:10] REVIEW: Lead approved PR #13 (author: Hockney) [2024-01-15 16:20:11] UNLOCK: Fenster unlocked (issue #42 resolved) ``` - ## Trust Levels for PR Management - This section covers the spectrum of human oversight for Squad-created PRs: - ### 1. Full Review (Default) - Every PR requires human approval before merge. This is the default and recommended for team repos, shared codebases, and anything with external collaborators. - **When to use:** Team repositories, public packages, shared codebases where multiple people depend on stability. - **Risk:** Low — human gate on every change. - ### 2. 
Selective Review - Squad creates and reviews PRs, but the human only reviews PRs that touch specific paths or domains they care about. Everything else merges after agent review. - **When to use:** Personal projects with established patterns where you trust Squad's judgment on routine changes (dependency updates, test fixes, doc improvements). - **Risk:** Medium — some changes skip human eyes. - ### 3. Self-Managing (Personal Repos Only) - Squad creates, reviews, approves, and merges its own PRs. The human only jumps in when an issue is explicitly flagged for review. - **When to use:** Solo personal projects where you're the sole maintainer and experimentation speed matters more than pre-merge safety. - **Risk:** Higher — but fast; review PRs retroactively. - ### Decision Matrix - | Trust Level | When | Risk | |-------------|------|------| | Full review | Team repos, shared codebases, public packages | Low — human gate on every change | | Selective review | Personal projects with established patterns | Medium — some changes skip human eyes | | Self-managing | Solo personal projects, experimentation | Higher — but fast; review PRs retroactively | - **Important:** Self-managing mode doesn't mean unmonitored. Use Ralph's work monitoring, Teams notifications, and periodic code review to stay informed. The difference is that you review *after* merge rather than *before*. - --- - ## Sample Prompts - ``` Lead, review PR #15 ``` Triggers review. Lead evaluates code and either approves (merge + unlock) or rejects (lockout original author). - ``` Why is Fenster locked out? ``` Coordinator explains: "Fenster was locked out for issue #42 after Lead rejected PR #15 due to security concerns." - ``` Unlock Fenster for issue #42 — I've given him better guidance ``` Clears lockout. Fenster can now revise the PR with your additional context. - ``` Reassign issue #42 from Fenster to Hockney ``` Manual reassignment. Fenster remains locked out, Hockney takes over the work. 
- ``` Escalate issue #42 to me — the team is stuck ``` diff --git a/docs/src/content/docs/features/routing.md b/docs/src/content/docs/features/routing.md index 69f631faf..77c00c8e3 100644 --- a/docs/src/content/docs/features/routing.md +++ b/docs/src/content/docs/features/routing.md @@ -1,48 +1,29 @@ # Work Routing - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to set domain-specific routing:** ``` Route all database-related work to Basher ``` - **Try this to direct work explicitly:** ``` Fenster, fix the login validation bug ``` - **Try this to check routing logic:** ``` Who handles src/api/ changes? ``` - The coordinator routes work to the right agent using named routing (explicit), domain routing (pattern matching), and skill-aware routing (capability checking). No manual triage needed. - --- - ## How Routing Works - The coordinator decides who handles each piece of work using a three-layer routing system: named routing (explicit assignments), domain routing (pattern matching), and skill-aware routing (capability checking). The goal: get work to the right agent without manual triage overhead. - ## Routing Strategies - ### 1. Named Routing - You explicitly name who should do the work: - > "Fenster, fix the login validation bug" - Coordinator assigns directly to Fenster. No lookup required. - ### 2. Domain Routing - The coordinator checks `.squad/routing.md` for pattern matches: - ```markdown ## Routing Table - | Pattern | Owner | Reason | |---------|-------|--------| | `src/api/**` | Backend | API implementation | @@ -51,33 +32,22 @@ The coordinator checks `.squad/routing.md` for pattern matches: | `docs/**` | DevRel | Documentation | | `package.json`, `tsconfig.json` | Lead | Config changes | ``` - When work involves `src/api/auth.ts`, it routes to Backend automatically. - ### 3.
Skill-Aware Routing - If no domain match, the coordinator checks `.copilot/skills/` for capability fit: - ```markdown # authentication.md Members with authentication expertise: - Backend (OAuth, JWT, session management) - Lead (security review, architecture) ``` - Work tagged with authentication routes to Backend or Lead based on task type (implementation vs. review). - ## The Routing Table - `.squad/routing.md` is the canonical routing manifest. It's structured as: - ```markdown # Work Routing - Default assignments for common patterns. - ## Routing Table - | Pattern | Owner | Reason | |---------|-------|--------| | `src/frontend/**` | Frontend | UI implementation | @@ -85,95 +55,63 @@ Default assignments for common patterns. | `*.test.js` | Tester | Test coverage | | `README.md`, `docs/**` | DevRel | User-facing docs | | `.github/workflows/**` | Lead | CI/CD config | - ## Fallback - If no match: route to Lead for triage. ``` - ## Adding Routing Rules - Tell the coordinator: - > "From now on, route all database migrations to Backend" - Coordinator adds to routing.md: - ```markdown | `migrations/**`, `*.sql` | Backend | Database schema changes | ``` - Or edit `.squad/routing.md` directly. - ## Routing Ambiguity - When multiple patterns match: - 1. **Most specific wins** — `src/api/auth.ts` matches both `src/api/**` and `src/**`, but `src/api/**` is more specific. 2. **Named > Domain > Skill** — Explicit assignment always overrides pattern matching. 3. **Fallback to Lead** — If no clear owner, route to Lead for triage. - ## Issue Label Routing - GitHub issues with `squad:{member}` labels route directly: - - `squad:fenster` → Fenster picks it up - `squad:mcmanus` → McManus handles it - No `squad:*` label → Coordinator triages and assigns - Ralph (the work monitor) uses this to auto-assign based on routing rules. - ## Multi-Agent Work - Some tasks require multiple agents: - > "Fenster, implement the API. Hockney, write the tests." 
- Coordinator spawns both agents in parallel. They work independently and coordinate via the shared `.squad/` state. - ## Routing Logs - The coordinator logs routing decisions to `.squad/orchestration-log/`: - ``` [2024-01-15 14:23:10] ROUTE: Issue #42 → Backend (pattern: src/api/**) [2024-01-15 14:24:05] ROUTE: Issue #43 → Lead (no match, fallback) [2024-01-15 14:25:30] ROUTE: "Fenster, fix bug" → Fenster (named) ``` - Useful for debugging why work went to a specific agent. - ## Sample Prompts - ``` Route all CSS files to Frontend ``` Adds a routing rule: `*.css` → Frontend. - ``` Who handles authentication work? ``` Coordinator checks routing.md and skills/authentication.md, reports the responsible agent(s). - ``` From now on, McManus reviews all user-facing documentation before merge ``` Creates a routing rule + directive: docs/** routes to McManus for review. - ``` Why did issue #42 go to Backend? ``` Coordinator explains the routing decision based on pattern match or skill fit. - ``` Fenster, implement the new search API. Hockney, write integration tests for it. ``` Named routing to two agents. Both spawn in parallel. - --- - ## See Also - - [Your Team](../concepts/your-team.md) — How team members and roles are defined - [Architecture](../concepts/architecture.md) — How the coordinator uses routing to dispatch work - [Parallel Work](../concepts/parallel-work.md) — Multi-agent parallel execution diff --git a/docs/src/content/docs/features/scratch-dir.md b/docs/src/content/docs/features/scratch-dir.md index 529e8de7e..6a57eef16 100644 --- a/docs/src/content/docs/features/scratch-dir.md +++ b/docs/src/content/docs/features/scratch-dir.md @@ -1,68 +1,44 @@ # Scratch Directory - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - **Try this to see what's in scratch:** ```bash ls .squad/.scratch/ ``` - **Try this to create a scratch file:** ```typescript import { scratchFile } from '@bradygaster/squad-sdk'; - const promptPath = scratchFile(squadRoot, 'coordinator-prompt', '.txt', promptContent); ``` - Squad provides `.squad/.scratch/` as the canonical location for ephemeral temp files — prompt files, commit drafts, processing artifacts — keeping the repo root clean. - --- - ## Why Scratch Dir? - Before scratch dir, agents wrote temp files to the repo root: - `prompt-123.txt` - `commit-draft-456.txt` - `processing-temp-789.json` - This polluted the working directory and risked accidental commits. Scratch dir solves this by providing a **dedicated ephemeral space** that: - 1. **Lives in `.squad/.scratch/`** — clearly separated from project files 2. **Gitignored by default** — automatically excluded during `squad init` 3. **Auto-created on demand** — no setup required 4. **Cleaned regularly** — purged during `squad watch` cleanup cycles - --- - ## API - ### `scratchDir(squadRoot: string): string` - Resolves and creates the scratch directory. - ```typescript import { scratchDir } from '@bradygaster/squad-sdk'; - const scratchPath = scratchDir('/path/to/repo'); // Returns: /path/to/repo/.squad/.scratch // Side effect: Creates directory if it doesn't exist ``` - **Behavior:** - Returns absolute path to `.squad/.scratch/` - Creates directory if missing (including parent `.squad/` if needed) - Idempotent — safe to call multiple times - --- - ### `scratchFile(squadRoot: string, prefix: string, ext: string, content: string): string` - Creates a named temp file in scratch dir. 
- ```typescript import { scratchFile } from '@bradygaster/squad-sdk'; - const promptPath = scratchFile( '/path/to/repo', 'coordinator-prompt', @@ -72,30 +48,23 @@ const promptPath = scratchFile( // Returns: /path/to/repo/.squad/.scratch/coordinator-prompt-abc123.txt // Side effect: Writes file with given content ``` - **Parameters:** - `squadRoot` — path to repository root - `prefix` — file name prefix (e.g., `'coordinator-prompt'`) - `ext` — file extension with leading dot (e.g., `'.txt'`, `'.json'`) - `content` — file content to write - **Behavior:** - Auto-generates unique suffix (timestamp + random hex) - Returns absolute path to created file - Creates scratch dir if missing - Overwrites file if it already exists (rare due to unique suffix) - **Example filenames:** - `coordinator-prompt-20250125-a3f2.txt` - `commit-draft-20250125-b8d1.txt` - `processing-temp-20250125-c4e9.json` - --- - ## Common Use Cases - ### Coordinator Prompts - ```typescript const promptPath = scratchFile( squadRoot, @@ -103,12 +72,9 @@ const promptPath = scratchFile( '.txt', buildCoordinatorPrompt(issue) ); - await spawnCopilot(promptPath); ``` - ### Commit Message Drafts - ```typescript const draftPath = scratchFile( squadRoot, @@ -116,12 +82,9 @@ const draftPath = scratchFile( '.txt', buildCommitMessage(changes) ); - await git(['commit', '-F', draftPath]); ``` - ### Processing Artifacts - ```typescript const dataPath = scratchFile( squadRoot, @@ -129,74 +92,51 @@ const dataPath = scratchFile( '.json', JSON.stringify(intermediateData, null, 2) ); - // Process data... 
// File will be cleaned up during next cleanup cycle ``` - --- - ## Lifecycle - **Created:** - During `squad init` — `.squad/.scratch/` created and added to `.gitignore` - On-demand by `scratchDir()` or `scratchFile()` — auto-created if missing - **Populated:** - By agents during work sessions (prompts, drafts, artifacts) - By SDK/CLI when spawning Copilot sessions - By coordinator when orchestrating multi-agent work - **Cleaned:** - During `squad watch` cleanup cycles (default: every 10 rounds) - Manual cleanup: `rm -rf .squad/.scratch/*` - --- - ## Migration - Old code that wrote to repo root: - ```typescript // ❌ Before: pollutes repo root const promptPath = path.join(repoRoot, `prompt-${Date.now()}.txt`); fs.writeFileSync(promptPath, content); ``` - New code using scratch dir: - ```typescript // ✅ After: uses scratch dir const promptPath = scratchFile(repoRoot, 'prompt', '.txt', content); ``` - --- - ## Notes - - Scratch dir is **ephemeral by design** — nothing in `.squad/.scratch/` should be committed or preserved long-term - Cleanup is safe — scratch files are temporary and safe to delete anytime - Scratch dir is **team-wide** — not per-agent, not per-session - `.gitignore` entry is added during `squad init` and preserved during `squad upgrade` - --- - ## Sample Prompts - ``` Show me what's in the scratch directory ``` - Lists all files currently in `.squad/.scratch/`. - ``` Clear the scratch directory ``` - Deletes all files in `.squad/.scratch/` (manual cleanup). - ``` Create a scratch file for debugging ``` - Uses `scratchFile()` to create a temp file for debugging output. diff --git a/docs/src/content/docs/features/self-upgrade.md b/docs/src/content/docs/features/self-upgrade.md index 20fd7f86a..628701683 100644 --- a/docs/src/content/docs/features/self-upgrade.md +++ b/docs/src/content/docs/features/self-upgrade.md @@ -1,260 +1,179 @@ # Self Upgrade - -> ⚠️ **Experimental** — Squad is alpha software. 
APIs, commands, and behavior may change between releases. - - **Try this to upgrade Squad CLI:** ```bash squad upgrade --self ``` - **Try this to upgrade to latest insider build:** ```bash squad upgrade --self --insider ``` - **Try this to upgrade both CLI and repo templates:** ```bash squad upgrade --self && squad upgrade ``` - Squad can upgrade itself to the latest stable or insider release, then automatically refresh your repo templates. Insider builds are published from the `dev` branch to the npm `insider` dist-tag. - --- - ## What It Does - `squad upgrade --self` upgrades the Squad CLI package to the latest stable release: - 1. **Detects package manager** — auto-detects npm, pnpm, or yarn based on lock files 2. **Upgrades package** — runs `npm install -g @bradygaster/squad@latest` (or pnpm/yarn equivalent) 3. **Runs repo upgrade** — automatically runs `squad upgrade` to apply new templates - **Result:** - Squad CLI upgraded to latest stable - Your repo's `.squad/` templates refreshed with latest version - All in one command - --- - ## Usage - ### Upgrade to Latest Stable - ```bash squad upgrade --self ``` - **Output:** ``` 🔄 Upgrading Squad CLI... Detected package manager: npm Running: npm install -g @bradygaster/squad@latest - ✅ Squad CLI upgraded to v0.8.0 Running: squad upgrade (to refresh repo templates) - ✅ Repo templates upgraded to v0.8.0 ``` - --- - ### Upgrade to Latest Insider - ```bash squad upgrade --self --insider ``` - **What's different:** - Installs latest **prerelease** version (e.g., `v0.9.0-insider.3`) - May include experimental features - Used for testing bleeding-edge changes - **Output:** ``` 🔄 Upgrading Squad CLI (insider)... 
Detected package manager: pnpm Running: pnpm add -g @bradygaster/squad@insider - ✅ Squad CLI upgraded to v0.9.0-insider.3 Running: squad upgrade (to refresh repo templates) - ✅ Repo templates upgraded to v0.9.0-insider.3 ``` - --- - ## Package Manager Auto-Detection - Squad auto-detects your package manager based on lock files in the current directory: - | Lock File | Detected Manager | Command Used | |-----------|------------------|--------------| | `pnpm-lock.yaml` | pnpm | `pnpm add -g @bradygaster/squad@latest` | | `yarn.lock` | Yarn | `yarn global add @bradygaster/squad@latest` | | `package-lock.json` | npm | `npm install -g @bradygaster/squad@latest` | | *(none)* | npm (fallback) | `npm install -g @bradygaster/squad@latest` | - **Notes:** - Detection runs in current working directory - If no lock file found, defaults to npm - For insider upgrades, `@latest` becomes `@insider` - --- - ## Auto-Refresh Repo Templates - After upgrading the CLI, `squad upgrade --self` automatically runs `squad upgrade` to refresh your repo's `.squad/` templates. This ensures: - - Built-in skills updated to latest versions - Charter templates refreshed - Routing/team file patterns updated - New features added (e.g., cleanup config, scratch dir, external state) - **Skip auto-refresh:** - If you want to upgrade the CLI without refreshing repo templates: - ```bash squad upgrade --self --skip-repo-upgrade ``` - *(This flag may not exist yet — just showing the pattern. For now, self-upgrade always runs repo upgrade.)* - --- - ## Permission Errors - If upgrade fails with permission denied: - ``` ❌ Error: EACCES: permission denied ``` - **Solutions:** - 1. **Use sudo (macOS/Linux):** ```bash sudo squad upgrade --self ``` - 2. **Fix npm permissions:** ```bash # Option A: Change npm's default directory npm config set prefix ~/.npm-global export PATH=~/.npm-global/bin:$PATH - # Option B: Fix permissions for /usr/local sudo chown -R $(whoami) /usr/local/lib/node_modules ``` - 3. 
**Use a version manager (recommended):** - **nvm** (Node Version Manager) — avoids global permission issues - **volta** — handles global installs without sudo - --- - ## Version Check - Check current Squad version: - ```bash squad --version ``` - **Output:** ``` @bradygaster/squad v0.8.0 ``` - Check if a newer version is available: - ```bash npm outdated -g @bradygaster/squad ``` - **Output:** ``` Package Current Wanted Latest Location @bradygaster/squad 0.7.5 0.8.0 0.8.0 global ``` - --- - ## Release Channels - | Channel | Tag | Description | |---------|-----|-------------| | **Stable** | `@latest` | Production-ready releases (e.g., `v0.8.0`) | | **Insider** | `@insider` | Prerelease builds for testing (e.g., `v0.9.0-insider.3`) | - **When to use insider:** - You want to test upcoming features - You're contributing to Squad development - You need a bug fix before the next stable release - **When to use stable:** - Production use - You want predictable, tested releases - You follow semantic versioning - --- - ## Workflow - **Typical upgrade workflow:** - 1. **Check current version:** ```bash squad --version ``` - 2. **Upgrade CLI to latest stable:** ```bash squad upgrade --self ``` - 3. **Verify new version:** ```bash squad --version ``` - 4. **Repo templates auto-refreshed** — no extra step needed - --- - ## Notes - - Self-upgrade requires network access to npm registry - Self-upgrade modifies global npm packages — may require elevated permissions - Repo upgrade (template refresh) runs automatically after successful CLI upgrade - If CLI upgrade fails, repo upgrade is skipped - Insider builds may have breaking changes — read release notes before upgrading - --- - ## Sample Prompts - ``` squad upgrade --self ``` - Upgrades Squad CLI to latest stable and refreshes repo templates. - ``` squad upgrade --self --insider ``` - Upgrades Squad CLI to latest insider/prerelease build. - ``` squad --version ``` - Checks current Squad CLI version. 
- ``` npm outdated -g @bradygaster/squad ``` - Checks if a newer version is available without upgrading. diff --git a/docs/src/content/docs/features/skill-security-scanner.md b/docs/src/content/docs/features/skill-security-scanner.md index 5e3fe415a..23e627087 100644 --- a/docs/src/content/docs/features/skill-security-scanner.md +++ b/docs/src/content/docs/features/skill-security-scanner.md @@ -2,37 +2,22 @@ title: Skill Security Scanner description: Markdown-aware security scanner that catches embedded credentials, download-and-execute patterns, and privilege escalation in skill files before they ship. --- - # Skill Security Scanner - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - The skill security scanner is a markdown-aware safety check that runs as part of `scripts/security-review.mjs` to inspect every SKILL.md file in `.copilot/skills/` and `.squad/skills/`. It catches three classes of problem before a skill gets installed or merged: - 1. **Embedded credentials** — API keys, tokens, passwords pasted into skill text 2. **Download-and-execute patterns** — `curl ... | bash`, `Invoke-Expression`, and friends 3. **Privilege escalation commands** — `sudo`, `Set-ExecutionPolicy Bypass`, `chmod 777`, etc. - It ships as Phase 1 — focused on the highest-signal issues with **zero false positives on the existing 35 skill files** at the time of release. 
- --- - ## How it integrates - The scanner is invoked by the existing security-review pipeline (`scripts/security-review.mjs`), which is triggered: - - On every PR that touches `.copilot/skills/**` or `.squad/skills/**` (via the Security Review CI workflow) - Manually: `node scripts/security-review.mjs --scope skills` - As part of [Plugin Marketplace](/squad/docs/features/plugins/) install (skills from external sources get scanned before landing on disk) - A finding produces a CI failure with the file path, line number, pattern type, and the matched substring (redacted for credentials). - --- - ## What it catches - ### Credentials - | Pattern type | Example match | |--------------|---------------| | Generic API key | `API_KEY=` | @@ -40,79 +25,55 @@ A finding produces a CI failure with the file path, line number, pattern type, a | AWS access key | `AKIA<16-character-key>` | | Bearer tokens | `Authorization: Bearer ` | | Database connection strings with embedded passwords | `postgres://user:@host/db` | - ### Download-and-execute patterns - | Pattern type | Example match | |--------------|---------------| | Curl-to-bash | `curl https://... \| bash`, `curl ... \| sh`, `wget ... \| sh` | | PowerShell invoke-expression | `iex (irm https://...)`, `Invoke-Expression $downloaded` | | Unsafe eval | `eval $(curl ...)`, `eval $(wget ...)` | - ### Privilege escalation - | Pattern type | Example match | |--------------|---------------| | `sudo` invocations | `sudo apt install`, `sudo -i`, `sudo bash` | | Permissive chmod | `chmod 777`, `chmod a+rwx`, `chmod -R 777` | | PowerShell policy bypass | `Set-ExecutionPolicy Bypass`, `Set-ExecutionPolicy Unrestricted` | | Windows admin escalation | `Start-Process ... -Verb RunAs`, `runas /user:Administrator` | - --- - ## Suppression — the false-positive guardrails - The scanner is markdown-aware, which means it understands when a "dangerous" pattern is actually in a code block being **shown as an anti-pattern** vs. 
in prose advising users to run something: - | Where pattern appears | Action | |----------------------|--------| | Inside a fenced code block (three backticks) | **Suppressed** — treated as documentation, not advice | | Inside an inline code span (single backtick) | **Suppressed** — treated as a reference | | In prose with a placeholder token (``, ``, `xxx`, `***`) | **Suppressed** — clearly an example | | In prose without any of the above | **Flagged** as a finding | - The placeholder-token list covers common safe markers: ``, ``, ``, `xxx`, `***`, `placeholder`, `example`, `PLACEHOLDER`. - This is why the existing 35 skill files have zero false positives — most discuss security patterns inside fenced code blocks or with placeholder tokens. - --- - ## Local invocation - ```bash # Scan all skills in the current repo node scripts/security-review.mjs --scope skills - # Scan a single skill file node scripts/security-review.mjs --file .copilot/skills/my-skill/SKILL.md - # JSON output for tooling integration node scripts/security-review.mjs --scope skills --format json ``` - Exit codes: - `0` — no findings - `1` — findings detected (CI fails the build) - `2` — scanner error (couldn't read file, malformed markdown, etc.) - --- - ## What it doesn't catch - This is **Phase 1**. The scanner is deliberately conservative — it would rather miss something than false-positive a legitimate skill. Things NOT in scope today: - - **Obfuscated patterns** — base64-encoded credentials, character-class regex tricks, etc. 
- **Multi-line patterns** — the scanner is line-oriented; a credential split across lines won't match - **Skill scripts (`.js`/`.mjs` files in `scripts/`)** — only the SKILL.md narrative is scanned; executable handlers need their own audit - **Semantic context** — the scanner doesn't understand whether a `sudo` example is contextually safe; if it's in prose without a placeholder marker, it flags - **Hooks beyond `.copilot/skills/` and `.squad/skills/`** — other markdown files (charters, decisions, README) aren't scanned by this rule - Phase 2 work tracked in the issue tracker would extend coverage to scripts and add an LLM-based semantic pass. - --- - ## See also - - [Skills](/squad/docs/features/skills/) — the broader skills system - [Plugin Marketplace](/squad/docs/features/plugins/) — how external skills get installed - [Secret Handling](/squad/docs/features/skills/) — see also the `secret-handling` built-in skill diff --git a/docs/src/content/docs/features/skills.md b/docs/src/content/docs/features/skills.md index d3f7cc7b8..ae666628e 100644 --- a/docs/src/content/docs/features/skills.md +++ b/docs/src/content/docs/features/skills.md @@ -1,47 +1,29 @@ # Skills System - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to see what your team learned:** ``` Show me what skills this team has learned ``` - **Try this to list all accumulated knowledge:** ``` List all skills ``` - **Try this to document a reusable pattern:** ``` Create a skill for our deployment process ``` - Agents learn from real work and write skill files — reusable patterns, conventions, and techniques. Skills compound over time, making your team smarter with each project. - --- - ## Where Skills Live - ``` .copilot/skills/{skill-name}/SKILL.md ``` - Each skill is a directory containing a `SKILL.md` file. Skills are **team-wide knowledge** — not tied to individual agents. All agents can read and use any skill. 
- > **Legacy path**: Skills are also discovered at `.squad/skills/` for backward compatibility. If both paths contain a skill with the same name, the `.squad/skills/` version takes precedence. New skills are always written to `.copilot/skills/`. - > **Portable across projects**: Skills export and import with your team. When you move a trained team to a new repo, all their earned knowledge comes with them. - --- - ## Types of Skills - ### Built-in Skills - Squad ships with **8 built-in skills** that provide foundational patterns for every squad. These are automatically installed during `squad init` and refreshed during `squad upgrade`: - 1. **squad-conventions** — Core squad patterns and file layout 2. **error-recovery** — Graceful failure handling and retry patterns 3. **secret-handling** — Credential safety and secrets management @@ -50,107 +32,70 @@ Squad ships with **8 built-in skills** that provide foundational patterns for ev 6. **reviewer-protocol** — Code review gates and approval flow 7. **test-discipline** — Test-first discipline and coverage expectations 8. **agent-collaboration** — Multi-agent handoff and parallel work patterns - Built-in skills are prefixed with their domain (e.g., `github-`, `secrets-`, `session-`). They're overwritten on upgrade to ensure you always have the latest patterns. - ### Starter skills - Legacy term for built-in skills. Previously called "starter skills" and prefixed with `squad-` (e.g., `squad-conventions`). Now standardized as domain-prefixed built-in skills. - ### Session Recovery - The `session-recovery` skill teaches agents to find and resume interrupted Copilot CLI sessions. When a session is interrupted (terminal crash, network drop, machine restart), in-progress work may be left incomplete. This skill uses `session_store` SQL queries to detect abandoned sessions, inspect checkpoint progress, and resume work. 
See [`.copilot/skills/session-recovery/SKILL.md`](https://github.com/bradygaster/squad/blob/dev/.copilot/skills/session-recovery/SKILL.md) for query patterns and examples. - ### Earned skills - Written by agents from real work on your project. When an agent discovers a reusable pattern — a deployment strategy, a testing technique, an API integration approach — it writes a skill file. - --- - ## Confidence Lifecycle - Earned skills have a confidence level that reflects how battle-tested they are: - | Level | Meaning | |-------|---------| | **Low** | First written — based on a single experience | | **Medium** | Applied successfully in multiple contexts | | **High** | Well-established, consistently reliable | - Confidence only goes up, never down. A skill that reaches `high` stays there. - --- - ## How Skills Are Used - 1. **Before working**, agents read skill files relevant to the task at hand 2. **Skill-aware routing** — the coordinator checks available skills when deciding which agent to spawn. An agent with a relevant earned skill may be preferred over one without. 3. **After working**, agents may write new skills or update existing ones based on what they learned - --- - ## Example - After successfully setting up a CI pipeline, an agent might create: - ``` .copilot/skills/ci-github-actions/SKILL.md ``` - ```markdown # CI with GitHub Actions - **Confidence:** medium - ## Pattern - Use `actions/checkout@v4` for repo access - Cache node_modules with `actions/cache@v4` using hash of package-lock.json - Run lint, test, and build as separate jobs for parallel execution - Use `concurrency` groups to cancel superseded runs - ## Learned from - Initial CI setup (session 3) - Pipeline optimization after slow builds (session 7) ``` - --- - ## Tips - - Skills compound over time. A mature project has skills covering testing patterns, deployment procedures, API conventions, and more. - Built-in skills are overwritten on upgrade. Earned skills are never touched. 
- **Skills are shared across the whole team** — any agent can read any skill. They're stored in a flat `.copilot/skills/` directory, not per-agent files. - You can manually edit skill files if you want to seed knowledge (e.g., paste your team's existing conventions into a `SKILL.md`). - **Skills survive export/import** — your team's accumulated knowledge is fully portable across projects. - ## Sample Prompts - ``` list all skills ``` - Shows all skill files in `.copilot/skills/` with confidence levels for earned skills. - ``` what's the confidence level for the CI skill? ``` - Checks how battle-tested a specific earned skill is. - ``` create a skill for our deployment process ``` - Manually creates a new skill file and guides you through documenting the pattern. - ``` which skills have low confidence? ``` - Finds recently-created skills that haven't been validated across multiple contexts yet. - ``` bump the testing skill to high confidence ``` - Manually increases the confidence level after successful repeated use. diff --git a/docs/src/content/docs/features/squad-rc.md b/docs/src/content/docs/features/squad-rc.md index 998d0803c..e7ce89690 100644 --- a/docs/src/content/docs/features/squad-rc.md +++ b/docs/src/content/docs/features/squad-rc.md @@ -1,24 +1,16 @@ # squad rc - > **Full remote control of GitHub Copilot from any device.** ACP passthrough mode for complete Copilot CLI access via secure tunnel. - --- - ## What It Does - `squad rc` (remote control) exposes GitHub Copilot CLI over a secure WebSocket tunnel, letting you chat with Copilot from your phone, tablet, or any browser. Unlike `squad start` (which mirrors terminal output), `squad rc` uses **ACP passthrough** — raw JSON-RPC communication directly with Copilot's Agent Communication Protocol. You get full Copilot capabilities, not just terminal visibility. 
- ```bash squad rc --tunnel # → QR code appears # → Scan with phone # → Chat with Copilot in browser (full capabilities) ``` - --- - ## How It Differs from `squad start` - | Feature | `squad rc` | `squad start` | |--------------------------|-------------------------------------|----------------------------------| | **Mode** | ACP passthrough (JSON-RPC) | PTY mirror (terminal streaming) | @@ -30,62 +22,45 @@ squad rc --tunnel | **Mobile Optimized** | Yes (PWA, QR code, chat UI) | Yes (xterm.js, keyboard overlay) | | **Startup Time** | ~15-20s (MCP server loading) | Immediate | | **Team Roster** | Loaded from `.squad/team.md` | Not applicable | - **When to use `squad rc`:** You want to control Copilot remotely (ask questions, run commands, access full agent capabilities). - **When to use `squad start`:** You want to mirror a terminal session to your phone (demos, pairing, watching long-running processes). - -> 💡 **Looking for terminal mirroring?** See [squad start](./remote-control.md). - +> 💡 **Looking for terminal mirroring?** Use `squad start` for PTY-based terminal streaming, or stay on this page for full remote Copilot control. --- - ## Prerequisites - ### Required - - **GitHub Copilot CLI** — Install with: ```bash npm install -g @github/copilot ``` Verify with: `copilot --version` (v0.0.420+ recommended) - - **devtunnel CLI** (for `--tunnel` mode) ```bash # Windows winget install Microsoft.devtunnel - # macOS brew install devtunnel - # Linux # Download from https://aka.ms/devtunnels/download ``` - ### Setup - 1. **Authenticate devtunnel** ```bash devtunnel user login ``` Sign in with your Microsoft or GitHub account. - 2. **Verify Copilot CLI** ```bash copilot --version ``` Should return `0.0.420` or higher. 
- --- - ## Quick Start - **Local testing (no tunnel):** ```bash squad rc # → Prints: Bridge running on port 3000 # → Open http://localhost:3000 ``` - **Remote access (with tunnel):** ```bash squad rc --tunnel @@ -93,42 +68,31 @@ squad rc --tunnel # → Shows QR code # → Scan with phone or copy URL ``` - **Custom port:** ```bash squad rc --port 8080 ``` - **Different directory:** ```bash squad rc --path ~/my-project --tunnel ``` - --- - ## All Flags & Options - | Flag | Description | Default | |---------------------|--------------------------------------------------|----------------------| | `--tunnel` | Create a devtunnel for remote access | `false` (local only) | | `--port ` | HTTP server port | `0` (random) | | `--path ` | Working directory for Copilot | Current directory | - **Example:** ```bash # Local access on port 5000 squad rc --port 5000 - # Remote tunnel from specific project squad rc --tunnel --path ~/repos/my-app ``` - --- - ## How It Works - ### Architecture Diagram - ``` ┌─────────────────────────────────────────────────────────────┐ │ Remote Browser (PWA) │ @@ -171,103 +135,78 @@ squad rc --tunnel --path ~/repos/my-app │ • Full file system access in working directory │ └─────────────────────────────────────────────────────────────┘ ``` - ### Message Flow - **Outbound (Remote → Copilot):** 1. User types message in browser 2. WebSocket sends JSON payload: `{ type: 'prompt', text: '...' }` 3. RemoteBridge writes to Copilot stdin: `\n` 4. Copilot processes request - **Inbound (Copilot → Remote):** 1. Copilot writes JSON-RPC to stdout 2. RemoteBridge reads line from Copilot stdout 3. RemoteBridge broadcasts to all WebSocket clients 4. 
Browser renders Copilot response - ### Team Roster Loading - If `.squad/team.md` exists, `squad rc` parses the Active members table: - ```markdown | Name | Role | Status | |-----------|---------------|--------| | Fenster | Core Dev | Active | | Edie | TypeScript | Active | ``` - Agents appear in the `/agents` command and are available for direct messages (`@Fenster ...`). - ### Connection Monitoring - Every 5 seconds, the bridge logs connected client count: ``` ● 2 client(s) connected ``` - --- - ## Security Model - `squad rc` implements 7 layers of security: - ### 1. Session Token Authentication - **What:** UUID session token generated on bridge startup - **Where:** `RemoteBridge` constructor (line 47 in `bridge.ts`) - **How:** All API routes check `Authorization: Bearer <token>` header or `?token=<token>` query param - **Enforcement:** Line 123-128 in `bridge.ts` - ### 2. One-Time Ticket System - **What:** Exchange session token for single-use WebSocket ticket - **Where:** `/api/auth/ticket` endpoint (line 112-120 in `bridge.ts`) - **Why:** Session token can't be observed in WebSocket URL logs - **TTL:** 60 seconds, consumed on first use - **Cleanup:** Expired tickets garbage-collected every 30s (line 51-56 in `bridge.ts`) - ### 3. Rate Limiting - **HTTP:** 30 requests/minute per IP (line 97-107 in `bridge.ts`) - **WebSocket:** 20 messages/minute per connection (enforced in `handleMessage`) - **Penalty:** 429 Too Many Requests (HTTP) or connection close (WebSocket) - ### 4. Secret Redaction - **What:** Environment variable patterns redacted from messages - **Patterns:** `API_KEY=*`, `TOKEN=*`, `PASSWORD=*`, `SECRET=*` - **Applied:** Before broadcast to clients (message content sanitized) - ### 5. Connection Limits - **Per IP:** 5 concurrent WebSocket connections - **Global:** Enforced via `ipConnections` map in `RemoteBridge` - **Rejection:** Connection denied in `verifyClient` callback - ### 6.
Content Security Policy Headers - **Headers:** `X-Frame-Options: DENY`, `X-Content-Type-Options: nosniff`, `Referrer-Policy: no-referrer`, `Strict-Transport-Security: max-age=31536000`, `Cache-Control: no-store` - **Where:** Line 156-161 in `rc.ts` (static file handler) - **Effect:** Prevents clickjacking, MIME sniffing, referrer leaks - ### 7. Devtunnel Private Auth - **What:** Tunnel is private by default (only your MS/GitHub account can connect) - **Where:** `devtunnel create` command (line 47-58 in `rc-tunnel.ts`) - **Labels:** Tunnel tagged with `squad`, `repo`, `branch`, `machine` labels - **Expiry:** 24 hours (line 48 in `rc-tunnel.ts`) - ### Session Expiry - **TTL:** 4 hours from bridge startup - **Check:** Every 60 seconds (line 60-67 in `bridge.ts`) - **Enforcement:** New connections rejected, existing connections closed - --- - ## Built-in Commands - Type these in the PWA chat: - ### `/status` Shows current bridge state: ``` Squad RC | Repo: squad-pr | Branch: main | Agents: 5 | Copilot: passthrough | Connections: 2 ``` - ### `/agents` Lists all agents from `.squad/team.md`: ``` @@ -278,45 +217,34 @@ Team Roster: • Rabin (Distribution) • Keaton (PM) ``` - ### `@agentName <message>` Direct message to a specific agent: ``` @Edie Can you review the TypeScript types in src/index.ts? ``` Routed to the named agent if supported by your squad configuration. - --- - ## Mobile Experience - ### QR Code When `--tunnel` is enabled, a QR code is printed to the terminal. Scan with your phone's camera to open the remote control URL instantly. - ### Keyboard Shortcuts - **Enter:** Send message - **Shift+Enter:** New line in message - **Cmd/Ctrl+K:** Clear chat history (client-side) - **Cmd/Ctrl+R:** Reconnect WebSocket - ### Replay Buffer All messages are stored in the bridge's replay buffer (default: 500 messages). New connections automatically receive full conversation history on connect.
- ### Progressive Web App (PWA) The remote UI is a PWA with: - **Offline support:** Service worker caches UI assets - **Install prompt:** Add to Home Screen on iOS/Android - **Responsive layout:** Mobile-first design, adapts to desktop - --- - ## Audit Logging - ### Log Location ``` ~/.cli-tunnel/audit/squad-audit-.jsonl ``` - ### What's Logged - WebSocket connections/disconnections - All prompts (user input) @@ -324,7 +252,6 @@ The remote UI is a PWA with: - Tool calls - Permission requests - Errors - ### Log Format JSONL (JSON Lines) — one event per line: ```json @@ -332,146 +259,99 @@ JSONL (JSON Lines) — one event per line: {"timestamp":"2026-03-13T10:15:10Z","type":"prompt","clientId":"abc123","text":"What's the latest commit?"} {"timestamp":"2026-03-13T10:15:12Z","type":"response","agentName":"Copilot","content":"The latest commit is..."} ``` - ### Accessing Logs ```bash # View live logs tail -f ~/.cli-tunnel/audit/squad-audit-*.jsonl - # Search for prompts grep '"type":"prompt"' ~/.cli-tunnel/audit/squad-audit-*.jsonl | jq . ``` - --- - ## Troubleshooting - ### `Copilot not available` error - **Symptom:** ``` ⚠ Copilot not available: spawn copilot ENOENT ``` - **Cause:** Copilot CLI not installed or not in PATH. - **Fix:** ```bash npm install -g @github/copilot ``` - --- - ### `devtunnel CLI not found` - **Symptom:** ``` ⚠ devtunnel CLI not found. Install with: winget install Microsoft.devtunnel ``` - **Cause:** `devtunnel` binary not in PATH. - **Fix:** - **Windows:** `winget install Microsoft.devtunnel` - **macOS:** `brew install devtunnel` - **Linux:** Download from https://aka.ms/devtunnels/download - --- - ### `Tunnel failed: devtunnel host exited with code 1` - **Symptom:** ``` ⚠ Tunnel failed: devtunnel host exited with code 1 Running in local-only mode. ``` - **Cause:** Not authenticated with devtunnel. - **Fix:** ```bash devtunnel user login ``` Sign in with your Microsoft or GitHub account, then retry. 
- --- - ### WebSocket connection refused - **Symptom:** Browser console shows `WebSocket connection to 'wss://...' failed: Error during WebSocket handshake` - **Cause:** Session token mismatch or session expired. - **Fix:** 1. **Refresh the QR code:** Stop `squad rc` (Ctrl+C) and restart 2. **Check expiry:** Sessions expire after 4 hours. Restart the bridge. 3. **Verify token:** Ensure you're using the URL from the QR code exactly as printed. - --- - ### Copilot responses are slow or not appearing - **Symptom:** You send a message but see no response for 20+ seconds. - **Cause:** Copilot's MCP servers are still loading (first 15-20s after `squad rc` starts). - **Expected behavior:** ``` Spawning copilot --acp (MCP servers loading ~15-20s)... ✓ Copilot ACP passthrough active ``` - **Fix:** Wait ~20 seconds after seeing the "Spawning copilot" message. Copilot is loading its Model Context Protocol servers (GitHub, Bing, etc.) and won't respond until ready. - --- - ### `[Copilot passthrough not active] Echo: ...` - **Symptom:** Responses are prefixed with `[Copilot passthrough not active]`. - **Cause:** Copilot CLI failed to spawn (binary missing, unsupported OS, or crashed). - **Fix:** 1. Verify Copilot CLI: `copilot --version` 2. Check logs for "Spawning copilot" errors 3. On Windows, ensure `copilot.exe` is at `C:\ProgramData\global-npm\node_modules\@github\copilot\node_modules\@github\copilot-win32-x64\copilot.exe` (line 185-189 in `rc.ts` for hardcoded fallback) - --- - ### Port already in use - **Symptom:** ``` Error: listen EADDRINUSE: address already in use 127.0.0.1:3000 ``` - **Cause:** Another process is using the default port. - **Fix:** ```bash squad rc --port 0 # Auto-assign free port # OR squad rc --port 8080 # Specific port ``` - --- - ### Can't connect from mobile (tunnel URL works on desktop) - **Symptom:** Desktop browser connects fine, mobile shows "Connection refused" or "Unauthorized". 
- **Cause 1:** Tunnel auth requires same Microsoft/GitHub account on mobile. **Fix:** Sign in to your MS/GitHub account in your mobile browser, then open the tunnel URL. - **Cause 2:** Tunnel expired (24-hour TTL). **Fix:** Restart `squad rc --tunnel` to create a new tunnel. - --- - ## See Also - -- [squad start](./remote-control.md) — PTY mirror mode for terminal streaming +- `squad start` — PTY mirror mode for terminal streaming (see the CLI reference for command details) - [CLI Reference](../reference/cli.md) — All squad commands - [Remote Control Protocol](https://github.com/bradygaster/squad/blob/main/packages/squad-sdk/src/remote/protocol.ts) — Wire protocol types - [RemoteBridge SDK](https://github.com/bradygaster/squad/blob/main/packages/squad-sdk/src/remote/bridge.ts) — Server implementation diff --git a/docs/src/content/docs/features/state-backends.md b/docs/src/content/docs/features/state-backends.md index 6d8a5ed87..f6019b616 100644 --- a/docs/src/content/docs/features/state-backends.md +++ b/docs/src/content/docs/features/state-backends.md @@ -1,125 +1,82 @@ # State Backends - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Squad supports multiple **state backends** for storing `.squad/` state (decisions, agent memories, session logs, skills). Each backend determines _where_ and _how_ this data is persisted — without changing how agents interact with it. Once configured, everything is automatic. - --- - ## The Problem - By default, Squad stores `.squad/` state as regular files in your working tree. 
This works for solo workflows but has real trade-offs for teams: - - **Branch pollution:** `.squad/` files appear in diffs and PRs - **Branch-switch loss:** State can be lost when switching branches (if not committed) - **Merge conflicts:** Multiple team members modifying `.squad/` files creates frequent conflicts - State backends solve this by moving `.squad/` data into Git-native structures that live outside the working tree — keeping your PRs clean and your state safe across branches. - --- - ## Getting Started - ### New project — choose a backend during init - ```bash # Default (local — files in .squad/, same as always) squad init - # Orphan branch (state on a dedicated squad-state branch) squad init --state-backend orphan - # Two-layer (recommended for teams — orphan branch + git notes) squad init --state-backend two-layer ``` - > **Default backend:** if you don't pass `--state-backend`, Squad uses the > `local` backend (regular `.squad/` files in your working tree). The > `orphan` and `two-layer` backends are opt-in — you must pass the explicit > flag during `squad init` or `squad upgrade` to activate them. - The backend is stored in `.squad/config.json` — you never need to pass it again. All subsequent commands (`squad watch`, interactive sessions, etc.) read from config automatically. - ### Existing project — migrate with upgrade - ```bash # Migrate from local to orphan or two-layer squad upgrade --state-backend two-layer - # Or: squad upgrade --state-backend orphan ``` - This migrates existing state, creates the orphan branch, and installs git hooks for automatic multi-user sync. 
- ### What gets installed automatically - When you choose `orphan` or `two-layer`: - **Git hooks** (pre-push, post-merge, post-checkout, post-rewrite, pre-commit, post-commit) are installed in `.git/hooks/` - The sync hooks (pre-push, post-merge, post-checkout, post-rewrite) keep the `squad-state` branch in sync automatically when you push/pull - The **pre-commit** hook guards against accidentally staging two-layer mutable state (decisions, histories, casting, routing) into a working-tree commit — it refuses with an explanation if detected - The **post-commit** hook flushes any pending two-layer state onto the orphan branch after each commit (best-effort, never blocks) - Hooks chain with existing hooks (husky, etc.) — nothing is overwritten - --- - ## Available Backends - ### Local (default) - State lives as regular files in `.squad/` inside the working tree. This is the standard behavior — what you get out of the box. - **Pros:** - Simple and familiar — files on disk - Easy to inspect, edit, and commit - Works with all Git tools and IDEs - **Cons:** - Files appear in `git status` and diffs - Branch switches can lose uncommitted state - **Best for:** Most projects, especially when you want squad state committed alongside code. - --- - ### Git Notes (Deprecated → Two-Layer) - > ⚠️ **Deprecated:** The standalone `git-notes` backend has been removed as a user-facing option. If your config still references `git-notes`, it will be **automatically migrated to `two-layer`** at runtime. > > **Why:** Standalone git-notes stores all state as a single JSON blob on the root commit. This fundamentally cannot handle concurrent writes from multiple team members — `git notes merge` cannot merge opaque JSON, causing silent data loss. > > **Replacement:** The `two-layer` backend uses git notes as best-effort commit annotations (the "why" layer) while storing durable state on an orphan branch with per-file granularity (the "state" layer). 
This gives you the clean working tree of git-notes with the team-safe mergeability of the orphan approach. - --- - ### Orphan Branch - State lives on a dedicated orphan branch (`squad-state` by default). The branch has no common history with your main branches — it's a completely separate tree used only for squad data. - **How it works:** - An orphan branch `squad-state` is created automatically on first write - Each state file is stored as a blob in the branch's tree - Reads use `git show squad-state:<path>`, writes create new commits on the branch - The branch is never checked out — all operations use Git plumbing commands - **Pros:** - Working tree stays clean - State is versioned with full Git history - Easy to inspect: `git log squad-state`, `git show squad-state:decisions.md` - Pushes/fetches with normal branch operations - **Cons:** - An extra branch in your repository - Slightly more complex than `local` for debugging - Concurrent writes to the branch can conflict (single-writer recommended) - **Best for:** Teams who want Git-versioned state without polluting the main branch history. - --- - ## Configuration - The state backend is set once (during `squad init` or `squad upgrade`) and stored in `.squad/config.json`: - ```json { "version": 1, @@ -127,23 +84,15 @@ The state backend is set once (during `squad init` or `squad upgrade`) and store "stateBackend": "two-layer" } ``` - All squad commands read from this file automatically. You don't need to pass `--state-backend` on every invocation. - > **Note:** If no `stateBackend` field exists, the default is `local` (current behavior, no change). - ### Fallback Behavior - If a non-default backend fails to initialize (e.g., Git is not available, permissions issue), Squad automatically falls back to the **local** backend with a warning: - ``` Warning: State backend 'two-layer' failed: <reason>. Falling back to 'local'.
``` - --- - ## Comparison - | Feature | Local | Orphan Branch | Two-Layer | |---------|-------|---------------|-----------| | Working tree clean | ❌ | ✅ | ✅ | @@ -155,54 +104,38 @@ Warning: State backend 'two-layer' failed: . Falling back to 'local'. | Sharing across clones | Normal push/pull | Normal branch push/pull | Normal branch push/pull | | Concurrent-write safe | ✅ (filesystem) | ⚠️ (single writer) | ✅ (per-file merge) | | Team-safe (multi-user) | ❌ (merge conflicts) | ⚠️ (needs coordination) | ✅ (designed for teams) | - --- - ## Inspecting State - ### Local - ```bash cat .squad/decisions.md ls .copilot/skills/ ``` - ### Git Notes - ```bash # Show all state as JSON (anchored to root commit) git notes --ref=squad show $(git rev-list --max-parents=0 HEAD) - # Pretty-print git notes --ref=squad show $(git rev-list --max-parents=0 HEAD) | python -m json.tool ``` - ### Orphan Branch - ```bash # List all state files git ls-tree --name-only -r squad-state - # Read a specific file git show squad-state:decisions.md - # View commit history git log --oneline squad-state ``` - --- - ## SDK Usage - The state backend is available programmatically via the Squad SDK: - ```typescript import { resolveSquadState, resolveStateBackend, type StateBackend, } from '@bradygaster/squad-sdk'; - // Option 1: Full context resolution (recommended) // Resolves paths + backend from config + CLI override in one call const ctx = resolveSquadState(process.cwd(), 'two-layer'); @@ -211,7 +144,6 @@ if (ctx) { ctx.backend.append('log.md', 'New entry\n'); ctx.backend.delete('inbox/processed.md'); } - // Option 2: Backend-only resolution const backend: StateBackend = resolveStateBackend( '.squad', // squadDir @@ -220,9 +152,7 @@ const backend: StateBackend = resolveStateBackend( ); backend.write('decisions.md', '# Decisions\n...'); ``` - All backends implement the same `StateBackend` interface: - ```typescript interface StateBackend { read(relativePath: string): string | undefined; @@ -234,121 
+164,82 @@ interface StateBackend { readonly name: string; } ``` - --- - ## Security - State backends include hardening against common injection attacks: - - **Path traversal:** `..` segments are rejected - **Null byte injection:** `\0` characters are rejected - **Newline injection:** `\n` and `\r` characters are rejected (prevents Git plumbing manipulation) - **Tab injection:** `\t` characters are rejected (prevents mktree format corruption) - **Empty segments:** Double slashes (`//`) are rejected - All validation is centralized in `validateStateKey()` and applied uniformly across all backends. - --- - ## Content Fidelity - All backends preserve content exactly as written — including trailing newlines, leading whitespace, and empty lines. This is critical for append-only files like `history.md` and `decisions.md` where multiple agents append entries over time. - The orphan and two-layer backends use raw `execFileSync` for content reads (without trimming) to ensure faithful round-trips. Git plumbing helpers that trim output are only used for non-content operations like `rev-parse` and `ls-tree`. - --- - ## Worktree Awareness - When running in a git worktree, `resolveSquadState()` uses `git rev-parse --show-toplevel` to determine the actual current worktree root — not the parent of `.squad/`. This ensures that git-native backends (orphan, two-layer) operate in the correct repository context, even when `.squad/` is resolved from the main checkout via the worktree fallback strategy. 
- --- - ## Notes - - State backends are **opt-in** — the default is `local` (no behavior change) - All backends implement the same interface — agents don't know or care which backend is active - Empty directories are automatically pruned after the last file is deleted (orphan backend) - The `external` backend type exists as a stub for future external storage (see [External State](./external-state)) - State backends are available in the **insider** release channel (`@bradygaster/squad-cli@insider`) - 63 unit tests + 46 E2E tests cover all backends including security hardening, content fidelity, and directory pruning - --- - ## Using with Copilot CLI Sessions - The SDK's `StateBackend` interface handles programmatic state for Squad internals, but Copilot agents also need a way to write commit-scoped context — decisions, research, reviews — without creating `.squad/` file changes that pollute PRs. - The solution: agents use **git notes CLI commands** directly for mutable, commit-scoped state. The `notes-protocol.md` template defines the contract. - ### How it works - 1. Each agent writes to its own namespace: `refs/notes/squad/{agent-name}` 2. Notes are JSON with required fields: `agent`, `timestamp`, `type`, `content` 3. Notes are invisible in PR diffs — they travel as git refs, not files 4. Ralph promotes notes with `"promote_to_permanent": true` to `decisions.md` after merge 5. 
If a PR is rejected, notes on those commits are NOT promoted (desired behavior) - ### Setup - When you enable `stateBackend: "two-layer"` or `stateBackend: "orphan"`, copy the notes protocol and helper scripts into your project: - ```bash # Copy from Squad's templates (after squad init) cp .squad/templates/notes-protocol.md .squad/notes-protocol.md cp -r .squad/templates/scripts/notes/ scripts/notes/ - # One-time git config for notes fetch ./scripts/notes/fetch.ps1 -Setup ``` - ### Copilot Instructions Integration - Add the following to your `.github/copilot-instructions.md` (or `.copilot/copilot-instructions.md`) to teach agents the notes protocol: - ````markdown ## Git Notes — State Protocol - **Every agent uses git notes for commit-scoped state.** Do not write to `.squad/decisions.md` or other `.squad/` files directly on feature branches. - ### On every work round - 1. **Start**: `git fetch origin 'refs/notes/*:refs/notes/*'` 2. **When making a decision**: Write it as a note on the relevant commit 3. **End**: `git push origin 'refs/notes/*:refs/notes/*'` - ### Write pattern - ```bash git notes --ref=squad/{your-agent} add \ -m '{"agent":"{Name}","timestamp":"{ISO8601}","type":"decision","content":"..."}' \ HEAD ``` - Use `git notes append` if a note already exists on the commit. - ### Key rules - - Write only to your own namespace (`refs/notes/squad/{your-name}`) - Notes MUST be valid JSON - Set `"promote_to_permanent": true` for decisions that should outlast the branch - Set `"archive_on_close": true` for research worth keeping even if the PR is rejected - Fetch before write, push after your round - See `.squad/notes-protocol.md` for the full contract. ```` - ### Example: Agent writes a decision, Ralph promotes it - 1. **Data** makes an architecture choice and writes a note: ```bash git notes --ref=squad/data add -m \ @@ -356,137 +247,89 @@ See `.squad/notes-protocol.md` for the full contract. HEAD git push origin 'refs/notes/*:refs/notes/*' ``` - 2. 
**PR merges** into the default branch. - 3. **Ralph** runs promotion on the next watch cycle: - Fetches all notes - Finds Data's note with `promote_to_permanent: true` on a merged commit - Appends the decision to `decisions.md` via the state backend - Notes on rejected PRs are silently ignored - ### Template files - When `stateBackend` is set to `two-layer` or `orphan`, the following templates are available: - | Template | Purpose | |----------|---------| | `notes-protocol.md` | The full agent contract for git notes | | `scripts/notes/fetch.ps1` | Fetch + setup refspec + merge after conflict | | `scripts/notes/write-note.ps1` | Agent helper — handles JSON, conflicts, push | - ### Automatic Coordinator Integration - **You don't need to manually add copilot-instructions.md snippets.** When `stateBackend` is set in `.squad/config.json`, the Squad coordinator (`squad.agent.md`) automatically adapts its agent spawn prompts: - | Backend | Agent reads | Agent writes | Scribe commits to | |---------|-------------|--------------|-------------------| | `local` | `.squad/` files on disk | `.squad/` files on disk | Working branch | | `orphan` | `.squad/` files on disk (synced) | `.squad/` files on disk | `squad-state` orphan branch (NOT working branch) | | `two-layer` | Git notes + orphan branch | Git notes via `write-note.ps1` + orphan | Pushes note refs + orphan branch | - **Config vs State distinction:** - **Static config** (charters, team.md, routing.md, casting/) — always on disk, all backends - **Mutable state** (history.md, decisions/inbox/, logs, orchestration-log/) — backend-dependent - The coordinator passes `STATE_BACKEND` into every agent spawn prompt. Agents receive backend-specific instructions for reading and writing state. Scribe receives backend-specific commit instructions. This is fully automatic — no user configuration beyond setting `stateBackend` in config.json is needed. 
- --- - ## Migrating an Existing Squad - Use `squad upgrade` to migrate — it handles everything: - ```bash squad upgrade --state-backend two-layer # or: squad upgrade --state-backend orphan ``` - This will: 1. Update `.squad/config.json` with the new backend 2. Create the `squad-state` orphan branch (if needed) 3. Install git hooks for automatic sync 4. Preserve all existing state - **What happens:** Existing `.squad/` files are migrated to the orphan branch and may be removed from the working tree on subsequent commits. New decisions and state writes go to the orphan branch (and git notes for two-layer). The pre-commit hook prevents you from accidentally re-committing mutable state files into the working tree. - ### Switching between orphan and two-layer - Change `stateBackend` in `.squad/config.json`. The coordinator adapts on the next session. Both use the `squad-state` orphan branch, so existing state is preserved. Two-layer additionally enables git notes for commit-scoped annotations. - --- - ## Steady-state safety net - Once you've migrated to `orphan` or `two-layer`, two additional git hooks enforce the invariant that mutable state never lands in your working branch. - ### `pre-commit` — blocks state from entering the working tree - Before every commit, this hook scans the staged index for files that belong on the orphan branch: - - `.squad/decisions.md` - `.squad/agents/*/history.md` - `.squad/casting/` - `.squad/routing/` - If any of those paths are staged, the commit is refused with: - ``` ⚠ squad pre-commit: refusing to commit two-layer state into the working tree. Unstage the state files and let the post-commit hook sync them: git restore --staged .squad/decisions.md .squad/agents/*/history.md ``` - **Why files might reappear:** A tool, editor save, or agent code path that writes directly via `fs.writeFile` (bypassing `StateBackend`) will recreate the file on disk. Staging it and attempting a commit triggers this hook. 
- For the full recovery flow see [troubleshooting](#squad-pre-commit-refusing-to-commit-two-layer-state-into-the-working-tree). - ### `post-commit` — keeps the orphan branch current - After every successful commit, `squad sync --quiet` is called automatically. This pushes any pending state from the in-memory queue onto the `squad-state` branch, so the orphan branch stays up to date without manual intervention. - ### `SQUAD_SYNC_ACTIVE=1` bypass - Setting `SQUAD_SYNC_ACTIVE=1` in the environment causes both hooks to exit immediately without running. This is used **internally** by `squad sync` itself to avoid hook recursion. - > ⚠️ **Do not use `SQUAD_SYNC_ACTIVE=1` routinely.** Bypassing the pre-commit hook lets state files land in your working branch commits — exactly the situation `two-layer` is designed to prevent. Any PR created from that branch will carry squad state in the diff, defeating the clean-PR promise of the two-layer backend. Use the recovery flow instead. - ## Troubleshooting - ### "Pre-commit hook refused my commit" - **Cause:** You staged `.squad/` files that belong on the `squad-state` orphan branch (decisions.md, agent histories, casting/, routing/). The pre-commit hook blocks these to keep mutable state off your working-tree branches. - **Fix:** - ```bash # Unstage the offending paths git restore --staged .squad/decisions.md .squad/agents/*/history.md .squad/casting/ .squad/routing/ - # Then commit normally — only your code changes go through git commit ``` - **If you need to bypass** (e.g., during initial migration or manual repair): - ```bash SQUAD_SYNC_ACTIVE=1 git commit -m "manual state repair" ``` - > ⚠️ Only bypass when you understand why — the hook exists to prevent state from leaking into PRs. - ### "My state disappeared after switching branches" - **Cause:** You're using the default `local` backend. State files are branch-local. - **Fix:** Switch to `orphan` or `two-layer` backend. 
Both persist state across branches: - Orphan: state lives on a dedicated branch (accessible via `git show squad-state:`) - Two-layer: orphan branch + git notes for commit-scoped annotations - ### "State files are showing up in my PR" - **Cause:** Using `local` backend, or an agent accidentally committed state files on orphan/two-layer backend. - **Fix:** 1. If using local backend: switch to `orphan` or `two-layer` 2. If using orphan/two-layer: Scribe's State Leak Guard should catch this automatically. If it missed: @@ -494,11 +337,8 @@ SQUAD_SYNC_ACTIVE=1 git commit -m "manual state repair" git reset HEAD -- .squad/decisions.md .squad/agents/*/history.md .squad/log/ .squad/orchestration-log/ git checkout HEAD -- .squad/decisions.md .squad/agents/*/history.md ``` - ### "Orphan branch doesn't exist" - **Cause:** The `squad-state` branch hasn't been created yet. - **Fix:** Create it manually: ```bash git checkout --orphan squad-state @@ -507,45 +347,27 @@ mkdir .squad && echo "# Squad State" > .squad/README.md git add .squad/ && git commit -m "init: squad-state orphan branch" git checkout main ``` - Scribe will auto-create it on the next session if it doesn't exist (via git plumbing: `mktree`, `commit-tree`, `update-ref`). - ### "Git notes not found on root commit" - **Cause:** The agent wrote the note to HEAD instead of the root commit. - **Known issue:** Some agents write notes to the current HEAD instead of `$(git rev-list --max-parents=0 HEAD)`. The note still exists on the ref and is readable, but the root-commit anchor pattern isn't being followed precisely. - **Workaround:** The note is still accessible via `git notes --ref=squad/{agent} show {commit-sha}`. The ref itself (`refs/notes/squad/{agent}`) is visible from all branches regardless of which commit the note is on. - ### "Config.json doesn't have stateBackend" - **This is fine.** The default is `local` — the current behavior. No config change needed unless you want a different backend. 
- --- - ## Multi-User Synchronization - When multiple team members work on the same repo with Squad, the state backend determines how state stays in sync. - ### Local backend - Each user has their own `.squad/` files in the working tree. If committed, they merge like any other files — which means **merge conflicts are common** when two people modify decisions or histories simultaneously. This is the main reason teams choose orphan or two-layer backends. - ### Orphan backend - The `squad-state` branch is a normal Git branch. Synchronization works like any other branch: - ```bash # Before a squad session — pull latest state git fetch origin squad-state:squad-state - # After a squad session — push your state changes git push origin squad-state ``` - **Conflict handling:** If two users push to `squad-state` simultaneously, the second push will be rejected (non-fast-forward). Resolution: - ```bash git fetch origin squad-state:squad-state git checkout squad-state @@ -553,110 +375,70 @@ git merge origin/squad-state # resolve conflicts, then: git push origin squad-state git checkout main ``` - In practice, Squad's watch loop handles this automatically — Scribe's commit logic retries on push failure. - > **Tip:** For teams, consider protecting the `squad-state` branch with GitHub branch protection rules that allow force-push from the CI bot but require linear history from humans. - ### Two-layer backend - Two-layer uses **both** the orphan branch (same as above) **and** git notes. Notes are per-commit annotations that travel as refs: - ```bash # Fetch notes from the remote git fetch origin 'refs/notes/*:refs/notes/*' - # Push notes to the remote git push origin 'refs/notes/*:refs/notes/*' ``` - **Why this is team-safe:** Notes are scoped to individual commits — there are no merge conflicts because each commit has its own annotation namespace. The orphan branch stores the aggregated permanent state, and Ralph promotes note data to it after PRs merge. 
- ### Automatic fetch in `squad watch` - When `squad watch` starts, it automatically: 1. Fetches the `squad-state` branch (if orphan or two-layer) 2. Fetches `refs/notes/*` (if two-layer) 3. On each watch cycle, pushes any state changes back - **No manual sync is needed** when using `squad watch`. Manual sync is only needed if you're running one-off squad commands outside of watch mode. - ### Git config for automatic notes fetch - To make `git pull` automatically fetch notes, add this to `.git/config` (or use the setup script): - ```bash # One-time setup per clone git config --add remote.origin.fetch '+refs/notes/*:refs/notes/*' ``` - After this, every `git fetch origin` includes notes automatically. - --- - ## FAQ - ### What's the default state backend? - **`local`**. If you don't set `stateBackend` in `.squad/config.json`, Squad stores state as regular files in `.squad/` on your working branch. This is the simplest setup — no extra configuration needed. - ### When should I switch away from `local`? - Switch when any of these apply: - Your PRs are cluttered with `.squad/` file changes - You lose state when switching branches - Multiple team members are getting merge conflicts on `.squad/` files - You want squad state to be invisible in code reviews - ### Why would I choose `orphan` over `two-layer`? - **Choose `orphan` when you want simplicity.** It stores all state on a single dedicated branch. Easy to understand, inspect, and debug. One branch, one source of truth. - **Choose `two-layer` when you need commit-scoped context.** Two-layer adds git notes — annotations attached to specific commits. This means: - A decision made on commit `abc123` stays linked to that commit - Ralph can decide whether to promote or discard decisions based on whether the PR was merged or rejected - Research notes on a rejected PR are automatically ignored (not promoted) - **Bottom line:** `orphan` is for teams who just want clean PRs. 
`two-layer` is for teams who want intelligent state lifecycle management (decisions that survive or die with their PRs). - ### Can I use `orphan` and later upgrade to `two-layer`? - Yes. Both use the same `squad-state` orphan branch for permanent state. Switching from `orphan` to `two-layer` simply enables the additional git notes layer. Your existing state is fully preserved. - ### What happens if two people run Squad simultaneously? - - **Local backend:** File-level merge conflicts when both push (just like any Git merge conflict). - **Orphan backend:** The second push to `squad-state` fails with a non-fast-forward error. Squad's watch loop retries automatically. In the worst case, you manually merge the branch. - **Two-layer backend:** Notes are per-commit, so they never conflict. The orphan branch layer has the same retry behavior as the orphan backend. - ### Does the `squad-state` branch show up in my PRs? - No. The `squad-state` branch is an **orphan branch** — it has no common ancestor with your main branch. GitHub doesn't include it in PR diffs. It's completely invisible in code reviews. - ### How do I inspect state on the orphan branch? - ```bash # List all state files git ls-tree --name-only -r squad-state - # Read a specific file git show squad-state:decisions.md - # View state history git log --oneline squad-state ``` - ### Does this work with GitHub Actions / CI? - Yes. If your CI/CD workflow needs to read squad state: - **Orphan backend:** `git fetch origin squad-state && git show squad-state:` - **Two-layer:** Same as orphan, plus `git fetch origin 'refs/notes/*:refs/notes/*'` for notes - **Local backend:** State is on the working branch — just read `.squad/` files directly - ### What if I forget to push the `squad-state` branch? - State stays local to your machine. Other team members won't see your latest decisions or agent histories until you push. 
This is no different from forgetting to push any other branch — Git is distributed, and state only syncs when you push/fetch. - ### Can the `squad-state` branch be deleted safely? - **No.** Deleting it loses all permanent squad state (decisions, agent histories, logs). Treat it like your main branch — push it to the remote and don't delete it. You can recover from a local deletion by re-fetching from the remote: `git fetch origin squad-state:squad-state`. diff --git a/docs/src/content/docs/features/storage-provider.md b/docs/src/content/docs/features/storage-provider.md index 1888262d3..b04bff0f6 100644 --- a/docs/src/content/docs/features/storage-provider.md +++ b/docs/src/content/docs/features/storage-provider.md @@ -1,209 +1,152 @@ -# Storage Provider - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -**Try this to switch storage backends:** -``` -Use SQLite for persistent team state -``` - -**Try this to see where session data is stored:** -``` -Where is my squad data stored? -``` - -**Try this to build a cloud storage backend:** -``` -Create a StorageProvider for Azure Blob Storage -``` - -All of Squad's data — sessions, decisions, agent memories, event logs — flows through a pluggable storage interface. Pick the provider that matches your deployment: filesystem, database, or cloud. - ---- - -## What is StorageProvider? - -`StorageProvider` is Squad's I/O contract. Every read, write, delete, and directory operation goes through this interface. This decoupling means: - -- **Local development** uses the filesystem. Sessions and state live in `.squad/`. -- **Testing** uses in-memory storage. No disk I/O, no test pollution. -- **Production** can use SQLite, cloud storage, or a database. -- **Multi-team deployments** can route different squads to different backends. 
- -The interface is minimal — just 12 core async methods[^1]: - -```typescript -read(filePath: string): Promise<string | undefined> -write(filePath: string, data: string): Promise<void> -append(filePath: string, data: string): Promise<void> -exists(filePath: string): Promise<boolean> -list(dirPath: string): Promise<string[]> -delete(filePath: string): Promise<void> -deleteDir(dirPath: string): Promise<void> -isDirectory(targetPath: string): Promise<boolean> -mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> -rename(oldPath: string, newPath: string): Promise<void> -copy(srcPath: string, destPath: string): Promise<void> -stat(targetPath: string): Promise<StorageStats | undefined> -``` - ---- - -## Built-in Providers - -### FSStorageProvider - -**What it is:** Node.js filesystem wrapper. Standard, portable, no setup. - -**When to use it:** -- Local development -- Single-machine deployments -- Monorepo setups where squad data is part of the project - -**How it works:** Maps all Squad paths to disk directories. Create parent directories on write. Returns `undefined` on ENOENT instead of throwing. - -### InMemoryStorageProvider - -**What it is:** HashMap-backed, zero I/O. - -**When to use it:** -- Unit tests for agent logic -- Ephemeral sessions that don't need persistence -- CI environments where `.squad/` is discarded - -**How it works:** All paths stored in memory as POSIX strings. Fast, isolated, perfect for test fixtures. - -### SQLiteStorageProvider - -**What it is:** SQLite-backed provider using sql.js (WASM). Single `.db` file, cross-platform. - -**When to use it:** -- Small to medium teams -- Need a portable database file -- Windows/Linux/Mac without platform-specific binaries - -**How it works:** Stores file content in a `files(path, content, updated_at)` table. sql.js runs entirely in WASM — no native compilation, no dependencies. 
- -| Feature | FSStorageProvider | InMemoryStorageProvider | SQLiteStorageProvider | -|---------|---|---|---| -| Persistence | Disk | None (ephemeral) | Single `.db` file | -| Setup | None | None | 1 import + init | -| Speed | Disk I/O latency | Instant (memory) | Query overhead | -| Portability | Windows/Linux/Mac | Yes | Yes | -| Suitable for | Development, production | Tests | Portable deployments | - ---- - -## Create a Custom Provider - -Implement the `StorageProvider` interface to plug in any backend. Here's a skeleton: - -```typescript -import type { StorageProvider, StorageStats } from '@bradygaster/squad-sdk'; - -export class MyCustomStorageProvider implements StorageProvider { - async read(filePath: string): Promise<string | undefined> { - // Fetch from your backend (S3, Cosmos, etc.) - // Return undefined if not found - } - - async write(filePath: string, data: string): Promise<void> { - // Write to your backend - // Create parent directories as needed - } - - async append(filePath: string, data: string): Promise<void> { - // Append to a file, creating it if missing - } - - async exists(filePath: string): Promise<boolean> { - // Check if path exists - } - - async list(dirPath: string): Promise<string[]> { - // Return entry names in directory - // Return empty array if directory doesn't exist - } - - async delete(filePath: string): Promise<void> { - // Delete a file (no-op if missing) - } - - async deleteDir(dirPath: string): Promise<void> { - // Recursively delete directory (no-op if missing) - } - - async isDirectory(targetPath: string): Promise<boolean> { - // Return true if path is a directory - } - - async mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> { - // Create directory - } - - async rename(oldPath: string, newPath: string): Promise<void> { - // Move/rename file or directory - } - - async copy(srcPath: string, destPath: string): Promise<void> { - // Copy file, creating parent dirs for destination - } - - async stat(targetPath: string): Promise<StorageStats | undefined> { - // Return file metadata: size, mtime, isDirectory - // 
Return undefined if path doesn't exist - } - - // Sync variants (deprecated, but still required for Wave 1 compat) - readSync(filePath: string): string | undefined { /* ... */ } - writeSync(filePath: string, data: string): void { /* ... */ } - appendSync(filePath: string, data: string): void { /* ... */ } - existsSync(filePath: string): boolean { /* ... */ } - listSync(dirPath: string): string[] { /* ... */ } - deleteSync(filePath: string): void { /* ... */ } - deleteDirSync(dirPath: string): void { /* ... */ } - isDirectorySync(targetPath: string): boolean { /* ... */ } - mkdirSync(dirPath: string, options?: { recursive?: boolean }): void { /* ... */ } - renameSync(oldPath: string, newPath: string): void { /* ... */ } - copySync(srcPath: string, destPath: string): void { /* ... */ } - statSync(targetPath: string): StorageStats | undefined { /* ... */ } -} -``` - -Pass it to the runtime: - -```typescript -import { SquadClient } from '@bradygaster/squad-sdk'; - -const client = new SquadClient({ - storageProvider: new MyCustomStorageProvider(), - teamRoot: '.squad', -}); -``` - -See `storage-provider-azure` and `storage-provider-sqlite` samples for complete, production-ready implementations. - -[^1]: The full interface also includes 12 deprecated synchronous variants (`readSync`, `writeSync`, `appendSync`, `existsSync`, `listSync`, `deleteSync`, `deleteDirSync`, `isDirectorySync`, `mkdirSync`, `renameSync`, `copySync`, `statSync`) — 24 methods total. The sync methods exist for backward compatibility and will be removed in Wave 2. New code should use the async methods exclusively. 
- ---- - -## Choose the Right Provider - -| Goal | Provider | -|------|----------| -| **Local development** | FSStorageProvider | -| **Unit testing agents** | InMemoryStorageProvider | -| **Small team, portable DB** | SQLiteStorageProvider | -| **Scale across multiple machines** | Custom provider (your database, blob store, or message queue) | -| **Azure Blob Storage** | Use `storage-provider-azure` sample as reference | -| **DynamoDB, Firestore, etc.** | Implement StorageProvider — the interface maps cleanly | - ---- - -## Sample Projects - -- **storage-provider-sqlite** — Complete SQLite implementation using sql.js -- **storage-provider-azure** — Azure Blob Storage backend with connection pooling - -Both live in `/samples` and demonstrate patterns for production providers. +# Storage Provider +**Try this to switch storage backends:** +``` +Use SQLite for persistent team state +``` +**Try this to see where session data is stored:** +``` +Where is my squad data stored? +``` +**Try this to build a cloud storage backend:** +``` +Create a StorageProvider for Azure Blob Storage +``` +All of Squad's data — sessions, decisions, agent memories, event logs — flows through a pluggable storage interface. Pick the provider that matches your deployment: filesystem, database, or cloud. +--- +## What is StorageProvider? +`StorageProvider` is Squad's I/O contract. Every read, write, delete, and directory operation goes through this interface. This decoupling means: +- **Local development** uses the filesystem. Sessions and state live in `.squad/`. +- **Testing** uses in-memory storage. No disk I/O, no test pollution. +- **Production** can use SQLite, cloud storage, or a database. +- **Multi-team deployments** can route different squads to different backends. 
+The interface is minimal — just 12 core async methods[^1]: +```typescript +read(filePath: string): Promise<string | undefined> +write(filePath: string, data: string): Promise<void> +append(filePath: string, data: string): Promise<void> +exists(filePath: string): Promise<boolean> +list(dirPath: string): Promise<string[]> +delete(filePath: string): Promise<void> +deleteDir(dirPath: string): Promise<void> +isDirectory(targetPath: string): Promise<boolean> +mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> +rename(oldPath: string, newPath: string): Promise<void> +copy(srcPath: string, destPath: string): Promise<void> +stat(targetPath: string): Promise<StorageStats | undefined> +``` +--- +## Built-in Providers +### FSStorageProvider +**What it is:** Node.js filesystem wrapper. Standard, portable, no setup. +**When to use it:** +- Local development +- Single-machine deployments +- Monorepo setups where squad data is part of the project +**How it works:** Maps all Squad paths to disk directories. Create parent directories on write. Returns `undefined` on ENOENT instead of throwing. +### InMemoryStorageProvider +**What it is:** HashMap-backed, zero I/O. +**When to use it:** +- Unit tests for agent logic +- Ephemeral sessions that don't need persistence +- CI environments where `.squad/` is discarded +**How it works:** All paths stored in memory as POSIX strings. Fast, isolated, perfect for test fixtures. +### SQLiteStorageProvider +**What it is:** SQLite-backed provider using sql.js (WASM). Single `.db` file, cross-platform. +**When to use it:** +- Small to medium teams +- Need a portable database file +- Windows/Linux/Mac without platform-specific binaries +**How it works:** Stores file content in a `files(path, content, updated_at)` table. sql.js runs entirely in WASM — no native compilation, no dependencies. 
+| Feature | FSStorageProvider | InMemoryStorageProvider | SQLiteStorageProvider | +|---------|---|---|---| +| Persistence | Disk | None (ephemeral) | Single `.db` file | +| Setup | None | None | 1 import + init | +| Speed | Disk I/O latency | Instant (memory) | Query overhead | +| Portability | Windows/Linux/Mac | Yes | Yes | +| Suitable for | Development, production | Tests | Portable deployments | +--- +## Create a Custom Provider +Implement the `StorageProvider` interface to plug in any backend. Here's a skeleton: +```typescript +import type { StorageProvider, StorageStats } from '@bradygaster/squad-sdk'; +export class MyCustomStorageProvider implements StorageProvider { + async read(filePath: string): Promise<string | undefined> { + // Fetch from your backend (S3, Cosmos, etc.) + // Return undefined if not found + } + async write(filePath: string, data: string): Promise<void> { + // Write to your backend + // Create parent directories as needed + } + async append(filePath: string, data: string): Promise<void> { + // Append to a file, creating it if missing + } + async exists(filePath: string): Promise<boolean> { + // Check if path exists + } + async list(dirPath: string): Promise<string[]> { + // Return entry names in directory + // Return empty array if directory doesn't exist + } + async delete(filePath: string): Promise<void> { + // Delete a file (no-op if missing) + } + async deleteDir(dirPath: string): Promise<void> { + // Recursively delete directory (no-op if missing) + } + async isDirectory(targetPath: string): Promise<boolean> { + // Return true if path is a directory + } + async mkdir(dirPath: string, options?: { recursive?: boolean }): Promise<void> { + // Create directory + } + async rename(oldPath: string, newPath: string): Promise<void> { + // Move/rename file or directory + } + async copy(srcPath: string, destPath: string): Promise<void> { + // Copy file, creating parent dirs for destination + } + async stat(targetPath: string): Promise<StorageStats | undefined> { + // Return file metadata: size, mtime, isDirectory + // Return undefined if path doesn't exist 
+ } + // Sync variants (deprecated, but still required for Wave 1 compat) + readSync(filePath: string): string | undefined { /* ... */ } + writeSync(filePath: string, data: string): void { /* ... */ } + appendSync(filePath: string, data: string): void { /* ... */ } + existsSync(filePath: string): boolean { /* ... */ } + listSync(dirPath: string): string[] { /* ... */ } + deleteSync(filePath: string): void { /* ... */ } + deleteDirSync(dirPath: string): void { /* ... */ } + isDirectorySync(targetPath: string): boolean { /* ... */ } + mkdirSync(dirPath: string, options?: { recursive?: boolean }): void { /* ... */ } + renameSync(oldPath: string, newPath: string): void { /* ... */ } + copySync(srcPath: string, destPath: string): void { /* ... */ } + statSync(targetPath: string): StorageStats | undefined { /* ... */ } +} +``` +Pass it to the runtime: +```typescript +import { SquadClient } from '@bradygaster/squad-sdk'; +const client = new SquadClient({ + storageProvider: new MyCustomStorageProvider(), + teamRoot: '.squad', +}); +``` +See `storage-provider-azure` and `storage-provider-sqlite` samples for complete, production-ready implementations. +[^1]: The full interface also includes 12 deprecated synchronous variants (`readSync`, `writeSync`, `appendSync`, `existsSync`, `listSync`, `deleteSync`, `deleteDirSync`, `isDirectorySync`, `mkdirSync`, `renameSync`, `copySync`, `statSync`) — 24 methods total. The sync methods exist for backward compatibility and will be removed in Wave 2. New code should use the async methods exclusively. 
+--- +## Choose the Right Provider +| Goal | Provider | +|------|----------| +| **Local development** | FSStorageProvider | +| **Unit testing agents** | InMemoryStorageProvider | +| **Small team, portable DB** | SQLiteStorageProvider | +| **Scale across multiple machines** | Custom provider (your database, blob store, or message queue) | +| **Azure Blob Storage** | Use `storage-provider-azure` sample as reference | +| **DynamoDB, Firestore, etc.** | Implement StorageProvider — the interface maps cleanly | +--- +## Sample Projects +- **storage-provider-sqlite** — Complete SQLite implementation using sql.js +- **storage-provider-azure** — Azure Blob Storage backend with connection pooling +Both live in `/samples` and demonstrate patterns for production providers. diff --git a/docs/src/content/docs/features/team-setup.md b/docs/src/content/docs/features/team-setup.md index eb1ec7fe3..b3ef0b7f7 100644 --- a/docs/src/content/docs/features/team-setup.md +++ b/docs/src/content/docs/features/team-setup.md @@ -1,44 +1,28 @@ # Team Setup & Init Mode - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to initialize for a specific stack:** ``` Set up a team for a React + Node.js API with PostgreSQL ``` - **Try this to expand capabilities:** ``` Add a security specialist to the team ``` - **Try this to view the roster:** ``` Show me the current team roster ``` - Squad analyzes your project and proposes a team roster with 3-7 members tailored to your stack. You can accept as-is, customize during setup, or modify the team anytime after. - --- - ## How Init Works - When you first run Squad in a repository, it doesn't impose a team — it proposes one. The init flow analyzes your project, suggests roles and members, waits for your confirmation, then creates the `.squad/` directory structure and installs the crew. 
- ### Character Casting - By default, Squad uses the **CastingEngine** to assign agent names from fictional universes (The Usual Suspects, Ocean's Eleven, etc.). The LLM proposes roles and team composition; the engine allocates curated character names, personalities, and backstories from the selected universe. Use `squad init --roles` to opt into the base role catalog (Lead, Backend, Frontend, Tester) instead of universe casting. - ## How Init Works - 1. **Discovery** — Squad scans your repository: language distribution, file structure, test frameworks, dependencies, existing workflows. 2. **Proposal** — Based on what it finds, Squad proposes a team roster with 3-7 members and their roles. 3. **Confirmation** — You review the proposal and can accept as-is, add members, remove members, or change roles. 4. **Creation** — Squad writes `.squad/team.md`, creates agent directories under `.squad/agents/{member}/`, and sets up the coordinator. - ### File Structure Created - ``` .squad/ ├── team.md # Team roster @@ -59,9 +43,7 @@ By default, Squad uses the **CastingEngine** to assign agent names from fictiona ├── orchestration-log/ # Coordinator state └── casting/ # Universe assignments ``` - ## Customizing During Init - | What you say | What happens | |--------------|--------------| | "Accept" / "Looks good" | Creates team as proposed | @@ -69,29 +51,20 @@ By default, Squad uses the **CastingEngine** to assign agent names from fictiona | "Remove the tester" | Drops tester from the team | | "Change backend to Rust specialist" | Adjusts role focus for that member | | "Make Fenster the frontend lead" | Assigns specific name to role | - ## Customizing After Init - You can modify `.squad/team.md` directly or ask the coordinator: - > "Add a security specialist to the team" - The coordinator will: 1. Cast a new member from the universe 2. Create their agent directory and charter 3. Update `team.md` and `routing.md` - > "Remove McManus from the team" - The coordinator will: 1. 
Remove the member from `team.md` 2. Archive their agent directory (moves to `.squad/agents/.archived/{member}/`) 3. Update routing rules - ## Default Team Composition - For most projects, Squad proposes: - | Role | When Included | |------|--------------| | **Lead** | Always — triages, reviews, unblocks | @@ -101,38 +74,29 @@ For most projects, Squad proposes: | **Frontend** | If React/Vue/Svelte/Angular detected | | **Backend** | If API routes, database code, or server framework detected | | **Scribe** | Always — decision logger | - ## Upgrade vs. Init - | Command | When to Use | |---------|------------| | `init` | First-time setup in a new repository | | `upgrade` | Existing `.squad/` — updates templates, adds new members, migrates config | - Running `init` on an existing Squad repository prompts for upgrade mode automatically. - ## Sample Prompts - ``` Start a new Squad team for this project ``` Triggers init mode. Squad analyzes the repository and proposes a team. - ``` Add a database specialist to the team ``` Adds a new member post-init. Coordinator casts from universe, creates charter, updates routing. - ``` Remove the designer role — we don't need it ``` Removes a team member. Archives their directory and updates team.md. - ``` Show me the current team roster ``` Displays team.md with all members, roles, and capabilities. - ``` Change the tester to focus on integration tests instead of unit tests ``` diff --git a/docs/src/content/docs/features/teams-comms.md b/docs/src/content/docs/features/teams-comms.md index 533cefddb..4da0b7eb0 100644 --- a/docs/src/content/docs/features/teams-comms.md +++ b/docs/src/content/docs/features/teams-comms.md @@ -2,19 +2,11 @@ title: Microsoft Teams Comms Adapter description: Bidirectional chat integration between Squad and Microsoft Teams via Microsoft Graph API — 1:1 chats and channel messaging with PKCE browser auth or device code fallback. 
--- - # Microsoft Teams Comms Adapter - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - The Teams adapter lets your squad post updates and read replies through Microsoft Teams, alongside the existing file-based, email, and other comm channels. It ships in `@bradygaster/squad-sdk` as a `CommunicationAdapter` implementation and uses Microsoft Graph API for both 1:1 chats and channel messaging. - > **⚠️ Breaking change in v0.10:** `createCommunicationAdapter` is now async (returns `Promise`). Callers must `await` the result. - --- - ## What you can do with it - | Action | Supported | |--------|-----------| | Post a message to a 1:1 chat | ✅ | @@ -23,20 +15,14 @@ The Teams adapter lets your squad post updates and read replies through Microsof | Post rich content (Adaptive Cards, attachments) | Partial (text + basic formatting) | | Notify on agent-completed work | ✅ (via squad watch / notification routing) | | Two-way conversation with an agent in Teams | ✅ (poll-based, not push) | - The adapter is one of several `CommunicationAdapter` implementations — see [Notifications](/squad/docs/features/notifications/) for the broader notification system. - --- - ## Authentication flow - The adapter tries auth methods in this order, falling through on failure: - 1. **Cached token** — looks for a previously-saved token in the OS credential store 2. **Refresh token** — if cached refresh token is valid, silently re-issues an access token 3. **Browser PKCE** — opens a browser for the user to sign in; uses Authorization Code with PKCE; 120-second timeout 4. **Device code** — fallback when no browser is available (CI, remote shell); user enters a code on a different device - ``` $ squad notify teams --to user@example.com --message "Build complete" 🔑 No cached token — opening browser for sign-in... @@ -44,22 +30,15 @@ $ squad notify teams --to user@example.com --message "Build complete" ✓ Token cached. Sending message... 
✓ Posted to user@example.com ``` - The token cache persists across sessions. After the first sign-in, subsequent runs are silent unless the refresh token expires. - --- - ## Configuration - The adapter requires a Microsoft Entra (Azure AD) app registration with permissions for: - - `Chat.ReadWrite` (1:1 chat operations) - `ChannelMessage.Send` (channel posts) - `ChannelMessage.Read.All` (read channel replies) - `User.Read` (basic profile) - Configure in `.squad/config.json`: - ```json { "comms": { @@ -72,19 +51,13 @@ Configure in `.squad/config.json`: } } ``` - The `redirectUri` is the local-only OAuth callback for browser PKCE — it never leaves your machine. - --- - ## Usage from the SDK - ```typescript import { createCommunicationAdapter } from '@bradygaster/squad-sdk/platform'; - // IMPORTANT: this is async now (breaking change in v0.10) const teams = await createCommunicationAdapter({ channel: 'teams' }); - // Post a message const post = await teams.postUpdate({ title: 'CI passed', @@ -92,36 +65,26 @@ const post = await teams.postUpdate({ category: 'pr-status', author: 'Squad', }); - // Poll for replies const replies = await teams.pollForReplies({ threadId: post.id, since: new Date(Date.now() - 60_000), }); ``` - --- - ## Limitations - - **Polling, not push.** The adapter polls for replies; it doesn't subscribe to a websocket. Reply latency is the poll interval (default 30s). - **No Adaptive Card builder.** You can send plain text and basic formatting today; for rich cards, use the underlying Graph SDK directly. - **No bot-framework integration.** This adapter uses delegated user permissions, not a bot account. Each user sees the message as posted by themselves (or the configured app identity), not by a "Squad bot". - **MSAL token cache shared across processes.** If you run multiple squads simultaneously with the same Entra app, they share the same cached token. Use distinct `tokenCachePath` if you need isolation. 
- --- - ## Security notes - - Tokens are stored in the OS credential store (Windows Credential Manager / macOS Keychain / Linux libsecret) where available, with a JSON file fallback at `tokenCachePath` - The browser PKCE callback listens on `127.0.0.1` only — never exposed to the network - The device code flow shows a verification URL + code; both are short-lived - The adapter does NOT log message content; only metadata (post id, recipient, timestamp) is recorded in any audit trail - --- - ## See also - - [Notifications](/squad/docs/features/notifications/) — the broader notification system - [Enterprise Platforms](/squad/docs/features/enterprise-platforms/) — Teams + ADO + other enterprise integrations - [Notification Level](/squad/docs/features/notification-level/) — controlling noise across all channels diff --git a/docs/src/content/docs/features/tiered-memory.md b/docs/src/content/docs/features/tiered-memory.md index acdfd6a61..48690491a 100644 --- a/docs/src/content/docs/features/tiered-memory.md +++ b/docs/src/content/docs/features/tiered-memory.md @@ -2,51 +2,34 @@ title: Tiered Memory — Hot / Cold / Wiki description: Three-tier agent memory model that cuts spawn context cost by 20-55% by separating fresh task context from archived history and durable reference docs. --- - # Tiered Memory — Hot / Cold / Wiki - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - **Problem:** Squad agents load their full `history.md` on every spawn. Production measurements show 34–74KB payloads per agent (8.8K–18.5K tokens), with 82–96% of that being "old noise" — context the current task doesn't need. - **Solution:** A three-tier memory model that loads only what each task actually requires, achieving 20–55% context reduction per spawn. 
- -Tiered Memory ships as a built-in skill at `.copilot/skills/tiered-memory/SKILL.md` and pairs with Scribe's existing 15KB-summarization rule (see [Memory & Knowledge](/squad/docs/concepts/memory-and-knowledge/)) to give large, long-running squads predictable context budgets. - +Tiered Memory ships as a built-in skill at `.github/skills/tiered-memory/SKILL.md`. In current releases, `squad init` and `squad upgrade` install it automatically. It pairs with Scribe's existing 15KB-summarization rule (see [Memory & Knowledge](/squad/docs/concepts/memory-and-knowledge/)) to give large, long-running squads predictable context budgets. --- - ## The three tiers - ### 🔥 Hot — Current Session Context - - **Size target:** ~2–4KB - **Loaded:** Always, on every spawn - **Contents:** Current task, active decisions made this session, immediate blockers, last 3–5 actions, who's being talked to - **Lifetime:** Current session only — Scribe promotes relevant parts to Cold at session end - **Purpose:** Immediate task context with zero latency and zero decision - ### ❄️ Cold — Summarized Cross-Session History - - **Size target:** ~8–12KB - **Loaded:** On demand — include only when the task explicitly needs history - **Contents:** Summarized past sessions, cross-session decisions, recurring patterns, unresolved issues - **Lifetime:** 30-day rolling window — older entries promoted to Wiki - **Purpose:** Answer *"what have we tried before?"* and *"what was decided?"* without replaying full transcripts - **How to include:** Pass `--include-cold` in the spawn template, or add a `## Cold Memory` section to the agent's instructions - ### 📚 Wiki — Durable Structured Knowledge - - **Size target:** variable (structured reference docs) - **Loaded:** Async write, selective read — only when the task requires domain knowledge - **Contents:** ADRs, agent charters, routing rules, stable conventions, external API contracts, platform constraints - **Lifetime:** Permanent until explicitly deprecated - 
**Purpose:** Authoritative reference (not history) — structured facts - **How to include:** Pass `--include-wiki` or reference specific wiki doc paths in the spawn template - --- - ## When to load each tier - | Situation | Hot | Cold | Wiki | |-----------|-----|------|------| | New task, no prior context needed | ✅ | ❌ | ❌ | @@ -55,44 +38,28 @@ Tiered Memory ships as a built-in skill at `.copilot/skills/tiered-memory/SKILL. | Designing something new in an established area | ✅ | ❌ | ✅ | | Onboarding a new team member | ✅ | ❌ | ✅ | | Investigating an architectural drift | ✅ | ✅ | ✅ | - The bias is to load LESS, not more. Cold and Wiki should be opt-in for each spawn based on whether the task description references the past or domain conventions. - --- - ## How Scribe maintains the tiers - Scribe's existing maintenance cycle (see [Memory & Knowledge](/squad/docs/concepts/memory-and-knowledge/)) is extended: - 1. **Hot drained at session end** — Scribe scans the session's hot memory, summarizes meaningful entries, appends them to Cold 2. **Cold aged into Wiki** — entries older than 30 days that contain structured facts (decisions, conventions, contracts) get promoted to Wiki 3. **Wiki authored deliberately** — Scribe never auto-creates Wiki entries from scratch; it only promotes Cold content that's already structured - --- - ## Production measurements - The skill's documentation cites measurements from a large production squad: - | Squad size | Before tiered | After tiered | Reduction | |------------|--------------|--------------|-----------| | 8 agents, 34KB total history | 8,800 tokens/spawn | 4,400 tokens/spawn | ~50% | | 14 agents, 74KB total history | 18,500 tokens/spawn | 8,300 tokens/spawn | ~55% | - The exact savings depend on what fraction of each agent's history is task-relevant. The 20–55% range is the measured spread across different team configurations. 
- --- - ## Caveats - - **The tier split is currently advisory** — the skill defines hot/cold/wiki semantics, but the spawn template doesn't yet enforce `--include-cold` / `--include-wiki` flags as part of the runtime contract. Adoption is per-team via spawn-template edits. - **Wiki has no UI** — there's no `squad wiki list` command yet. Entries live as files in `.squad/wiki/` (when teams create that directory) and the coordinator references them by path. - **Issues [#1268](https://github.com/bradygaster/squad/issues/1268) and [#1269](https://github.com/bradygaster/squad/issues/1269)** propose making Scribe enforce these tiers via the governed memory pipeline. Until those land, tier maintenance is best-effort. - --- - ## See also - - [Memory & Knowledge](/squad/docs/concepts/memory-and-knowledge/) — the broader memory architecture - [Skills](/squad/docs/features/skills/) — how built-in skills work - [Context Hygiene](/squad/docs/features/context-hygiene/) — related practices for keeping spawn context small diff --git a/docs/src/content/docs/features/upstream-inheritance.md b/docs/src/content/docs/features/upstream-inheritance.md index d9247c0e9..91d118609 100644 --- a/docs/src/content/docs/features/upstream-inheritance.md +++ b/docs/src/content/docs/features/upstream-inheritance.md @@ -1,210 +1,143 @@ # Upstream Inheritance - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Upstream inheritance lets you declare external Squad sources (from repositories, local directories, or exports) and automatically inherit their context at session start. Share practices across teams, organizations, and projects without duplicating configuration. 
- ## How it works - At session start, the coordinator reads all declared upstreams from `upstream.json` and makes their context available to every agent: - - **Skills** — `.copilot/skills/*/SKILL.md` - **Decisions** — `.squad/decisions.md` - **Wisdom** — `.squad/identity/wisdom.md` - **Casting policy** — `.squad/casting/policy.json` - **Routing** — `.squad/routing.md` - **Resolution order:** Later entries override earlier ones. Layer upstreams from org → team → repo, with each level adding or overriding as needed. - **Source types:** - | Type | Example | Use case | |------|---------|----------| | **local** | `../org-practices/.squad/` | Sibling repo, shared drive, monorepo package | | **git** | `https://github.com/acme/platform-squad.git` | Public/private team repo (with credentials) | | **export** | `./exports/squad-export.json` | Snapshot for offline use or version pinning | - ## Quick start - **Local upstream:** - ```bash squad upstream add ../org-practices/.squad squad upstream list # org-practices → local: /path/to/org-practices/.squad (never synced) ``` - **Git upstream:** - ```bash squad upstream add https://github.com/acme/platform-squad.git --name platform --ref main squad upstream sync platform ``` - **Export snapshot:** - ```bash squad export-config --output ./exports/snapshot.json squad upstream add ./exports/snapshot.json --name snapshot ``` - ## Troubleshooting - ### Git clone or sync fails - Ensure the URL is correct and you have access. For private repos, use SSH (`git@github.com:owner/repo.git`) with your SSH key in ssh-agent, or use a GitHub PAT with `https://[PAT]@github.com/owner/repo.git`. - ### Local upstream not found - Verify the path exists: `ls ../shared/.squad`. Use absolute paths if relative paths fail. 
- ### Agents don't see inherited context - ```bash # Verify upstreams are configured squad upstream list - # Sync and validate sources squad upstream sync - # Restart your session (resolution happens at session start) ``` - For git upstreams, check `.squad/_upstream_repos/{name}` exists. - ### Cached clone out of date - ```bash squad upstream sync ``` - Then start a new session. - ### Conflicting upstreams - Later entries in `upstream.json` override earlier ones. Check order with `squad upstream list`. Reorder with `remove` + `add` if needed. - ## CLI Reference - ### `squad upstream add ` - Add a new upstream source. - **Signature:** ``` squad upstream add [--name ] [--ref ] ``` - **Arguments:** - `` — File path, git URL, or export JSON file. Squad auto-detects the type. - **Options:** - `--name ` — Display name (optional; defaults to repo/dir name) - `--ref ` — Git branch/tag (only for git sources; defaults to `main`) - **Examples:** - Local directory: ```bash squad upstream add ../shared-squad --name shared ``` - Git repository: ```bash squad upstream add https://github.com/acme/platform-squad.git --name platform --ref main ``` - Export file: ```bash squad upstream add ./exports/org-snapshot.json --name org-snapshot ``` - **What happens:** - Reads `upstream.json` from `.squad/` - Detects source type (local, git, export) - For git sources: auto-clones to `.squad/_upstream_repos/{name}` - Adds entry to `.squad/upstream.json` - For local/export: coordinator reads live at session start (no sync needed) - ### `squad upstream remove ` - Remove an upstream by name. - **Signature:** ``` squad upstream remove ``` - **Examples:** ```bash squad upstream remove platform ``` - **What happens:** - Removes entry from `.squad/upstream.json` - Deletes cached clone from `.squad/_upstream_repos/{name}` if it exists - ### `squad upstream list` - Show all configured upstreams. 
- **Signature:** ``` squad upstream list ``` - **Output example:** ``` Configured upstreams: - platform → git: https://github.com/acme/platform-squad.git (ref: main) (synced 2026-02-22) shared → local: /home/alice/shared-squad (never synced) snapshot → export: ./exports/org-snapshot.json (synced 2026-02-22) ``` - ### `squad upstream sync [name]` - Update cached clones for git upstreams, or validate paths for local/export upstreams. - **Signature:** ``` squad upstream sync [name] ``` - **Examples:** - Sync all: ```bash squad upstream sync ``` - Sync one: ```bash squad upstream sync platform ``` - **What happens:** - For **git** sources: `git pull --ff-only` on the cached clone, or re-clones if needed - For **local** sources: validates that the path exists - For **export** sources: validates that the file exists - Updates `last_synced` timestamp in `upstream.json` - ## SDK API Reference - The upstream module provides resolver functions for programmatic use. - ### Types - #### `UpstreamType` - ```typescript type UpstreamType = 'local' | 'git' | 'export'; ``` - #### `UpstreamSource` - A declared upstream from `upstream.json`: - ```typescript interface UpstreamSource { name: string; // Display name (e.g., "platform") @@ -215,21 +148,15 @@ interface UpstreamSource { last_synced: string | null; // Last successful sync } ``` - #### `UpstreamConfig` - The `upstream.json` file format: - ```typescript interface UpstreamConfig { upstreams: UpstreamSource[]; } ``` - #### `ResolvedUpstream` - Resolved content from a single upstream: - ```typescript interface ResolvedUpstream { name: string; @@ -241,50 +168,35 @@ interface ResolvedUpstream { routing: string | null; } ``` - #### `UpstreamResolution` - Result of resolving all upstreams: - ```typescript interface UpstreamResolution { upstreams: ResolvedUpstream[]; } ``` - ### Functions - #### `readUpstreamConfig(squadDir: string): UpstreamConfig | null` - Read and parse `upstream.json` from a squad directory. 
- **Returns:** `null` if file doesn't exist or is invalid. - **Example:** ```typescript import { readUpstreamConfig } from '@bradygaster/squad-sdk'; - const config = readUpstreamConfig('.squad'); if (config) { console.log(`Found ${config.upstreams.length} upstreams`); } ``` - #### `resolveUpstreams(squadDir: string): UpstreamResolution | null` - Resolve all upstream sources declared in `upstream.json`. - For each upstream: - **local**: reads directly from the source's `.squad/` - **git**: reads from `.squad/_upstream_repos/{name}/` (must be cloned first) - **export**: reads from the JSON file - **Returns:** `null` if no `upstream.json` exists. If a source can't be reached, that upstream is included with empty content (no error thrown). - **Example:** ```typescript import { resolveUpstreams } from '@bradygaster/squad-sdk'; - const resolution = resolveUpstreams('.squad'); if (resolution) { for (const upstream of resolution.upstreams) { @@ -292,13 +204,9 @@ if (resolution) { } } ``` - #### `buildInheritedContextBlock(resolution: UpstreamResolution | null): string` - Build a text block summarizing inherited context (for agent prompts). - **Returns:** Empty string if no resolution or upstreams. - **Example output:** ``` INHERITED CONTEXT: @@ -306,15 +214,10 @@ INHERITED CONTEXT: shared: skills (5), routing ✓ snapshot: (empty) ``` - **Usage:** The coordinator includes this in agent spawn prompts to signal what context is available. - #### `buildSessionDisplay(resolution: UpstreamResolution | null): string` - Build a user-facing display for session start greeting. - **Returns:** Empty string if no resolution or upstreams. - **Example output:** ``` 📡 Inherited context: @@ -322,17 +225,11 @@ Build a user-facing display for session start greeting. shared (local) — 5 skills, routing ⚠️ snapshot (export) — source not reachable ``` - **Usage:** Shown in the session greeting to confirm what upstreams are available. 
- ## Use cases - ### Shared practices across teams - **Problem:** Multiple teams need consistent agent definitions, decisions, and casting policy without duplicating configuration. - **Solution:** Create a central Squad repo (platform-squad) with shared context. Product teams add it as an upstream. - ```bash # In platform-squad repo .squad/ @@ -341,25 +238,17 @@ Build a user-facing display for session start greeting. skills/ platform-engineer/SKILL.md backend-engineer/SKILL.md - # In product-a repo squad upstream add https://github.com/acme/platform-squad.git --name platform --ref main ``` - **Outcome:** Platform team updates practices once. All product teams inherit changes at next `squad upstream sync`. Product teams can layer their own skills or override decisions as needed. - **Also works for:** - Open-source frameworks with community plugins - Consultancy methodology across client projects - --- - ### Domain consistency across services - **Problem:** Multiple microservices share a domain model (user, order, payment). You need a single source of truth for how agents work with that model. - **Solution:** Create a shared-domain repo with domain-specific skills and decisions. Each service adds it as an upstream. - ```bash # In shared-domain repo .squad/ @@ -368,21 +257,14 @@ squad upstream add https://github.com/acme/platform-squad.git --name platform -- database-engineer/SKILL.md decisions.md routing.md - # In user-service, order-service, payment-service repos squad upstream add https://github.com/acme/shared-domain.git --name domain ``` - **Outcome:** All agents across services understand the domain model. Domain conventions change once; each service pulls independently. Services stay decoupled with consistency. - --- - ### Multi-team scaling patterns - **Problem:** Post-acquisition, migration, or enterprise modernization requires coordinating practices across teams with different histories. - **Solution:** Create a unified practices or playbook repo. 
All teams add it as an upstream. - ```bash # In acme-unified-practices repo (post-acquisition example) .squad/ @@ -391,19 +273,14 @@ squad upstream add https://github.com/acme/shared-domain.git --name domain skills/ acme-engineer/SKILL.md acquired-engineer/SKILL.md - # In both original and acquired product repos squad upstream add https://github.com/acme/acme-unified-practices.git --name unified ``` - **Outcome:** Teams work independently while culturally aligned. Agents understand both traditions. Gradual convergence without painful rewrites. - **Also works for:** - Monolith-to-microservices modernization (playbook defines architecture patterns) - Multi-geo teams converging on shared standards - ## Next Steps - - **Read more:** See `docs/guide/casting.md` for how inherited casting policy shapes agent behavior - **Set up**: Run `squad upstream add ` to add your first upstream - **Share:** Export your Squad config with `squad export-config` for others to inherit diff --git a/docs/src/content/docs/features/vscode.md b/docs/src/content/docs/features/vscode.md index b928e854b..7e3288af6 100644 --- a/docs/src/content/docs/features/vscode.md +++ b/docs/src/content/docs/features/vscode.md @@ -1,148 +1,83 @@ # Squad in VS Code - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Squad is fully supported in VS Code (v0.4.0+). Your team runs identically to the CLI, with the same `.squad/` state, same agents, same decisions — but with VS Code-specific tooling and constraints. - This guide covers what's different, what's the same, and when to use CLI vs VS Code. 
- --- - ## Getting Started - ### Prerequisites - - **VS Code** — Latest version - **GitHub Copilot extension** — `GitHub.copilot` (installed, authenticated) - **Workspace trust** — Your workspace must be trusted (VS Code security) - **Node.js 20+ (LTS)** — If running CLI to initialize Squad - **Squad installed** — Either in the repo already (from CLI), or initialized fresh via agent selection - ### Initial Setup - **Option A: Initialize with CLI (recommended)** - ```bash npm install -g @bradygaster/squad-cli ``` - Creates `.github/agents/squad.agent.md` and `.squad/templates/`. Then open VS Code and select **Squad** from the agent picker. - **Option B: Fresh in VS Code** - Open Copilot in VS Code, select **Squad** from `/agents`. Squad detects it's running in VS Code and bootstraps normally. The `.squad/` directory is created on first run. - --- - ## How It Works - Squad detects VS Code automatically and adapts its spawning mechanism: - - **In CLI:** Uses `task` tool with full control (model selection, agent type, background mode) - **In VS Code:** Uses `runSubagent` for **parallel synchronous execution** - When you assign work to an agent, the coordinator spawns that agent as a sub-agent in VS Code. Multiple sub-agents spawned in **the same turn** run in **parallel**. Each completes, then you get all results at once — no intermediate "launch table" feedback like the CLI shows. - --- - ## What's Different from CLI - ### No Per-Spawn Model Selection - VS Code accepts the session model (your Copilot model picker). No per-spawn dynamic selection. Cost optimization deferred — use Haiku via model picker for cheaper runs. - ### Sub-Agents Run Sync (But Parallel) - Agents launch in the same turn and run in parallel, but block as a group. Results arrive all at once — no launch table or `read_agent` polling. - ### SQL Tool Not Available - SQL unavailable in VS Code agents. Workflows needing SQL should live in CLI, or use file-based state (JSON in `.squad/state/`). 
- ### File Writes May Prompt for Approval - VS Code security feature: approve file modifications once with "Always allow in this workspace". - --- - ## What's the Same - ### Same `.squad/` State - Initialize in CLI, use in VS Code, or vice versa. Team roster, decisions, histories are identical across both. - ### Same Team, Same Skills - Charters, histories, agent roles persist. Decisions made in CLI are visible in VS Code. - ### Parallel Execution Works - Multiple agents in one turn → all run in parallel. Equivalent throughput to CLI background mode. - ### Full File Access (Workspace-Scoped) - Read/write your entire workspace and `.squad/` directory. Cannot reach outside workspace. - ### MCP Tools Inherited - If workspace has MCP servers configured, sub-agents inherit them (GitHub MCP, semantic search, terminal). - --- - ## Tips - Use single-root workspaces (multi-root has path resolution bugs). - Accept file modification approval once — subsequent writes are automatic. - For initial setup, heavy parallel work (5+ agents), SQL workflows, or cost optimization (per-spawn model selection) → use CLI. - Check the model picker at top of chat if agents seem slow or expensive — switch to Haiku for cost savings. - --- - ## Known Limitations - - **JetBrains IDEs** — Untested. Agent spawning mechanism undocumented. - **GitHub.com (web)** — Untested. Copilot Chat on GitHub.com doesn't support Squad. - **Custom agent model selection** — Phase 2 future feature. - See [Getting Started](../get-started/first-session.md) for your first VS Code session. - --- - ## Extension Developer Guide - If you're building a VS Code extension that integrates with Squad, follow these patterns. 
- ### Detect Client Mode - ```typescript const isVSCodeMode = process.env.SQUAD_CLIENT === 'vscode'; - if (!isVSCodeMode) { console.warn('SquadUI should only run in VS Code'); return; } ``` - ### Import SDK Safely - **DO:** Import specific types and functions - ```typescript import type { CastMember, AgentCharter } from '@bradygaster/squad-sdk'; import { loadConfig, resolveSquad } from '@bradygaster/squad-sdk'; ``` - **DON'T:** Import the CLI entry point — this will call `process.exit()` and crash your extension. - ### Load Configuration - ```typescript import { loadConfig, resolveSquad } from '@bradygaster/squad-sdk'; - try { const squadPath = resolveSquad(workspaceRoot); const config = await loadConfig(squadPath); @@ -152,32 +87,23 @@ try { return; } ``` - ### Spawn Agents - ```typescript import { SquadCoordinator } from '@bradygaster/squad-sdk'; - const coordinator = new SquadCoordinator({ teamRoot: squadPath }); await coordinator.initialize(); - const decision = await coordinator.route('refactor this function'); await coordinator.execute(decision, 'refactor this function'); ``` - ### Stream Responses - ```typescript import { startStreaming } from '@bradygaster/squad-sdk'; - const stream = await startStreaming(agentResponse); for await (const chunk of stream) { vscodePanel.append(chunk); } ``` - ### Handle Errors Gracefully - ```typescript try { const result = await coordinator.route(userTask); @@ -185,14 +111,10 @@ try { vscode.window.showErrorMessage(`Squad error: ${err.message}`); } ``` - Never call `process.exit()` in an extension — it crashes VS Code. 
- ### Pass Editor Context - ```typescript const editor = vscode.window.activeTextEditor; - const decision = await coordinator.route(userTask, { fileContent: editor.document.getText(), fileName: editor.document.fileName, @@ -200,13 +122,10 @@ const decision = await coordinator.route(userTask, { language: editor.document.languageId, }); ``` - --- - ## See Also - - [Getting Started](../get-started/installation.md) — Installation and setup guide - [Parallel Execution](parallel-execution.md) — How Squad fans out agents - [Model Selection](model-selection.md) — Cost-first routing strategy -- [Interactive Shell](../guide/shell.md) — Shell commands and features +- [CLI Reference](../reference/cli.md) — Setup, operations, and remote access commands - [SDK API Reference](../reference/api-reference.md) — Full SDK type and function reference diff --git a/docs/src/content/docs/features/worktrees.md b/docs/src/content/docs/features/worktrees.md index 2d67cb8bc..1cd73cc45 100644 --- a/docs/src/content/docs/features/worktrees.md +++ b/docs/src/content/docs/features/worktrees.md @@ -1,140 +1,99 @@ # Git Worktree Awareness - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this to enable branch-specific state:** ``` Use worktree-local mode — I want each branch to have its own team state ``` - **Try this to share state across branches:** ``` Share the team across all worktrees — use main-checkout mode ``` - Squad supports git worktrees with two strategies: **worktree-local** (each worktree has its own `.squad/` state) and **main-checkout** (shared state across all worktrees). - --- - ## What Are Worktrees? - ## What Are Worktrees? 
- Git worktrees let you check out multiple branches simultaneously: - ```bash git worktree add ../project-feature-a feature-a git worktree add ../project-feature-b feature-b ``` - Now you have: - `project/` (main branch) - `project-feature-a/` (feature-a branch) - `project-feature-b/` (feature-b branch) - All share the same `.git/` database but have separate working directories. - ## Worktree Strategies - ### 1. Worktree-Local (Independent State) - Each worktree has its own `.squad/` directory. Agents in one worktree don't see state from another. - **When to use:** - Multiple features in parallel with **different teams** - Experimental branches where you want isolated Squad config - Different team compositions per worktree (e.g., frontend-only team in one, backend-only in another) - **Structure:** ``` project/ ├── .git/ └── .squad/ # Main worktree team - project-feature-a/ ├── .git -> ../project/.git/ └── .squad/ # Feature A team (independent) - project-feature-b/ ├── .git -> ../project/.git/ └── .squad/ # Feature B team (independent) ``` - **Setup:** ```bash cd project-feature-a # Initialize Squad in this worktree gh copilot "Initialize Squad for this worktree" ``` - ### 2. Main-Checkout (Shared State) - All worktrees share the `.squad/` directory from the main checkout. Agents across worktrees see the same team, decisions, and routing rules. - **When to use:** - Same team working on multiple branches - Coordinated work where agents need shared context - Parallel feature development by the same squad - **Structure:** ``` project/ ├── .git/ └── .squad/ # Shared by all worktrees - project-feature-a/ ├── .git -> ../project/.git/ └── .squad -> ../project/.squad/ # Symlink - project-feature-b/ ├── .git -> ../project/.git/ └── .squad -> ../project/.squad/ # Symlink ``` - **Setup:** ```bash cd project-feature-a ln -s ../project/.squad .squad ``` - Or tell Squad: `"Use the main worktree's team"` — Squad creates the symlink automatically. 
- ## Coordinator Team Root Resolution - When Squad starts in a worktree, the coordinator resolves team root: - 1. **Check for `.squad/` in current directory** — If exists and is not a symlink, use worktree-local strategy. 2. **Check if `.squad/` is a symlink** — If yes, follow symlink to main checkout, use main-checkout strategy. 3. **Scan parent worktrees** — If no `.squad/` found, search `../` for main worktree with `.squad/`. 4. **Prompt for strategy** — If ambiguous, ask: "Use worktree-local or main-checkout?" - ## Merge Driver for Append-Only Files - Squad uses `merge=union` for append-only log files to avoid conflicts across worktrees: - **.gitattributes:** ``` .squad/log/* merge=union .squad/orchestration-log/* merge=union .squad/decisions/inbox/* merge=union ``` - This ensures log entries from different worktrees don't conflict when merged back to main. - ## Worktree-Aware Commands - When using main-checkout strategy: - | Command | Behavior | |---------|----------| | `"Show team roster"` | Reads shared `team.md` from main worktree | | `"Add a directive"` | Writes to shared `decisions/inbox/` in main worktree | | `"Who's working on issue #42?"` | Checks orchestration log in main worktree (sees all agents across worktrees) | | `"Initialize Squad"` | Prompts: "Use main worktree's team or create new?" 
| - ## When to Use Which Strategy - | Scenario | Strategy | Reason | |----------|----------|--------| | **Parallel features, same team** | Main-checkout | Shared context, coordinated work | @@ -142,53 +101,39 @@ When using main-checkout strategy: | **Hotfix branch + feature branch** | Main-checkout | Same squad, need shared decisions | | **Multiple teams in same repo** | Worktree-local | Different roles, different directives | | **Solo dev, multiple branches** | Main-checkout | No need for duplicate state | - ## Switching Strategies - You can convert between strategies: - ### Worktree-Local → Main-Checkout - ```bash cd project-feature-a rm -rf .squad ln -s ../project/.squad .squad ``` - Or: `"Convert this worktree to use main team"` - ### Main-Checkout → Worktree-Local - ```bash cd project-feature-a rm .squad # Remove symlink cp -r ../project/.squad .squad # Copy state ``` - Or: `"Give this worktree its own Squad team"` - ## Sample Prompts - ``` Initialize Squad in this worktree with a separate team ``` Creates worktree-local `.squad/` directory. Team is independent from main worktree. - ``` Use the main worktree's Squad team ``` Creates symlink to main worktree's `.squad/`. All state is shared. - ``` Which worktrees have active Squad teams? ``` Scans all worktrees linked to this repository, reports which have `.squad/` directories. - ``` Show me the team roster for the main worktree ``` Resolves main worktree path, reads `team.md` from there (useful when in a feature worktree). - ``` Convert this worktree to use the main team ``` diff --git a/docs/src/content/docs/get-started/choose-your-interface.md b/docs/src/content/docs/get-started/choose-your-interface.md index a0c1d9a07..d6889efc3 100644 --- a/docs/src/content/docs/get-started/choose-your-interface.md +++ b/docs/src/content/docs/get-started/choose-your-interface.md @@ -1,178 +1,110 @@ -# Choose your interface - -> ⚠️ **Experimental** — Squad is alpha software. 
APIs, commands, and behavior may change between releases. - - -Squad works across multiple interfaces. Pick the one that fits your workflow. - ---- - -## Try this: - -```bash -# Day-to-day work with your squad -copilot --agent squad - -# Setup and diagnostics -squad init -squad doctor -``` - ---- - -## What are the ways to use Squad? - -Squad runs in multiple modes and across multiple platforms: - -### GitHub Copilot CLI (`copilot` command) - -The conversational terminal interface. Powered by the GitHub Copilot CLI, this is the recommended way to work with Squad day-to-day. - -```bash -copilot --agent squad -``` - -Reads `.squad/` and uses `squad.agent.md` to coordinate your team. Full feature set — sub-agent spawning, per-spawn model selection, background execution, SQL tools, parallel fan-out. - -### VS Code (GitHub Copilot in the editor) - -Squad works identically in VS Code through GitHub Copilot. Same `.squad/` directory, same agents, same decisions. Full file access, parallel execution, MCP tool inheritance. See [Squad in VS Code](../features/vscode.md) for details. - -### Squad CLI (`squad` command) - -The Squad CLI provides setup, diagnostics, and automation commands. Not conversational — use this for installation, validation, and operational tasks. - -```bash -# Setup -squad init - -# Validation -squad doctor - -# Monitoring -squad watch - -# Observability -squad aspire -``` - -See [CLI Reference](../reference/cli.md) for all commands. - -### Interactive shell (`squad start` / `squad shell`) - -> ⚠️ **Deprecated:** The interactive shell is no longer recommended. Use [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli) instead for a richer agent experience. -> -> ```bash -> copilot --agent squad -> ``` - -REPL mode for conversational interaction directly via the Squad CLI. Enter `squad` with no arguments to start a persistent shell session. 
- -### SDK (`@bradygaster/squad-sdk`) - -Programmatic access for building tools on top of Squad. Typed APIs, routing config, agent lifecycle hooks. - -```bash -npm install @bradygaster/squad-sdk -``` - -```typescript -import { resolveSquad, loadConfig, SquadCoordinator } from '@bradygaster/squad-sdk'; -``` - -See [SDK Reference](../reference/sdk.md) for the complete API. - -### Copilot Coding Agent (`@copilot`) - -Autonomous GitHub bot that picks up labeled issues and opens draft PRs. Works across your entire organization without human intervention. Issue gets labeled → agent picks it up → PR gets opened → human reviews. - -See [Copilot Coding Agent](../features/copilot-coding-agent.md) for setup. - ---- - -## Which should I use? - -| You want to... | Use | Why | -|----------------|-----|-----| -| **Work with your squad day-to-day** | **GitHub Copilot CLI** or **VS Code** | Conversational interface, full agent spawning, parallel execution. Most natural way to collaborate with your team. | -| **Set up Squad in a new repo** | **Squad CLI** (`squad init`) | One command initializes `.squad/` directory and all configuration. | -| **Check if Squad is working** | **Squad CLI** (`squad doctor`) | Validates directory structure, agents, configuration integrity. | -| **Monitor work 24/7** | **Squad CLI** (`squad watch`) | Persistent polling for new issues, auto-triage, agent assignment. | -| **View OpenTelemetry traces** | **Squad CLI** (`squad aspire`) | Launches Aspire dashboard for observability. | -| **Process issues autonomously** | **Copilot Coding Agent** | GitHub Actions workflow watches for labeled issues and dispatches `@copilot`. | -| **Build tools on top of Squad** | **SDK** | Typed APIs, configuration loading, agent lifecycle hooks. | - ---- - -## Feature availability matrix - -Not every feature works everywhere. 
Here's what's available where: - -| Feature | GitHub Copilot CLI | VS Code | Squad CLI | Interactive shell | SDK | -|---------|:------------------:|:-------:|:---------:|:--------:|:---:| -| Agent spawning | ✅ | ✅ | ✅ | ⚠️ (deprecated) | ✅ | -| Ralph / work monitoring | ✅ | ✅ | ✅ (`squad watch`) | ❌ | ✅ | -| Per-spawn model selection | ✅ | ⚠️ (session model only) | ✅ | ❌ | ✅ | -| Background execution | ✅ | ⚠️ (parallel sync) | ✅ | ❌ | ✅ | -| SQL tool | ✅ | ❌ | ✅ | ❌ | ✅ | -| Aspire dashboard | ❌ | ❌ | ✅ | ❌ | ❌ | -| `squad doctor` diagnostics | ❌ | ❌ | ✅ | ❌ | ✅ | -| Issue assignment to `@copilot` | ❌ | ❌ | ✅ (setup) | ❌ | ❌ | - -**Legend:** -- ✅ Fully supported -- ⚠️ Limited or constrained -- ❌ Not available - -For a detailed breakdown of VS Code constraints and CLI parity, see [Client Compatibility Matrix](../scenarios/client-compatibility.md). - ---- - -## Common workflows - -### "I use GitHub Copilot CLI for everything" - -```bash -# Terminal 1: Work with Squad -copilot --agent squad - -# Let Squad call `squad` commands when needed (doctor, watch, aspire) -``` - -This is the recommended workflow. The CLI automatically invokes Squad CLI commands when needed. - -### "I run squad watch in one terminal and use GitHub Copilot CLI in another" - -```bash -# Terminal 1: Monitoring (persistent) -squad watch --interval 10 - -# Terminal 2: Work with Squad -copilot --agent squad -``` - -Keep Ralph monitoring issues in the background while you work conversationally. - -### "I use VS Code with Copilot for coding and Squad CLI for setup" - -```bash -# One-time setup -squad init -squad doctor - -# Open VS Code, select Squad from agent picker -# Same .squad/ directory, same team -``` - -Initialize with CLI, work in VS Code. 
- ---- - -## See also - -- [Installation](installation.md) — Install Squad CLI, SDK, or use in VS Code -- [First Session](first-session.md) — Get started with your first Squad conversation -- [Client Compatibility Matrix](../scenarios/client-compatibility.md) — Full feature comparison across platforms -- [CLI Reference](../reference/cli.md) — All Squad CLI commands -- [Squad in VS Code](../features/vscode.md) — VS Code-specific guidance -- [SDK Reference](../reference/sdk.md) — Programmatic API +# Choose your interface +Squad works across multiple interfaces. Pick the one that fits your workflow. +--- +## Try this: +```bash +# Day-to-day work with your squad +copilot --agent squad +# Setup and diagnostics +squad init +squad doctor +``` +--- +## What are the ways to use Squad? +Squad runs in multiple modes and across multiple platforms: +### GitHub Copilot CLI (`copilot` command) +The conversational terminal interface. Powered by the GitHub Copilot CLI, this is the recommended way to work with Squad day-to-day. +```bash +copilot --agent squad +``` +Reads `.squad/` and uses `squad.agent.md` to coordinate your team. Full feature set — sub-agent spawning, per-spawn model selection, background execution, SQL tools, parallel fan-out. +### VS Code (GitHub Copilot in the editor) +Squad works identically in VS Code through GitHub Copilot. Same `.squad/` directory, same agents, same decisions. Full file access, parallel execution, MCP tool inheritance. See [Squad in VS Code](../features/vscode.md) for details. +### Squad CLI (`squad` command) +The Squad CLI provides setup, diagnostics, and automation commands. Not conversational — use this for installation, validation, and operational tasks. +```bash +# Setup +squad init +# Validation +squad doctor +# Monitoring +squad watch +# Observability +squad aspire +``` +See [CLI Reference](../reference/cli.md) for all commands. +### SDK (`@bradygaster/squad-sdk`) +Programmatic access for building tools on top of Squad. 
Typed APIs, routing config, agent lifecycle hooks. +```bash +npm install @bradygaster/squad-sdk +``` +```typescript +import { resolveSquad, loadConfig, SquadCoordinator } from '@bradygaster/squad-sdk'; +``` +See [SDK Reference](../reference/sdk.md) for the complete API. +### Copilot Coding Agent (`@copilot`) +Background GitHub automation that picks up labeled issues and opens draft PRs. Works across your organization with human-defined guardrails. Issue gets labeled → agent picks it up → PR gets opened → human reviews. +See [Copilot Coding Agent](../features/copilot-coding-agent.md) for setup. +--- +## Which should I use? +| You want to... | Use | Why | +|----------------|-----|-----| +| **Work with your squad day-to-day** | **GitHub Copilot CLI** or **VS Code** | Conversational interface, full agent spawning, parallel execution. Most natural way to collaborate with your team. | +| **Set up Squad in a new repo** | **Squad CLI** (`squad init`) | One command initializes `.squad/` directory and all configuration. | +| **Check if Squad is working** | **Squad CLI** (`squad doctor`) | Validates directory structure, agents, configuration integrity. | +| **Monitor work 24/7** | **Squad CLI** (`squad watch`) | Persistent polling for new issues, auto-triage, agent assignment. | +| **View OpenTelemetry traces** | **Squad CLI** (`squad aspire`) | Launches Aspire dashboard for observability. | +| **Process approved issues in the background** | **Copilot Coding Agent** | GitHub Actions workflow watches for labeled issues and dispatches `@copilot`. | +| **Build tools on top of Squad** | **SDK** | Typed APIs, configuration loading, agent lifecycle hooks. | +--- +## Feature availability matrix +Not every feature works everywhere. 
Here's what's available where: +| Feature | GitHub Copilot CLI | VS Code | Squad CLI | SDK | +|---------|:------------------:|:-------:|:---------:|:---:| +| Agent spawning | ✅ | ✅ | ✅ | ✅ | +| Ralph / work monitoring | ✅ | ✅ | ✅ (`squad watch`) | ✅ | +| Per-spawn model selection | ✅ | ⚠️ (session model only) | ✅ | ✅ | +| Background execution | ✅ | ⚠️ (parallel sync) | ✅ | ✅ | +| SQL tool | ✅ | ❌ | ✅ | ✅ | +| Aspire dashboard | ❌ | ❌ | ✅ | ❌ | +| `squad doctor` diagnostics | ❌ | ❌ | ✅ | ✅ | +| Issue assignment to `@copilot` | ❌ | ❌ | ✅ (setup) | ❌ | +**Legend:** +- ✅ Fully supported +- ⚠️ Limited or constrained +- ❌ Not available +For a detailed breakdown of VS Code constraints and CLI parity, see [Client Compatibility Matrix](../scenarios/client-compatibility.md). +--- +## Common workflows +### "I use GitHub Copilot CLI for everything" +```bash +# Terminal 1: Work with Squad +copilot --agent squad +# Let Squad call `squad` commands when needed (doctor, watch, aspire) +``` +This is the recommended workflow. GitHub Copilot CLI automatically invokes Squad CLI commands when needed. +### "I run squad watch in one terminal and use GitHub Copilot CLI in another" +```bash +# Terminal 1: Monitoring (persistent) +squad watch --interval 10 +# Terminal 2: Work with Squad +copilot --agent squad +``` +Keep Ralph monitoring issues in the background while you work conversationally. +### "I use VS Code with Copilot for coding and Squad CLI for setup" +```bash +# One-time setup +squad init +squad doctor +# Open VS Code, select Squad from agent picker +# Same .squad/ directory, same team +``` +Initialize with CLI, work in VS Code. 
+--- +## See also +- [Installation](installation.md) — Install Squad CLI, SDK, or use in VS Code +- [First Session](first-session.md) — Get started with your first Squad conversation +- [Client Compatibility Matrix](../scenarios/client-compatibility.md) — Full feature comparison across platforms +- [CLI Reference](../reference/cli.md) — All Squad CLI commands +- [Squad in VS Code](../features/vscode.md) — VS Code-specific guidance +- [SDK Reference](../reference/sdk.md) — Programmatic API diff --git a/docs/src/content/docs/get-started/choosing-your-path.md b/docs/src/content/docs/get-started/choosing-your-path.md index 16eacec5e..53f87dbf6 100644 --- a/docs/src/content/docs/get-started/choosing-your-path.md +++ b/docs/src/content/docs/get-started/choosing-your-path.md @@ -1,82 +1,52 @@ -# Choose your path - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -CLI, Copilot agent, or SDK? Pick the right mode for your workflow. - ---- - -## Three modes - -### CLI mode - -Install Squad globally or per-project, then use terminal commands to initialize, route work, and manage your team. - -```bash -npm install -g @bradygaster/squad-cli -squad init -squad status -squad watch -``` - -**Use for:** Terminal workflows, automation scripts, CI/CD integration. - ---- - -### Copilot agent mode - -Talk to Squad in GitHub Copilot CLI or VS Code. Squad is built-in as an agent. Your `.squad/` directory works identically to CLI mode. - -```bash -copilot -> /agent Squad - -Squad: Hey Brady, what are you building? -``` - -**Use for:** Conversational workflows, exploratory work, VS Code users. - ---- - -### SDK mode - -Write TypeScript code that spawns agents, routes work, and coordinates teams programmatically. Full access to Squad's internals. 
- -```bash -npm install @bradygaster/squad-sdk -``` - -```typescript -import { Coordinator } from '@bradygaster/squad-sdk'; - -const coordinator = new Coordinator(); -const result = await coordinator.route('Build a login page'); -``` - -**Use for:** Building tools on Squad, custom integrations, advanced automation. - ---- - -## Decision table - -| **Your goal** | **Use** | -|---------------|---------| -| Try Squad quickly | **Copilot agent** — no install | -| Work in the terminal | **CLI** | -| Work in VS Code | **Copilot agent** | -| Automate repetitive tasks | **CLI** or **SDK** | -| Build custom tooling | **SDK** | -| CI/CD integration | **CLI** or **SDK** | - ---- - -## Can I use multiple modes? - -Yes. Your `.squad/` directory is the source of truth. CLI, Copilot agent, and SDK all read and write the same files. You can switch between modes anytime. - -Example workflow: -1. Use **Copilot agent** to form your team and do exploratory work -2. Use **CLI** (`squad watch`) to monitor issues in the background -3. Use **SDK** to build a custom deployment script that spawns agents - -All three modes share the same memory and decisions. +# Choose your path +CLI, Copilot agent, or SDK? Pick the right mode for your workflow. +--- +## Three modes +### CLI mode +Install Squad globally or per-project, then use terminal commands to initialize, route work, and manage your team. +```bash +npm install -g @bradygaster/squad-cli +squad init +squad status +squad watch +``` +**Use for:** Terminal workflows, automation scripts, CI/CD integration. +--- +### Copilot agent mode +Talk to Squad in GitHub Copilot CLI or VS Code. Squad is built-in as an agent. Your `.squad/` directory works identically to CLI mode. +```bash +copilot +> /agent Squad +Squad: Hey Brady, what are you building? +``` +**Use for:** Conversational workflows, exploratory work, VS Code users. +--- +### SDK mode +Write TypeScript code that spawns agents, routes work, and coordinates teams programmatically. 
Full access to Squad's internals. +```bash +npm install @bradygaster/squad-sdk +``` +```typescript +import { Coordinator } from '@bradygaster/squad-sdk'; +const coordinator = new Coordinator(); +const result = await coordinator.route('Build a login page'); +``` +**Use for:** Building tools on Squad, custom integrations, advanced automation. +--- +## Decision table +| **Your goal** | **Use** | +|---------------|---------| +| Try Squad quickly | **Copilot agent** — no install | +| Work in the terminal | **CLI** | +| Work in VS Code | **Copilot agent** | +| Automate repetitive tasks | **CLI** or **SDK** | +| Build custom tooling | **SDK** | +| CI/CD integration | **CLI** or **SDK** | +--- +## Can I use multiple modes? +Yes. Your `.squad/` directory is the source of truth. CLI, Copilot agent, and SDK all read and write the same files. You can switch between modes anytime. +Example workflow: +1. Use **Copilot agent** to form your team and do exploratory work +2. Use **CLI** (`squad watch`) to monitor issues in the background +3. Use **SDK** to build a custom deployment script that spawns agents +All three modes share the same memory and decisions. diff --git a/docs/src/content/docs/get-started/first-session.md b/docs/src/content/docs/get-started/first-session.md index 6c62bbebc..016d80566 100644 --- a/docs/src/content/docs/get-started/first-session.md +++ b/docs/src/content/docs/get-started/first-session.md @@ -1,97 +1,62 @@ # Your First Session - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - A step-by-step walkthrough from install to parallel fan-out. Follow along in your terminal. - --- - ## Try this: - ```bash mkdir my-app && cd my-app && git init squad ``` - Then tell your team what you're building. - --- - ## 1. Install Squad - Start with a git repo — new or existing: - ```bash mkdir my-app && cd my-app git init squad ``` - You'll see: - ``` ✅ Squad installed. 
.github/agents/squad.agent.md — coordinator agent .squad/templates/ — 11 template files - Open GitHub Copilot and select Squad from the agent list. ``` - --- - ## 2. Open Copilot — Your Team Forms - Start the Copilot CLI: - ```bash copilot ``` - Select **Squad** from the `/agent` list (CLI) or `/agents` (VS Code). Squad greets you by name: - ``` Hey Brady, what are you building? ``` - Describe your project: - ``` > I'm building a recipe sharing app with React and Node.js. Users can > post recipes, search by ingredient, and save favorites. ``` - Squad proposes a team. The coordinator analyzes your description and suggests specialists: - ``` Here's your team: - 🏗️ Hicks — Lead Scope, decisions, code review ⚛️ Ripley — Frontend Dev React, UI, components 🔧 Dallas — Backend Dev Node.js, APIs, database 🧪 Lambert — Tester Tests, quality, edge cases 📋 Scribe — (silent) Memory, decisions, session logs - **Scribe** and **Ralph** are always on every roster — you don't manage them. - Look right? Say yes, add someone, or change a role. (Or just give me a task to start!) ``` - --- - ## 3. Confirm and Give Your First Task - Say "yes" or skip straight to a task (which is implicit confirmation): - ``` > Yes. Dallas, set up the Express server with basic routing. ``` - Squad creates the `.squad/` directory structure — team roster, routing rules, casting state, ceremony config, agent charters, and histories — all seeded with your project context. Each agent spawns to do their work. - ### What's inside .squad/? - | File/Directory | Purpose | |---|---| | `team.md` | Team roster, roles, and member info | @@ -102,32 +67,21 @@ Squad creates the `.squad/` directory structure — team roster, routing rules, | `casting/` | Team formation history and casting state | | `skills/` | Reusable capabilities agents can learn | | `templates/` | Format reference files for docs, PRDs, charters | - **You own these files.** Edit them anytime — change roles, add routing rules, fix decisions. 
Squad reads them before every spawn. - **Commit `.squad/` to version control.** It's your team's brain. Anyone who clones the repo gets the team with all their knowledge. - Then it spawns Dallas: - ``` 🔧 Dallas — setting up Express server with routing - Dallas is on it — building the Express server now. ``` - --- - ## 4. Parallel Fan-Out — The Launch Table - Now give a bigger task to see parallel work: - ``` > Team, build the recipe listing page. We need an API endpoint > that returns recipes and a React component that displays them. ``` - Squad decomposes the work and launches agents simultaneously: - ``` 🏗️ Hicks — reviewing requirements, defining API contract ⚛️ Ripley — building RecipeList component @@ -135,28 +89,20 @@ Squad decomposes the work and launches agents simultaneously: 🧪 Lambert — writing test cases from requirements 📋 Scribe — logging session ``` - All five agents are working at the same time in separate context windows. The tester writes test cases from the requirements *while* the implementation is being built — no waiting. - --- - ## 5. Results Labeled by Agent - When agents finish, you see results tagged with each agent's identity: - ``` 🏗️ Hicks — Defined API contract: GET /api/recipes returns { recipes: [{ id, title, ingredients, prepTime }] }. Decided on cursor-based pagination for large result sets. - ⚛️ Ripley — Built RecipeList component at src/components/RecipeList.tsx. Uses React Query for data fetching. Card layout with title, prep time, and ingredient count. - 🔧 Dallas — Created GET /api/recipes endpoint at src/routes/recipes.ts. Connected to MongoDB. Supports ?cursor= pagination per Hicks' spec. Also added GET /api/recipes/:id for detail view (proactive). - 🧪 Lambert — Wrote 8 test cases in test/recipes.test.ts: 📌 Proactive: Written from requirements while Dallas was building. May need adjustment once implementation is final. 
@@ -166,138 +112,96 @@ When agents finish, you see results tagged with each agent's identity: - Detail endpoint returns 404 for missing recipe [+4 more] ``` - Behind the scenes, the Scribe merged decisions from each agent into `decisions.md` and logged the session. - --- - ## 6. Ask "Where Are We?" — Instant Status - At any point, check status: - ``` > Where are we? ``` - The coordinator reads recent logs: - ``` Last session: Recipe listing page (15 minutes ago) - Who worked: - Hicks defined the API contract (cursor pagination) - Ripley built RecipeList component - Dallas created recipes endpoint + detail endpoint - Lambert wrote 8 test cases - Key decisions: - Cursor-based pagination for recipe lists - React Query for frontend data fetching - MongoDB as database - 3 decisions in decisions.md, 4 agent history entries updated. ``` - --- - ## 7. Follow-Up — Watch Decisions Accumulate - Give a follow-up task: - ``` > Now add a search feature. Users should be able to search > recipes by ingredient name. ``` - Squad fans out again: - ``` 🔧 Dallas — adding search endpoint with ingredient filtering ⚛️ Ripley — building search UI with ingredient input 🧪 Lambert — writing search test cases 📋 Scribe — logging session ``` - After completion, check decisions: - ``` > Show me the decisions ``` - ``` ### 2025-07-15: Cursor-based pagination for recipe lists **By:** Hicks **What:** All list endpoints use cursor-based pagination **Why:** Scales better than offset pagination for growing datasets - ### 2025-07-15: React Query for data fetching **By:** Ripley **What:** All API calls use React Query with stale-while-revalidate **Why:** Built-in caching, refetching, and loading states - ### 2025-07-15: Full-text search via MongoDB text index **By:** Dallas **What:** Search uses $text index on recipe title and ingredients array **Why:** Native MongoDB capability, no external search service needed ``` - -Every agent reads these decisions before working. 
As the list grows, the team self-aligns — agents respect each other's architectural choices without you repeating them. - +Every agent reads these decisions before working. As the list grows, the team retains context so you don't need to restate architectural choices. --- - ## 8. Set a Directive - You notice something you want to standardize: - ``` > Always use Zod for API input validation ``` - Squad captures it — no agent spawn needed: - ``` 📌 Captured. All API input validation must use Zod. ``` - This gets written to `decisions.md`. Every agent will see it in their next spawn. - --- - ## 9. Export Your Squad - After a productive session, export your team for portability: - ```bash squad export ``` - ``` ✅ Exported to squad-export.json 5 agents, 3 skills, 6 decisions ``` - This snapshot contains charters, histories, casting state, skills, and decisions. Import into another repo anytime: - ```bash cd ../other-project squad squad import ../my-app/squad-export.json ``` - --- - ## Tips - - **First session is slowest.** Agents have no history yet. After 2–3 sessions, they know your conventions. - **Commit `.squad/`** — your team's brain. Anyone who clones the repo gets the full team. - **Say "team" for big tasks.** The word "team" triggers parallel fan-out across multiple agents. - **Name an agent for focused work.** `"Dallas, fix the login bug"` sends work to one specific agent. - **Directives are sticky.** Once captured, they persist across all future sessions. 
- --- - ## What to Try Next - - [**Your Team**](../concepts/your-team.md) — How agents form, specialize, and collaborate - [**Memory & Knowledge**](../concepts/memory-and-knowledge.md) — Decisions, skills, and persistent context - [**Existing Repo Scenario**](../scenarios/existing-repo.md) — Bring Squad into a project that's already in flight diff --git a/docs/src/content/docs/get-started/five-minute-start.md b/docs/src/content/docs/get-started/five-minute-start.md index 29667ea95..00be803b7 100644 --- a/docs/src/content/docs/get-started/five-minute-start.md +++ b/docs/src/content/docs/get-started/five-minute-start.md @@ -1,87 +1,52 @@ -# Quick start - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -Your first 5 minutes with Squad. Prove it works before you learn anything. - ---- - -## Prerequisites - -- **Node.js 20+** — Check with `node --version` -- **Git repository** — New or existing - ---- - -## Install - -```bash -npm install --save-dev @bradygaster/squad-cli -``` - -Then initialize: - -```bash -npx squad init -``` - -You'll see: - -``` -✅ Squad installed. - .github/agents/squad.agent.md — coordinator agent - .squad/templates/ — 11 template files - -Open GitHub Copilot and select Squad from the agent list. -``` - ---- - -## Validate - -Check that Squad created your team directory: - -```bash -ls .squad/ -``` - -You should see: `team.md`, `routing.md`, `decisions.md`, `agents/`, and more. - -Confirm Squad is ready: - -```bash -npx squad status -``` - ---- - -## Try it - -Open GitHub Copilot in your terminal or VS Code. Select **Squad** from the agent list (`/agent Squad` in CLI or `/agents` in VS Code). - -Say something simple: - -``` -> I'm building a task management app with React and Node.js. -> Users can create, update, and delete tasks. -``` - -Squad forms your team and responds with agent names and roles. 
Say yes, or just give your first task: - -``` -> Team, create a basic Express server with a /health endpoint. -``` - -Squad spawns agents and does the work. - ---- - -## What just happened? - -Squad read your description, formed a team of specialists, wrote their charters to `.squad/agents/`, and coordinated parallel work. Check `.squad/decisions.md` to see what they decided. - ---- - -## Next steps - -[**Your first session**](first-session) — Step-by-step walkthrough of parallel work, decisions, and memory. +# Quick start +Your first 5 minutes with Squad. Prove it works before you learn anything. +--- +## Prerequisites +- **Node.js 20+** — Check with `node --version` +- **Git repository** — New or existing +--- +## Install +```bash +npm install --save-dev @bradygaster/squad-cli +``` +Then initialize: +```bash +npx squad init +``` +You'll see: +``` +✅ Squad installed. + .github/agents/squad.agent.md — coordinator agent + .squad/templates/ — 11 template files +Open GitHub Copilot and select Squad from the agent list. +``` +--- +## Validate +Check that Squad created your team directory: +```bash +ls .squad/ +``` +You should see: `team.md`, `routing.md`, `decisions.md`, `agents/`, and more. +Confirm Squad is ready: +```bash +npx squad status +``` +--- +## Try it +Open GitHub Copilot in your terminal or VS Code. Select **Squad** from the agent list (`/agent Squad` in CLI or `/agents` in VS Code). +Say something simple: +``` +> I'm building a task management app with React and Node.js. +> Users can create, update, and delete tasks. +``` +Squad forms your team and responds with agent names and roles. Say yes, or just give your first task: +``` +> Team, create a basic Express server with a /health endpoint. +``` +Squad spawns agents and moves the work forward. +--- +## What just happened? +Squad read your description, formed a team of specialists, wrote their charters to `.squad/agents/`, and coordinated parallel work. Check `.squad/decisions.md` to see what they decided. 
+--- +## Next steps +[**Your first session**](first-session) — Step-by-step walkthrough of parallel work, decisions, and memory. diff --git a/docs/src/content/docs/get-started/installation.md b/docs/src/content/docs/get-started/installation.md index 3f393ac02..6b30eecdf 100644 --- a/docs/src/content/docs/get-started/installation.md +++ b/docs/src/content/docs/get-started/installation.md @@ -1,148 +1,80 @@ # Installation - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Three ways to get Squad running. Pick the one that fits. - --- - ## Try this: - ```bash npm install -g @bradygaster/squad-cli squad ``` - That's it. You're in. - --- - ## 1. CLI (Recommended) - The CLI is the fastest way to use Squad from any terminal. - ### Global install - ```bash npm install -g @bradygaster/squad-cli ``` - Now use it anywhere: - ```bash squad init squad status squad watch ``` - ### One-off with npx - No install needed — run the latest version directly: - ```bash npx @bradygaster/squad-cli init npx @bradygaster/squad-cli status ``` - ### Verify - ```bash squad --version ``` - ### Update - ```bash npm install -g @bradygaster/squad-cli@latest ``` - --- - ## Which method should I use? - Pick based on what you're doing: - | **You want to...** | **Use** | **Why** | |--------------------|---------|---------| | Try Squad quickly | **CLI** with `npx` | No install needed. Run `npx @bradygaster/squad-cli init` and you're testing it. | | Use Squad across all projects | **CLI** with `--global` | One install. Works everywhere. Run `squad` from any terminal. | | Work inside VS Code | **VS Code** (just open your project) | Already using Copilot? Squad just works. Same `.squad/` directory as CLI. | | Build tools on top of Squad | **SDK** | Typed APIs, routing config, agent lifecycle hooks. Programmatic access to everything. | - Can't decide? → Start with **CLI**. You can always add VS Code or the SDK later. 
Your `.squad/` directory works identically everywhere. - --- - ## 2. VS Code - Squad works in VS Code through GitHub Copilot. Your `.squad/` directory works identically in both CLI and VS Code — same agents, same decisions, same memory. - > **Tip:** Initialize your team with the CLI (`squad`), then open the project in VS Code to keep working with the same squad. - --- - ## 3. SDK - Building your own tooling on top of Squad? Install the SDK as a project dependency: - ```bash npm install @bradygaster/squad-sdk ``` - Then import what you need: - ```typescript import { defineConfig, loadConfig, resolveSquad } from '@bradygaster/squad-sdk'; ``` - The SDK gives you typed configuration, routing, model selection, and the full agent lifecycle API. See the [SDK Reference](../reference/sdk.md) for details. - --- - -### Personal squad (cross-project) - -Want the same agents across all your projects? - -```bash -squad init --global -``` - -This creates your personal squad directory — a personal team root that any project can inherit from. See [Upstream Inheritance](../features/upstream-inheritance.md) for details. - -**Personal squad location by platform:** - -| Platform | Path | -|----------|------| -| Linux | `~/.config/squad/` | -| macOS | `~/Library/Application Support/squad/` | -| Windows | `%APPDATA%\squad\` | - ---- - ## First-Time Setup - After installing, initialize Squad in your project: - ```bash cd your-project squad init ``` - This creates: - ``` .github/agents/squad.agent.md — coordinator agent .squad/ — team state directory ``` - ### Configuration (optional) - For typed configuration, create a `squad.config.ts` at your project root: - ```typescript import { defineConfig } from '@bradygaster/squad-sdk'; - export default defineConfig({ team: { name: 'my-squad', @@ -151,47 +83,29 @@ export default defineConfig({ }, }); ``` - `defineConfig()` gives you full autocomplete and validation. 
But you don't need it to get started — Squad works out of the box with sensible defaults. - --- - ## Troubleshooting - ### `squad: command not found` - Your npm global bin isn't in your PATH. Fix: - ```bash # Check if installed npm list -g @bradygaster/squad-cli - # If installed but not found, check PATH: echo $PATH | grep npm # macOS/Linux echo %PATH% | findstr npm # Windows ``` - ### `Cannot find .squad/ directory` - -Run `squad init` in your project root, or `squad init --global` for a personal squad. - +Run `squad init` in your project root to create your Squad files. ### Version mismatch between CLI and SDK - Update both: - ```bash npm install -g @bradygaster/squad-cli@latest npm install @bradygaster/squad-sdk@latest ``` - --- - ## Ready to Learn? - New to Squad? Check out [**Tamir's Squad Skills Workshop**](https://github.com/tamirdresher/squad-skills/tree/main/workshop) for hands-on learning and practical patterns. - --- - ## Next Steps - → [Your First Session](first-session.md) diff --git a/docs/src/content/docs/guide.md b/docs/src/content/docs/guide.md index 01e0f6a87..dc100130c 100644 --- a/docs/src/content/docs/guide.md +++ b/docs/src/content/docs/guide.md @@ -1,119 +1,79 @@ # Squad — Product Guide - ## What is Squad? - -Squad gives you an AI development team through GitHub Copilot. You describe what you're building. Squad proposes a team of specialists — lead, frontend, backend, tester — that live in your repo as files. Each agent runs in its own context window, reads its own knowledge, and writes back what it learned. They persist across sessions, share decisions, and get better the more you use them. - +Squad gives you a human+AI development team through GitHub Copilot. You describe what you're building. Squad proposes a team of specialists — lead, frontend, backend, tester — that live in your repo as files. Each agent runs in its own context window, reads its own knowledge, and writes back what it learned. 
They persist across sessions, share decisions, and get better the more you use them. It is not a chatbot wearing hats. Each team member is spawned as a real sub-agent with its own tools, its own memory, and its own area of expertise. - --- - ## Which CLI should I use? - **Use GitHub Copilot CLI for day-to-day work.** It's the recommended interface for interacting with your Squad — full agent spawning, model selection, and conversational access to all features. - **Use Squad CLI for setup and operations:** - Initial setup: `squad init` - Build from config: `squad build` - Diagnostics: `squad doctor` -- Interactive shell: `squad shell` — **Deprecated** (use `copilot --agent squad`) - Continuous triage: `squad triage --interval 10` - Watch mode: `squad watch` - Aspire dashboard: `squad aspire` - Export/import: `squad export` and `squad import` - Plugin management: `squad plugin install <plugin>` - **Common workflow:** ```bash # Terminal 1: Run continuous triage (Squad CLI) squad triage --interval 10 - # Terminal 2: Work with your team (GitHub Copilot CLI) -gh copilot -> @squad what issues are ready to work? +copilot --agent squad +Team, what issues are ready to work? ``` - Both CLIs read and write the same `.squad/` directory, so state stays synchronized. For more details, see [FAQ: Which CLI should I use?](guide/faq.md#which-cli-should-i-use) and [Client Compatibility Matrix](scenarios/client-compatibility.md). - --- - ## Supported platforms - Squad works across multiple interfaces — GitHub Copilot CLI, VS Code, Squad CLI, SDK, and the Copilot Coding Agent.
Pick the one that fits your workflow: - - **GitHub Copilot CLI** — Day-to-day conversational work with your squad (recommended) - **VS Code** — Same experience, editor-integrated - **Squad CLI** — Setup, diagnostics, monitoring (`squad init`, `squad doctor`, `squad watch`) - **SDK** — Build tools on top of Squad with `squad.config.ts` -- **Copilot Coding Agent** — Autonomous issue processing via `@copilot` - +- **Copilot Coding Agent** — Label-driven issue processing via `@copilot` **Multi-platform support:** Squad also works with Azure DevOps (work items, PRs via `az boards`/`az repos`), GitLab Issues, and Microsoft Planner through pluggable platform adapters. See [Enterprise Platforms](features/enterprise-platforms.md) for details. - Not sure which to use? See [Choose your interface](get-started/choose-your-interface.md) for a complete comparison and decision tree. - --- - ## Installation - ```bash npm install -g @bradygaster/squad-cli ``` - **Requirements:** - Node.js 20+ (LTS) - GitHub Copilot (CLI, VS Code, Visual Studio, or Coding Agent) - A git repository (Squad stores team state in `.squad/`) - **`gh` CLI** — required for GitHub Issues, PRs, Ralph, and Project Boards ([install](https://cli.github.com/)) - Running `squad init` creates the `.squad/` directory structure, copies `squad.agent.md` into `.github/agents/`, and installs GitHub Actions workflows into `.github/workflows/`. Your team is created at runtime when you first talk to Squad. - **Note:** When you select Squad from the agent picker, you'll see the version number in the name (e.g., "Squad (v0.8.25)"). This helps you confirm which version is installed. - ### GitHub CLI authentication - Squad uses the `gh` CLI for all GitHub API operations — issues, PRs, labels, project boards, and Ralph's work monitoring. You must authenticate before using any of these features. 
- **Quick start:** - ```bash gh auth login ``` - Choose **GitHub.com**, **HTTPS**, and authenticate with your browser or a Personal Access Token (PAT Classic). - **Verify it worked:** - ```bash gh auth status ``` - **Additional scopes** — some features require scopes beyond the default: - | Feature | Required scope | Command | |---------|---------------|---------| | Issues, PRs, Ralph | `repo` (included by default) | — | | Project Boards | `project` | `gh auth refresh -s project` | - The `gh auth refresh` command adds scopes to your existing token — it takes about 10 seconds and you only need to do it once. - **Troubleshooting:** - - **"gh: command not found"** — Install the GitHub CLI from https://cli.github.com/ - **"HTTP 401" or "authentication required"** — Run `gh auth login` to re-authenticate - **Project board commands fail** — Run `gh auth refresh -s project` to add the `project` scope - **"Resource not accessible by integration"** — Your token may lack the `repo` scope. Re-authenticate with a PAT Classic that has `repo` and `project` scopes - --- - ## How teams form (init mode) - When you open Copilot and select **Squad** for the first time in a repo, there's no team yet. Squad enters Init Mode: - 1. **Squad identifies you** via `git config user.name` and uses your name in conversation. 2. **You describe your project** — language, stack, what it does. 3. **Squad casts a team** — agents get names from a single fictional universe (e.g., Apollo 13 / NASA Mission Control, The Usual Suspects, Ocean's Eleven). The universe is selected deterministically based on team size, project shape, and what's been used before. Names are persistent identifiers — they don't change the agent's behavior or voice. 4. 
**Squad proposes the team:** - ``` 🏗️ FLIGHT — Lead Scope, decisions, code review ⚛️ RETRO — Frontend Dev React, UI, components @@ -121,13 +81,9 @@ When you open Copilot and select **Squad** for the first time in a repo, there's 🧪 TELMU — Tester Tests, quality, edge cases 📋 Scribe — (silent) Memory, decisions, session logs ``` - 5. **You confirm** — say "yes", adjust roles, add someone, or just give a task (which counts as implicit yes). - Squad then creates the `.squad/` directory structure with charters, histories, routing rules, casting state, and ceremony config. Each agent's `history.md` is seeded with your project description and tech stack so they have day-1 context. - ### What gets created - ``` .squad/ ├── team.md # Roster — who's on the team @@ -149,49 +105,31 @@ Squad then creates the `.squad/` directory structure with charters, histories, r ├── orchestration-log/ # Per-spawn log entries └── log/ # Session history ``` - **Commit this folder.** Anyone who clones your repo gets the team — with all their accumulated knowledge. - --- - ## Talking to your team (routing) - How you phrase your message determines who works on it. - ### Name an agent directly - ``` > FLIGHT, fix the error handling in the API ``` - Squad spawns FLIGHT specifically. - ### Say "team" for parallel fan-out - ``` > Team, build the login page ``` - Squad spawns multiple agents simultaneously — frontend builds the UI, backend sets up endpoints, tester writes test cases from the spec, all at once. - ### General requests - ``` > Add input validation to the form ``` - Squad checks `routing.md`, picks the best match, and may launch anticipatory agents (e.g., tester writes validation test cases while the implementer builds). - ### Quick questions — no spawn - ``` > What port does the server run on? ``` - Squad answers directly without spawning an agent. 
- ### Example prompts to try - | You say | What happens | |---------|-------------| | `"RETRO, set up the project structure"` | RETRO (Frontend) scaffolds the project | @@ -200,24 +138,15 @@ Squad answers directly without spawning an agent. | `"Run a retro"` | Lead facilitates a retrospective ceremony | | `"I need a DevOps person"` | A new agent joins, named from the same universe | | `"Always use single quotes in TypeScript"` | Captured as a directive to `decisions.md` | - --- - ## Response modes - Squad automatically picks the right response speed based on your request complexity. Direct answers take seconds, full agent spawns take longer but deliver deeper reasoning and parallel work. You don't control the mode — Squad routes based on what the task needs. - → [Full guide: Response Modes](features/response-modes.md) - --- - ## SDK-first mode - Define your team in TypeScript instead of maintaining markdown files manually. Write a `squad.config.ts` with type-safe builder functions, and `squad build` generates the `.squad/` governance markdown. - ```typescript import { defineSquad, defineTeam, defineAgent, defineRouting } from '@bradygaster/squad-sdk'; - export default defineSquad({ team: defineTeam({ name: 'Core Squad', @@ -238,30 +167,20 @@ export default defineSquad({ }), }); ``` - **Get started:** - ```bash squad init --sdk # New project with SDK config squad migrate --to sdk # Convert existing .squad/ to TypeScript squad build # Generate .squad/ from config squad build --check # Validate in CI without writing ``` - Builder functions: `defineTeam()`, `defineAgent()`, `defineRouting()`, `defineCeremony()`, `defineHooks()`, `defineCasting()`, `defineTelemetry()`, `defineSkill()`, `defineSquad()`. - → [Full guide: SDK-First Mode](sdk-first-mode.md) - --- - ## Casting system - Squad names agents from fictional universes — Apollo 13 / NASA Mission Control (the default), The Usual Suspects, Breaking Bad, Star Trek, and others. 
The universe is selected deterministically based on team size and project shape. - Casting is **persistent** — once an agent receives a name, it keeps that name across sessions. The casting registry lives in `.squad/casting/registry.json`. You control which universes are available through a policy allowlist and can set per-universe capacity limits. - In SDK-first mode, configure casting with `defineCasting()`: - ```typescript defineCasting({ allowlistUniverses: ['Apollo 13', 'Breaking Bad'], @@ -269,19 +188,12 @@ defineCasting({ capacity: { 'Apollo 13': 8 }, }); ``` - When a universe runs out of names, the overflow strategy determines what happens: `reject` (error), `generic` (use a functional name), or `rotate` (move to the next universe). - --- - ## Skills system - Skills are reusable knowledge patterns that agents load on demand. They live in `.copilot/skills/{name}/SKILL.md` and teach agents how to handle specific tasks — branching workflows, deployment strategies, testing patterns, or domain expertise. - Skills have a confidence lifecycle: `low` → `medium` → `high`, and track their source: `manual` (you wrote it), `observed` (agent saw a pattern), `earned` (validated through use), or `extracted` (imported from another project). - In SDK-first mode, define skills with `defineSkill()`: - ```typescript defineSkill({ name: 'git-workflow', @@ -296,19 +208,12 @@ defineSkill({ `, }); ``` - Skills accumulate as you work. After a few sessions, your team has a knowledge base tailored to your codebase. - → [Full guide: Skills](features/skills.md) - --- - ## Ceremonies - Ceremonies are structured team meetings. Squad ships with two default ceremonies — Design Review (triggers before multi-agent work) and Retrospective (triggers after failures). You can trigger ceremonies manually, create custom ones, or disable them. Configuration lives in `.squad/ceremonies.md`. 
- In SDK-first mode, define ceremonies with `defineCeremony()`: - ```typescript defineCeremony({ name: 'standup', @@ -318,161 +223,95 @@ defineCeremony({ agenda: 'Yesterday / Today / Blockers', }); ``` - → [Full guide: Ceremonies](features/ceremonies.md#ceremonies) - --- - ## Ralph — work monitor - Ralph triages your issue backlog, assigns work to agents, and keeps the board moving. Activate Ralph when you have open issues, and he reports every 3–5 rounds. - ``` > Ralph, start monitoring ``` - **CLI commands:** - `squad triage` — run a single triage pass - `squad triage --interval 10` — continuous triage every 10 minutes - `squad watch` — Ralph watchdog mode (monitors and auto-restarts) - The `squad-heartbeat` workflow runs Ralph on a schedule — your squad triages issues between sessions. - **Note:** `squad ralph` is a legacy alias. New projects should use `squad triage`. - → [Full guide: Ralph — Work Monitor](features/ralph.md#ralph--work-monitor) - --- - ## Memory system - Squad's memory is layered — personal agent histories, shared team decisions, and reusable skills. Knowledge compounds over sessions. After a few sessions, agents stop asking questions they've already answered. Mature projects carry full architecture knowledge and decision history. - → [Full guide: Memory System](features/memory.md) - --- - ## Plugin marketplace - Extend your squad with community plugins — reusable collections of skills, ceremonies, and directives. - ```bash squad plugin install github/my-org/my-extension squad plugin list squad plugin remove my-extension ``` - Plugins let you add domain expertise (Azure infrastructure patterns), workflow templates (client-delivery processes), or testing ceremonies without modifying Squad core. Build your own and share them. 
- → [Full guide: Plugins](features/plugins.md) | [Marketplace](features/marketplace.md) - --- - ## SubSquads (streams) - Break large teams into focused SubSquads — smaller groups that work independently on different features or domains. SubSquads maintain their own routing and task queues while sharing the parent squad's decisions and memory. - ```bash squad subsquads ``` - → [Full guide: Streams](features/streams.md) - --- - ## Export and import - Export creates a portable snapshot of your entire team — agents, knowledge, skills. Import brings that snapshot into another repo. Squad handles collision detection and splits imported knowledge into portable learnings and project-specific context automatically. - ```bash squad export --out my-team.json squad import my-team.json squad import my-team.json --force # Archive existing agents first ``` - → [Full guide: Export and Import](features/export-import.md#export--import) - --- - ## GitHub Issues mode - Squad integrates with GitHub Issues for issue-driven development. Connect to a repo, view the backlog, assign issues to agents, and Squad handles branch creation, implementation, PR creation, and review feedback. Agents link work to issues automatically. - → [Full guide: GitHub Issues Mode](features/github-issues.md#github-issues-mode) - --- - ## PRD mode - Paste your product requirements document directly into Squad. The Lead agent decomposes the spec into discrete work items, assigns them to the right agents, and the team works in parallel. Specs become trackable tasks automatically. - → [Full guide: PRD Mode](features/prd-mode.md#prd-mode) - --- - ## Human team members - Not every team member needs to be an AI agent. Add humans to the roster for decisions that require a real person — design sign-off, security review, product approval. Squad pauses when work is routed to a human and reminds you if they haven't responded. 
- → [Full guide: Human Team Members](features/human-team-members.md#human-team-members) - --- - ## Notifications - Your squad can notify you when they need input — send instant pings to Teams, Discord, iMessage, or any webhook. Agents trigger notifications when they're blocked, need a decision, hit an error, or complete important work. - **Setup is quick:** Configure an MCP notification server (takes 5 minutes), and agents automatically know when to ping you. - See [Notifications Guide](features/notifications.md#quick-start-teams-simplest-path) for platform-specific setup and examples. For MCP configuration details, see [MCP Setup Guide](features/mcp.md#step-by-step-cli-setup). - --- - ## Multi-platform support - Squad works with more than GitHub. Pluggable platform adapters let you use: - - **GitHub** — Issues, PRs, Project Boards (via `gh` CLI) - **Azure DevOps** — Work items, repos, PRs (via `az boards`/`az repos` CLI) - **GitLab** — Issues and merge requests - **Microsoft Planner** — Hybrid work-item tracking (via Microsoft Graph API) - Configure cross-project ADO support in `.squad/config.json` — work items can live in a different org/project than the repo. - → [Full guide: Enterprise Platforms](features/enterprise-platforms.md) | [GitLab Issues](features/gitlab-issues.md) - --- - ## Upgrading - Already have Squad installed? Update to the latest version: - ```bash npm install -g @bradygaster/squad-cli@latest ``` - Run `squad doctor` to validate your setup after upgrading: - ```bash squad doctor ``` - Doctor runs 9 checks — Node.js version, `gh` CLI auth, `.squad/` directory structure, team state, and more. It reports issues with clear fix instructions. - **Migrating from `.ai-team/` to `.squad/`:** - ```bash squad migrate --from ai-team ``` - This renames `.ai-team/` to `.squad/` and updates all internal references. - --- - ## Context budget - Each agent runs in its own context window. 
Real numbers: - | What | Tokens | % of 200K window | |------|--------|-------------------| | Coordinator (squad.agent.md) | ~13,200 | 6.6% | @@ -480,74 +319,49 @@ Each agent runs in its own context window. Real numbers: | Agent at Week 4 (+ 15 learnings, 8 decisions) | ~3,300 | 1.7% | | Agent at Week 12 (+ 50 learnings, 47 decisions) | ~9,000 | 4.5% | | **Remaining for actual work** | **~187,000** | **93%+** | - The coordinator uses 6.6% of its window. A 12-week veteran agent uses 4.5% — but in **its own window**, not yours. Fan out to 5 agents and you get ~1M tokens of total reasoning capacity across all windows. - --- - ## Known limitations - - **Experimental** — file formats and APIs may change between versions. - **Silent success bug** — approximately 7–10% of background agent spawns complete all their file writes but return no text response. This is a platform-level issue. Squad detects it by checking the filesystem for work product and reports what it finds. Work is not lost. - **Platform latency** — response times depend on the Copilot platform. Complex multi-agent tasks take 40–60 seconds. Simple questions are answered in 2–3 seconds. - **Node 20+** — requires a Node.js LTS release (v20.0.0 or later). - **GitHub Copilot required** — Squad works across Copilot hosts (CLI, VS Code, Visual Studio, Coding Agent). - **First session is the least capable** — agents improve as they accumulate history. Give it a few sessions before judging. - --- - ## Adding and removing team members - ### Adding - ``` > I need a DevOps person ``` - Squad allocates a name from the current universe, generates a charter and history seeded with project context, and adds them to the roster. Immediately productive. - ### Removing - ``` > Remove the designer — we're past that phase ``` - Agents are never deleted. Their charter and history move to `.squad/agents/_alumni/`. Knowledge is preserved. If you need them back later, they remember everything. 
- --- - ## Reviewer protocol - Agents with review authority can reject work. On rejection, the original author is locked out and a different agent must handle the revision. This prevents the common failure mode where an agent keeps fixing its own work in circles. - → [Full guide: Reviewer Protocol](features/reviewer-protocol.md#reviewer-rejection-protocol) - --- - ## File ownership - Squad maintains a clear ownership model: - | What | Owner | Safe to edit? | |------|-------|--------------| | `.github/agents/squad.agent.md` | Squad (overwritten on upgrade) | No — your changes will be lost | | `.squad/` | You and your team | Yes — this is your team's state | | `squad.config.ts` | You | Yes — your SDK-first config | | Everything else | You | Yes | - --- - ## Quick reference - | Command | What it does | |---------|-------------| | `squad init` | Initialize Squad in the current repo | | `squad init --sdk` | Initialize with SDK-first TypeScript config | -| `squad init --global` | Initialize a personal squad (cross-project) | | `squad build` | Generate `.squad/` from `squad.config.ts` | | `squad build --check` | Validate generated files match disk (for CI) | | `squad doctor` | Run 9 setup validation checks | -| `squad shell` | **Deprecated** — Enter the interactive shell (use `copilot --agent squad`) | | `squad triage` | Run a single triage pass | | `squad triage --interval 10` | Continuous triage every 10 minutes | | `squad watch` | Ralph watchdog mode | diff --git a/docs/src/content/docs/guide/build-autonomous-agent.md b/docs/src/content/docs/guide/build-autonomous-agent.md index 12832df78..4592b97f4 100644 --- a/docs/src/content/docs/guide/build-autonomous-agent.md +++ b/docs/src/content/docs/guide/build-autonomous-agent.md @@ -1,33 +1,19 @@ -# Build an autonomous agent - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- -Build a CLI-wrapped autonomous agent pipeline that picks up tasks, coordinates work across teammates, and runs unattended. - +# Build a background agent pipeline +Build a CLI-wrapped background agent pipeline that picks up tasks, coordinates work across teammates, and runs with clear guardrails. **Try this:** Clone the [autonomous-pipeline sample](https://github.com/bradygaster/squad/tree/dev/samples/autonomous-pipeline) and run `npm run dev` to see the pattern in action. - -This guide walks you through the pattern used by production autonomous agents — like a docs agent that monitors a repo for changes and generates documentation without human intervention. - +This guide walks you through the pattern used by production background agents — like a docs agent that monitors a repo for changes and generates documentation with human-defined guardrails. --- - -## What an autonomous agent is - -An autonomous agent is a program that: - +## What a background agent is +A background agent is a program that: - Receives a task (from a queue, a CLI argument, or a cron job) - Routes the task to the right agent based on role and skill match -- Executes the work without waiting for human input +- Executes approved work without waiting for a live chat session - Records decisions and learnings for future runs - Reports results (cost, tokens, timeline) - In Squad, you build this by composing SDK primitives — `CastingEngine`, `CostTracker`, `SkillRegistry`, and `StreamingPipeline` — into a loop that assigns work, collects results, and decides what to do next. 
- --- - ## Set up the project - Create a new directory and initialize it with the Squad SDK dependency: - ```bash mkdir my-autonomous-agent cd my-autonomous-agent @@ -35,9 +21,7 @@ npm init -y npm install @bradygaster/squad-sdk npm install -D typescript ``` - Create a `tsconfig.json`: - ```json { "compilerOptions": { @@ -53,9 +37,7 @@ Create a `tsconfig.json`: "include": ["*.ts"] } ``` - Set `"type": "module"` in your `package.json` and add scripts: - ```json { "type": "module", @@ -66,16 +48,11 @@ Set `"type": "module"` in your `package.json` and add scripts: } } ``` - --- - ## Define your agents - Use `defineAgent()` to declare each agent's name, role, and capabilities: - ```ts import { defineAgent } from '@bradygaster/squad-sdk'; - const docsWriter = defineAgent({ name: 'lori', role: 'Technical Writer', @@ -86,7 +63,6 @@ const docsWriter = defineAgent({ { name: 'api-docs', level: 'proficient' }, ], }); - const reviewer = defineAgent({ name: 'chen', role: 'Reviewer', @@ -97,15 +73,10 @@ const reviewer = defineAgent({ ], }); ``` - Each `defineAgent()` call validates the config at runtime and returns a typed `AgentDefinition` object. 
- --- - ## Build the squad - Compose your agents into a squad with `defineSquad()`: - ```ts import { defineSquad, @@ -114,16 +85,13 @@ import { defineRouting, defineDefaults, } from '@bradygaster/squad-sdk'; - export default defineSquad({ version: '1.0.0', - team: defineTeam({ name: 'Docs Automator', - description: 'Autonomous documentation pipeline', + description: 'Background documentation pipeline', members: ['lori', 'chen'], }), - agents: [ defineAgent({ name: 'lori', @@ -138,7 +106,6 @@ export default defineSquad({ status: 'active', }), ], - routing: defineRouting({ rules: [ { pattern: 'docs-*', agents: ['@lori'], tier: 'standard' }, @@ -147,7 +114,6 @@ export default defineSquad({ defaultAgent: '@lori', fallback: 'default-agent', }), - defaults: defineDefaults({ model: { preferred: 'claude-sonnet-4', @@ -157,21 +123,14 @@ export default defineSquad({ }), }); ``` - `defineSquad()` validates every nested section through its respective builder — `defineTeam()`, `defineAgent()`, `defineRouting()`, `defineDefaults()`. If any field is invalid, you get a `BuilderValidationError` at startup, not at runtime. 
- --- - ## Create a CLI wrapper - Wrap your pipeline in a CLI entry point so you can invoke it from a terminal, cron job, or CI workflow: - ```ts #!/usr/bin/env node - import { CastingEngine, CostTracker, SkillRegistry } from '@bradygaster/squad-sdk'; import type { AgentRole, CastMember } from '@bradygaster/squad-sdk'; - interface Task { id: string; title: string; @@ -181,14 +140,12 @@ interface Task { assignedTo?: string; result?: string; } - // Parse CLI arguments const taskArg = process.argv[2]; if (!taskArg) { console.error('Usage: my-agent <task>'); process.exit(1); } - async function main(): Promise<void> { // Cast the team const engine = new CastingEngine(); @@ -197,13 +154,10 @@ async function main(): Promise<void> { requiredRoles: ['lead', 'developer', 'scribe'] as AgentRole[], teamSize: 3, }); - console.log(`Team: ${team.map(m => m.displayName).join(', ')}`); - // Initialize SDK components const costTracker = new CostTracker(); const skillRegistry = new SkillRegistry(); - // Register domain skills skillRegistry.registerSkill({ id: 'markdown-gen', @@ -213,7 +167,6 @@ async function main(): Promise<void> { agentRoles: ['scribe'], content: 'Generate markdown from code analysis.', }); - // Build the task queue from CLI input const tasks: Task[] = [{ id: 'task-01', @@ -222,32 +175,24 @@ async function main(): Promise<void> { requiredRole: 'scribe', status: 'queued', }]; - - // Run the autonomous loop + // Run the background loop await runLoop(team, tasks, costTracker, skillRegistry); - // Print results const summary = costTracker.getSummary(); console.log(`Done. ${summary.totalInputTokens} tokens in, $${summary.totalEstimatedCost.toFixed(4)} cost.`); } - main().catch(err => { console.error('Fatal:', err); process.exit(1); }); ``` - --- - -## The autonomous pipeline pattern - -The core of an autonomous agent is a loop with four phases: - +## The background pipeline pattern +The core of a background agent is a loop with four phases: 1.
**Assign** — find the next queued task and match it to an available agent by role 2. **Execute** — the agent processes the task, streaming results and tracking cost 3. **Coordinate** — the agent may route follow-up tasks, record decisions, or save learnings 4. **Repeat** — continue until no tasks remain - ```ts import { CastingEngine, @@ -257,7 +202,6 @@ import { selectResponseTier, } from '@bradygaster/squad-sdk'; import type { CastMember, AgentRole, ResponseTier, TierContext } from '@bradygaster/squad-sdk'; - interface Task { id: string; title: string; @@ -267,14 +211,12 @@ interface Task { assignedTo?: string; result?: string; } - interface AgentState { member: CastMember; status: 'idle' | 'working' | 'done'; tasksCompleted: number; sessionId: string; } - async function runLoop( team: CastMember[], tasks: Task[], @@ -282,39 +224,32 @@ async function runLoop( skillRegistry: SkillRegistry, ): Promise<void> { const streaming = new StreamingPipeline(); - // Build agent states const agents: AgentState[] = team.map((member, i) => { const sessionId = `session-${member.name.toLowerCase()}-${i}`; streaming.attachToSession(sessionId); return { member, status: 'idle', tasksCompleted: 0, sessionId }; }); - - // Autonomous execution loop + // Background execution loop while (tasks.some(t => t.status !== 'done')) { for (const agent of agents) { if (agent.status === 'working') continue; - // Phase 1: Assign — find a task matching this agent's role const task = tasks.find( t => t.status === 'queued' && t.requiredRole === agent.member.role, ); if (!task) continue; - task.status = 'in-progress'; task.assignedTo = agent.member.name; agent.status = 'working'; - // Check for skill matches const matches = skillRegistry.matchSkills(task.description, agent.member.role); if (matches.length > 0) { console.log(` Skill match: ${matches[0].skill.id} (score: ${matches[0].score.toFixed(2)})`); } - // Phase 2: Execute — simulate or call real LLM work console.log(` ${agent.member.name} working on:
${task.title}`); // In a real agent, you would call the Copilot SDK here - // Phase 3: Coordinate — record cost, route follow-ups costTracker.recordUsage({ sessionId: agent.sessionId, @@ -324,7 +259,6 @@ async function runLoop( outputTokens: 800, estimatedCost: 0.006, }); - // Complete the task task.status = 'done'; task.result = `Completed by ${agent.member.name}`; @@ -332,7 +266,6 @@ async function runLoop( agent.tasksCompleted++; } } - // Clean up streaming sessions for (const agent of agents) { streaming.detachFromSession(agent.sessionId); @@ -340,31 +273,21 @@ async function runLoop( streaming.clear(); } ``` - --- - ## Coordination tools - The autonomous-pipeline sample demonstrates three coordination patterns that agents use during the loop: - | Tool | What it does | Example | |------|-------------|---------| | `squad_route` | Routes a follow-up task to a teammate | Developer finishes auth → routes test-writing to Tester | | `squad_decide` | Records an architectural decision | "Use JWT with RS256 signing for auth" | | `squad_memory` | Saves a learning for future sessions | "Connection pool sweet spot: 20 connections" | - These patterns let agents coordinate without a central orchestrator. Each agent makes local decisions that accumulate into a shared knowledge base. - :::note[`squad_route` requires `fanOutDepsGetter`] For `squad_route` to actually spawn agent sessions, the `ToolRegistry` must be constructed with a `fanOutDepsGetter` callback that provides fan-out dependencies (`sessionPool`, `modelClient`, `squadRoot`, `configGetter`). Without it, the tool returns an honest `fan-out-deps-unavailable` error instead of silently succeeding. See the [SDK reference](/reference/sdk/#toolregistry) for wiring details. 
::: - --- - ## Add observability - Track cost, token usage, and agent activity with the built-in `CostTracker` and OpenTelemetry integration: - ```ts import { CostTracker, @@ -374,26 +297,22 @@ import { recordAgentDuration, recordTokenUsage, } from '@bradygaster/squad-sdk'; - // Initialize OTel (connects to Aspire dashboard if endpoint is set) const otelEndpoint = process.env['OTEL_EXPORTER_OTLP_ENDPOINT']; if (otelEndpoint) { initSquadTelemetry({ endpoint: otelEndpoint, - serviceName: 'my-autonomous-agent', + serviceName: 'my-background-agent', }); } - // Track per-agent costs const costTracker = new CostTracker(); - // Collect opt-in telemetry events const telemetry = new TelemetryCollector({ enabled: true }); telemetry.collectEvent({ name: 'squad.init', properties: { agents: 3, sample: 'docs-agent' }, }); - // Record OTel metrics per task recordAgentSpawn('lori'); recordAgentDuration('lori', 1200, 'success'); @@ -407,20 +326,14 @@ recordTokenUsage({ estimatedCost: 0.006, timestamp: new Date(), }); - // Print cost summary at the end const summary = costTracker.getSummary(); console.log(`Total: $${summary.totalEstimatedCost.toFixed(4)}`); ``` - To view traces and metrics in the Aspire dashboard, see the [Aspire dashboard scenario](/scenarios/aspire-dashboard/). 
- --- - ## Complete working example - -Here is a minimal but complete autonomous agent you can copy and run: - +Here is a minimal but complete background agent you can copy and run: ```ts #!/usr/bin/env node import { @@ -430,7 +343,6 @@ import { StreamingPipeline, } from '@bradygaster/squad-sdk'; import type { CastMember, AgentRole } from '@bradygaster/squad-sdk'; - // Types interface Task { id: string; @@ -439,14 +351,12 @@ interface Task { status: 'queued' | 'done'; assignedTo?: string; } - interface Agent { member: CastMember; status: 'idle' | 'working'; tasksCompleted: number; sessionId: string; } - // Cast the team const engine = new CastingEngine(); const team = engine.castTeam({ @@ -454,12 +364,10 @@ const team = engine.castTeam({ requiredRoles: ['lead', 'developer', 'scribe'] as AgentRole[], teamSize: 3, }); - // Initialize components const costTracker = new CostTracker(); const skills = new SkillRegistry(); const streaming = new StreamingPipeline(); - skills.registerSkill({ id: 'docs', name: 'Documentation', @@ -468,42 +376,35 @@ skills.registerSkill({ agentRoles: ['scribe'], content: 'Generate markdown docs.', }); - // Build task queue const tasks: Task[] = [ { id: '1', title: 'Write API docs', requiredRole: 'scribe', status: 'queued' }, { id: '2', title: 'Review architecture', requiredRole: 'lead', status: 'queued' }, { id: '3', title: 'Implement feature', requiredRole: 'developer', status: 'queued' }, ]; - // Build agents const agents: Agent[] = team.map((member, i) => { const sessionId = `s-${member.name}-${i}`; streaming.attachToSession(sessionId); return { member, status: 'idle' as const, tasksCompleted: 0, sessionId }; }); - -// Autonomous loop: assign → execute → coordinate → repeat +// Background loop: assign → execute → coordinate → repeat while (tasks.some(t => t.status !== 'done')) { for (const agent of agents) { if (agent.status === 'working') continue; - const task = tasks.find( t => t.status === 'queued' && t.requiredRole === 
agent.member.role, ); if (!task) continue; - // Assign task.assignedTo = agent.member.name; agent.status = 'working'; console.log(`${agent.member.displayName} → ${task.title}`); - // Execute (replace with real LLM call in production) const match = skills.matchSkills(task.title, agent.member.role); if (match.length > 0) { console.log(` Skill: ${match[0].skill.id}`); } - // Track cost costTracker.recordUsage({ sessionId: agent.sessionId, @@ -513,37 +414,28 @@ while (tasks.some(t => t.status !== 'done')) { outputTokens: 600, estimatedCost: 0.005, }); - // Complete task.status = 'done'; agent.status = 'idle'; agent.tasksCompleted++; } } - // Report const summary = costTracker.getSummary(); console.log(`\nDone: ${tasks.length} tasks, $${summary.totalEstimatedCost.toFixed(4)} total cost`); console.log('Agents:', agents.map(a => `${a.member.displayName} (${a.tasksCompleted})`).join(', ')); - // Clean up for (const a of agents) streaming.detachFromSession(a.sessionId); streaming.clear(); ``` - --- - ## Next steps - - Run the [autonomous-pipeline sample](https://github.com/bradygaster/squad/tree/dev/samples/autonomous-pipeline) to see the full pattern with OTel, cost dashboards, and skill matching - Read the [SDK reference](/reference/sdk/) for the complete API surface - See the [extensibility guide](/guide/extensibility/) for where your agent fits in the Squad ecosystem - Check the [Aspire dashboard scenario](/scenarios/aspire-dashboard/) for observability setup - --- - ## See Also - - [Your Team](../concepts/your-team.md) — Agent roles, charters, and team composition - [Architecture](../concepts/architecture.md) — How the coordinator orchestrates work -- [SDK Reference](../reference/sdk.md) — SDK API for autonomous agents +- [SDK Reference](../reference/sdk.md) — SDK API for background agents diff --git a/docs/src/content/docs/guide/building-extensions.md b/docs/src/content/docs/guide/building-extensions.md index f3466d4ce..7668b0a1c 100644 --- 
a/docs/src/content/docs/guide/building-extensions.md +++ b/docs/src/content/docs/guide/building-extensions.md @@ -1,21 +1,11 @@ # Building extensions - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - You've decided your idea is a Squad Extension (Layer 2). Now package it so another team can install it, enable it, and give their agents better context in five minutes. - --- - ## What is an extension? - An extension is a reusable collection of Squad agents, knowledge, workflows, ceremonies, memory guidance, provider contracts, generated artifacts, and directives that any team can install. It lives outside Squad core, packaged as a GitHub repository or marketplace plugin. Extensions let you codify workflows, domain expertise, memory lenses, knowledge graphs, or testing ceremonies that other teams benefit from. - If your extension needs a Copilot plugin, declare it as a dependency in the manifest. If it points to an external CLI, package, or MCP server, record that in `repository`, `upstream`, or `mcp` metadata. If it uses an approved built-in provider such as Graphify, `squad plugin refresh` can generate artifacts for agents to consume. Squad does not install external packages, run plugin-supplied commands, start MCP servers, or manage Copilot plugins. - --- - ## Extension structure - ``` my-extension/ ├── plugin.manifest.json @@ -27,47 +17,30 @@ my-extension/ │ └── CEREMONY.md └── README.md ``` - --- - ## Build one - **Step 1: Create a repo** - ```bash mkdir my-extension cd my-extension git init ``` - **Step 2: Add knowledge** - Create `knowledge/example-guidance.md`: - ```markdown # Example Guidance - **When to use:** You need to do X. - ## Context - Brief problem statement. - ## Steps - 1. Do the first thing 2. Do the second thing 3. Done ``` - **Step 3 (optional): Add a ceremony** - Create `ceremonies/code-review.md` following Squad ceremony format (decision gate, verdicts, escalation). 
- **Step 4: Add a manifest** - Create `plugin.manifest.json`: - ```json { "id": "my-extension", @@ -110,90 +83,59 @@ Create `plugin.manifest.json`: ] } ``` - The MVP manifest is declarative. Do not add scripts, commands, lifecycle hooks, or executable files. - **Step 5: Validate and dry-run** - ```bash squad plugin validate . squad plugin dry-run . ``` - Dry-run prints the exact files Squad would write without changing `.squad/`. - **Step 6: Install and enable locally** - ```bash squad plugin install . squad plugin enable my-extension squad plugin list --json ``` - Install records the plugin disabled by default. Enable activates the roles declared in `components`. Copilot dependencies are surfaced to the user but must be installed through Copilot's own plugin flow. External package and MCP metadata is surfaced as install guidance only. - **Step 7: Write the README** - Explain the problem, installation, and usage: - ```markdown # My Extension - Codifies client-delivery workflows for consulting teams. - ## Install - squad plugin install . squad plugin enable my-extension - ## What's Inside - - **discovery-interview** skill — clarify requirements - **evidence-bundler** knowledge — collect test results - **plan-review** ceremony — gate for approval ``` - **Step 8: Test locally** - Run `squad plugin verify`, then run `squad plugin refresh my-extension` if your plugin declares an approved built-in provider. Load your Squad session and verify the installed Squad knowledge, workflows, and generated artifacts appear and work as expected. If you declared Copilot dependencies, verify those are installed separately through Copilot. - --- - ## Share it - Push to GitHub: - ```bash git add . 
git commit -m "Initial extension: my-extension" git push ``` - Register with a marketplace or pin directly by repository URL: - ``` squad plugin marketplace add github/my-org/my-team-plugins ``` - --- - ## Real examples - - **Client-delivery workflow** ([RFC #328](https://github.com/bradygaster/squad/issues/328)) — discovery, research, multi-round review with evidence gates - **Azure infrastructure patterns** — VM provisioning, Cosmos DB design, monitoring rules - **Knowledge libraries** — document structured analysis, reference synthesis - **External integration samples** — see `samples/plugin-knowledge-graphify` for the real Graphify knowledge graph tool, `samples/plugin-knowledge-index-server` for the real Index Server instruction/knowledge MCP server, and `samples/plugin-memory-mempalace` for the real MemPalace memory CLI/MCP system - --- - ## Related docs - - [Extensibility guide](./extensibility.md#decision-tree) — Where does your idea belong? (decision tree) - [Plugin Marketplace](../features/plugins.md) — How teams discover and install your extension - [Skills](../features/skills.md) — Existing Squad skills concepts; plugin manifests should use `knowledge` unless they are declaring a Copilot dependency - [Ceremonies](../features/ceremonies.md) — How to define decision gates and review rituals - --- - **Ready to share?** [Open a discussion](https://github.com/bradygaster/squad/discussions) in the Squad community. diff --git a/docs/src/content/docs/guide/extensibility.md b/docs/src/content/docs/guide/extensibility.md index 0ec34b5af..4cf29fae9 100644 --- a/docs/src/content/docs/guide/extensibility.md +++ b/docs/src/content/docs/guide/extensibility.md @@ -1,25 +1,15 @@ # Extensibility guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Where does your change idea belong? Squad core, marketplace plugin, or team config? - **Key principle:** Squad core stays small. 
Most ideas are skills, ceremonies, or directives. - --- - ## The three layers - | Layer | What lives here | Who changes it | Distribution | |-------|----------------|----------------|--------------| | **Squad Core** | Coordinator behavior, routing logic, reviewer protocol | Squad maintainers only | npm releases | | **Squad Extension** | Reusable capabilities (skills, ceremonies, workflows, memory guidance, provider contracts, generated artifacts) | Plugin authors | Marketplace plugins | | **Team Configuration** | Decisions unique to THIS team | The team itself | `.squad/` files | - --- - ## Decision tree - ``` ┌─ Does it change HOW the coordinator routes, spawns, or enforces? │ @@ -39,23 +29,13 @@ Where does your change idea belong? Squad core, marketplace plugin, or team conf └─ Examples: YOUR git workflow, YOUR build process, YOUR routing rules Action: Update `.squad/` files ``` - **Heuristic:** "Squad should..." → check if it's really "My team should..." or "Teams using X should...". - - - --- - ## Worked example: Client-delivery RFC - [RFC #328](https://github.com/bradygaster/squad/issues/328) proposed a sophisticated client-delivery workflow: discovery interviews, research sprints, multi-round review with `SHIP`/`NEEDS_WORK`/`BLOCKED` verdicts, evidence bundles. - **The realization:** It maps entirely to existing Squad primitives. No core changes needed. - **Where it belongs:** Layer 2 (Squad Extension) - This workflow is a reusable pattern any team could adopt — perfect as a marketplace plugin. - **Plugin structure:** ``` client-delivery-workflow/ @@ -69,42 +49,29 @@ client-delivery-workflow/ └── directives/ └── multi-round-review.md # Policy: 2 NEEDS_WORK rounds max ``` - **Usage:** ```bash squad plugin install github/awesome-copilot/client-delivery-workflow ``` - **Lesson:** Most sophisticated workflows are compositions of primitives, not core features. 
- --- - ## When to escalate to core - You likely need a core change if: - - **New coordinator mode** — Example: `validate` mode that runs checks before `assign` - **Routing logic change** — Example: Route based on agent workload, not labels - **Reviewer protocol change** — Example: Conditional approvals ("approved if tests pass") - **Global enforcement rule** — Example: Block merges if evidence missing - **Skill needs coordinator data** — Example: Access to agent spawn history - You DON'T need core if: - - **Workflow pattern** → Build a plugin (skills + ceremonies) - **Domain expertise** → Write a skill - **Team process** → Add a ceremony to `.squad/ceremonies.md` - **Reusable templates** → Build a plugin - **Configuring existing behavior** → Update `.squad/routing.md` - --- - ## Build an extension - Ready to build? See [Building extensions](./building-extensions.md) for a five-minute walkthrough. - The plugin MVP uses a declarative `plugin.manifest.json` and a simple lifecycle: - ```bash squad plugin validate ./my-extension squad plugin dry-run ./my-extension @@ -112,31 +79,21 @@ squad plugin install ./my-extension squad plugin enable my-extension squad plugin refresh my-extension ``` - Install records lock data and leaves the plugin disabled. Enable activates the roles declared in the manifest. Refresh updates approved generated artifacts for built-in providers such as Graphify. See [Plugin security model](../reference/plugin-security.md) for the guardrails under the pluggability model. - --- - ## Summary - 1. **Start with the decision tree** — Most ideas are Layer 2 or 3 2. **Default to team config** — Unique to your team? → `.squad/` 3. **Build a plugin if reusable** — Other teams benefit? → Package and share 4. **Escalate to core rarely** — Need coordinator/routing changes? → Open an RFC - **When in doubt:** Start with team config. Copy-pasting to other teams? Promote to plugin. Plugins repeatedly hitting limits? Signal for core change. 
- --- - ## Related documentation - - [Plugin Marketplace](../features/plugins.md) — How to browse, install, and share plugins - [Skills](../features/skills.md) — How to write skills for your team or plugins - [Ceremonies](../features/ceremonies.md) — How to define team meetings and gates - [Routing](../features/routing.md) — How to configure work assignment rules - [Building extensions](./building-extensions.md) — Step-by-step guide to building and sharing extensions - [Contributing](https://github.com/bradygaster/squad/blob/main/CONTRIBUTING.md) — How to propose changes to Squad core - --- - **Questions?** [Open an issue](https://github.com/bradygaster/squad/issues/new) or join the discussion in the Squad community. diff --git a/docs/src/content/docs/guide/faq.md b/docs/src/content/docs/guide/faq.md index c7ad1b7ad..19494d5cc 100644 --- a/docs/src/content/docs/guide/faq.md +++ b/docs/src/content/docs/guide/faq.md @@ -1,22 +1,13 @@ # Frequently asked questions - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Common questions, troubleshooting tips, and clarifications based on community feedback. Can't find your answer? [Open an issue](https://github.com/bradygaster/squad/issues/new). - --- - ## Which CLI should I use? - **Short answer:** Use **GitHub Copilot CLI** for day-to-day work. Use **Squad CLI** for setup, diagnostics, and specific features. 
- **Why GitHub Copilot CLI?** - Full agent spawning capabilities - Access to all Squad features through natural conversation - Model selection and background execution - No manual commands — just describe what you need - **When to use Squad CLI:** - Initial setup: `squad init` - Diagnostics: `squad doctor` @@ -24,160 +15,108 @@ Common questions, troubleshooting tips, and clarifications based on community fe - Aspire dashboard: `squad aspire` - Export/import: `squad export` and `squad import` - Remote phone access: `squad start --tunnel` - **Common workflow pattern:** ```bash # Terminal 1: Run continuous triage squad triage --interval 10 - # Terminal 2: Work with your team gh copilot > @squad what issues are ready to work? ``` - For a detailed feature comparison, see [Client Compatibility Matrix](../scenarios/client-compatibility.md). - --- - ## Why doesn't `gh issue edit --add-assignee "@copilot"` work? - **Problem:** Running `gh issue edit --add-assignee "@copilot"` (or variants like `copilot-swe-agent[bot]`) fails locally, even with a Personal Access Token. - **Why this happens:** The GitHub Copilot coding agent is a bot account. Bot accounts cannot be assigned to issues via the GitHub CLI in the same way as human users — the GitHub API restricts direct assignment of bot accounts through standard endpoints. - **Recommended workaround:** Use **label-based assignment** through the GitHub Actions workflow: - 1. Add the `squad:copilot` label to the issue: ```bash gh issue edit --add-label "squad:copilot" ``` - 2. The auto-assign workflow (`.github/workflows/squad-copilot-auto-assign.yml`) detects the label and assigns @copilot automatically. 
- **Prerequisites for auto-assign:** - You must create a **GitHub Classic Personal Access Token** with `repo` scope - Add it as a repository secret: `gh secret set COPILOT_ASSIGN_TOKEN` - The workflow uses this token to perform the assignment on your behalf - See [Copilot Coding Agent](../features/copilot-coding-agent.md) for full setup instructions. - --- - ## I don't see anything on the Aspire dashboard - **Problem:** You ran `squad aspire` and opened the dashboard, but no telemetry is showing up. - **Why this happens:** The Aspire dashboard integration **requires the Squad CLI**. It is not available when using GitHub Copilot CLI directly. - **How to fix:** 1. Ensure you started Aspire with the Squad CLI: ```bash squad aspire ``` - 2. Confirm the container is running: ```bash docker ps | grep aspire-dashboard ``` - 3. Look for the dashboard URL in the output (usually `http://localhost:18888`) - 4. Run a Squad CLI command that generates telemetry: ```bash squad doctor squad triage ``` - 5. Refresh the Aspire dashboard — you should see traces, metrics, and logs appear - **Note:** GitHub Copilot CLI sessions do **not** send telemetry to Aspire. Only Squad CLI commands emit OpenTelemetry data to the dashboard. - See [Using Squad with the Aspire Dashboard](../scenarios/aspire-dashboard.md) for details. - --- - ## `squad doctor` complains about absolute path for teamRoot - **Problem:** Running `squad doctor` shows a warning like: - ``` ⚠ teamRoot uses absolute path — consider making it relative ``` - **Why this matters:** Absolute paths (e.g., `C:\Users\me\squad\` or `/Users/me/squad/`) break portability. If you share the squad with a teammate or clone it to a new machine, the absolute path won't resolve correctly. - **How to fix:** Make the `teamRoot` path **relative to the project root**. 
- **Example — Before (absolute):** ```json { "teamRoot": "C:\\Users\\me\\repos\\my-team\\.squad" } ``` - **Example — After (relative):** ```json { "teamRoot": ".squad" } ``` - **For linked teams (dual-root mode):** - If your project links to a remote team repository: - ```json { "teamRoot": "../team-repo/.squad" } ``` - The path should be relative to your **project root** (where `.squad/` or `squad.config.ts` lives), not to the `.squad/` directory itself. - **Verify the fix:** ```bash squad doctor ``` - You should see `✓ teamRoot is relative` or no warning. - --- - ## Can I use Squad CLI and GitHub Copilot CLI at the same time? - Yes! They complement each other: - - **Squad CLI** provides infrastructure: triage, Aspire observability, export/import, diagnostics - **GitHub Copilot CLI** provides conversational interface to your team - **Recommended setup:** - Run `squad triage --interval 10` in a dedicated terminal (or as a cron job / GitHub Action) - Use `gh copilot` (or `@squad` in VS Code) for all team interactions - Use `squad doctor` or `squad aspire` for diagnostics when needed - Both CLIs read and write the same `.squad/` directory, so state stays synchronized. - --- - ## What's the difference between Ralph and triage? - **Ralph** and **triage** are different names for the same functionality: - - **`squad ralph`** is the legacy command name - **`squad triage`** is the new primary command name (as of v0.8.26) - Both commands do the same thing: monitor GitHub issues, apply routing rules, and assign work to team members - **Migration path:** - Existing scripts using `squad ralph` will continue to work (it's an alias) - New projects should use `squad triage` in documentation and automation - The `ralph/` directory in `.squad/` remains unchanged for backward compatibility - --- - ## How do I add a new agent to my squad? 
- **In conversation (recommended):** - ``` gh copilot > @squad I want to add a new agent @@ -185,93 +124,65 @@ gh copilot > Name: Guardian > Expertise: OWASP, dependency scanning, secrets detection ``` - Squad will create the charter, update the team roster, and add routing rules. - **Manual creation:** - 1. Create a charter file in `.squad/agents/{agent-name}/charter.md` 2. Update `.squad/team.md` to include the new agent in the roster 3. Add routing rules in `.squad/routing.md` (if applicable) 4. Optionally add a history file in `.squad/agents/{agent-name}/history.md` - See [Team Setup](../features/team-setup.md) for details. - --- - ## What happens if I run `squad init` twice? - Nothing breaks! `squad init` is **idempotent** — it's safe to run multiple times. - **What it does:** - Checks if `.squad/` exists; if yes, does nothing - Copies missing templates to `.squad/` - Updates `.github/workflows/` with Squad Actions (skips existing files) - Adds `.github/agents/squad.agent.md` if missing - **Use cases:** - Recover from partial initialization - Update workflows after a Squad upgrade - Add missing templates without overwriting custom changes - --- - ## Can I use Squad without GitHub Issues? - Yes, but with limitations. - **What works without GitHub Issues:** - Conversational team interaction (`@squad`, `gh copilot`) - Agent spawning and parallel execution - Memory, decisions, and knowledge sharing - Skills and ceremonies - Export/import for portability - **What requires GitHub Issues:** - Ralph/triage auto-assignment - Issue-driven development workflows - Project board integration - Label-based routing - Copilot coding agent auto-assignment - If you're using GitLab, see [GitLab Issues](../features/gitlab-issues.md) for integration options. - --- - ## How do I reset my squad without losing decisions? 
- **Option 1: Archive and start fresh** ```bash # Export current state squad export --out backup-$(date +%Y%m%d).json - # Remove .squad/ rm -rf .squad/ - # Reinitialize squad init ``` - Manually copy decisions from the backup JSON or `.squad/decisions.md` if you archived it separately. - **Option 2: Selective cleanup** ```bash # Remove agent state but keep team structure rm -rf .squad/agents/*/history.md rm -rf .squad/sessions/ - # Keep .squad/decisions.md, .squad/team.md, .squad/routing.md ``` - See [Disaster Recovery](../scenarios/disaster-recovery.md) for more recovery patterns. - --- - ## Where should I report bugs or request features? - [Open an issue on GitHub](https://github.com/bradygaster/squad/issues/new) with: - **Environment:** OS, Node.js version, Squad version (`squad --version`) - **Reproduction steps:** What you ran, what happened, what you expected - **Output:** Copy the full terminal output, including any errors - For questions or discussions, use [GitHub Discussions](https://github.com/bradygaster/squad/discussions). diff --git a/docs/src/content/docs/guide/personal-squad.md b/docs/src/content/docs/guide/personal-squad.md deleted file mode 100644 index 08479686b..000000000 --- a/docs/src/content/docs/guide/personal-squad.md +++ /dev/null @@ -1,352 +0,0 @@ -# Your Personal Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -:::tip[Git helps protect your project state] -We strongly recommend using git to version-control your project `.squad/` directory. Every session writes to it — decisions, logs, orchestration history. Agent charters, skills, and casting histories live in your personal squad directory (not committed to projects). Without version control, context window resets, crashes, or accidental deletions could be unrecoverable. If git isn't an option, regular manual backups to external storage or cloud sync can help protect your work. 
-::: - -**Try this:** -```bash -squad init --global -``` - -Your agents follow you everywhere now. Same team, every project, persistent memory. - -This tutorial walks you through setup, explains what's happening behind the scenes, and shows a few ways personal squads make your work better. - ---- - -## 1. What Is a Personal Squad? - -Normally, Squad lives inside a single project — `.squad/` in your repo root. Your agents know that project. They don't know your other ones. - -A personal squad flips that. Your team identity — agents, charters, skills, casting history — moves to your personal squad directory. Every project you work in can point to it. - -**Personal squad location by platform:** - -| Platform | Path | -|----------|------| -| Linux | `~/.config/squad/` | -| macOS | `~/Library/Application Support/squad/` | -| Windows | `%APPDATA%\squad\` | - -What that means in practice: - -- Your agents remember your conventions across all your repos -- Skills learned in one project carry over to every other one -- Your Lead reviews code the same way everywhere -- You don't repeat yourself when starting a new project - -You're still one person. But your team travels with you. - ---- - -## 2. Set It Up - -### Install the CLI globally - -```bash -npm install -g @bradygaster/squad-cli -``` - -Now `squad` works from any directory. Not tied to a specific project. - -### Initialize your personal squad - -```bash -squad init --global -``` - -You'll see: - -``` -✅ Personal squad initialized. - {personal squad directory} — your global team root - Agents, skills, and casting will be shared across projects. -``` - -That's it. You have a personal team root. - -### Verify it's working - -```bash -squad status -``` - -``` -Squad Status - Global squad: {personal squad directory} - Agents: 0 (none cast yet — start a session to form your team) - Skills: 0 -``` - -No agents yet. They form when you start your first session. 
The global directory is just the container — your team comes to life when you give them work. - -### Connect a project - -Navigate to any project and run: - -```bash -cd ~/projects/my-api -squad init -``` - -Squad detects your global team root and writes a pointer: - -``` -✅ Squad initialized. - .squad/config.json → teamRoot: {personal squad directory} - Team identity inherited from personal squad. - Project-local state (decisions, logs) stays here. -``` - -Your project now has its own `.squad/` directory for local state, but your agents, skills, and casting come from the global root. - -Repeat for any project you want connected. - ---- - -## 3. What Just Happened? (Behind the Scenes) - -Two things were created. Understanding the split is the key to personal squads. - -### The global directory: your personal squad - -This is your **team identity**. It contains: - -``` -{personal squad directory}/ - agents/ — your agent charters and histories - casting/ — who's been cast, role assignments - skills/ — accumulated knowledge ("always use Zod", "prefer Tailwind") -``` - -This is the stuff that makes your agents *yours*. It persists across sessions. It grows as you work. It follows you from project to project. - -### The global `squad.agent.md` — a reference, not a live file - -You'll also see `.github/agents/squad.agent.md` inside your global config directory. This file is **not** discoverable by GitHub Copilot — Copilot only reads agent files inside a git repository's `.github/agents/` folder. - -So why is it there? It serves as a **local reference** of the Squad agent instructions that were applied to your projects. The actual source for `squad init` is the packaged template bundled with the CLI (`squad.agent.md.template`), so the global copy is not used as input for future inits. It exists so you can see what Squad generated without opening a project. 
- -If you want Copilot to use Squad's agent instructions in a project, run `squad init` in that project's root — that creates the "real" `.github/agents/squad.agent.md` inside the git repo where Copilot can discover it. - -### The project pointer: `.squad/config.json` - -Inside each connected project, `.squad/config.json` looks like this: - -```json -{ - "version": 1, - "teamRoot": "{personal squad directory}", - "projectKey": null -} -``` - -That `teamRoot` field is the magic. When Squad's resolution system sees it, the project enters **remote mode**. Here's how the two modes compare: - -| | **Local mode** (default) | **Remote mode** (personal squad) | -|---|---|---| -| Team identity | `.squad/` in project | Personal squad directory (global) | -| Decisions & logs | `.squad/` in project | `.squad/` in project | -| Agents shared? | No — project only | Yes — across all connected projects | -| Skills shared? | No | Yes | - -In remote mode: - -- **Team identity** (agents, charters, skills, casting) → loaded from your personal squad directory -- **Project-local state** (decisions, logs, orchestration-log) → stays in this project's `.squad/` - -The resolution system walks up directories looking for `.squad/`. When it finds one with a `teamRoot` in `config.json`, it switches to remote mode — pulling team identity from the external path while keeping project state local. - -### Why the split matters - -Your agents know you across projects. They remember your conventions. But each project has its own decisions and logs. - -The API project doesn't get cluttered with the frontend project's session history. The Discord bot's architectural decisions stay in the Discord bot. Clean separation — shared brain, local memory. - ---- - -## 4. Use Case: Your Side Projects Get a Team - -You have three side projects. A CLI tool, a web app, a Discord bot. All different stacks. All just you. 
- -```bash -# Already done once -squad init --global - -# Connect each project -cd ~/projects/cli-tool && squad init -cd ~/projects/web-app && squad init -cd ~/projects/discord-bot && squad init -``` - -Now when you open any of them: - -``` -> Fredo, set up the database layer. -``` - -Fredo already knows you prefer PostgreSQL. He knows you use Zod for validation. He picked that up from working on your CLI tool last week. You didn't tell him again — he remembered. - -Your Lead reviews code the same way in every repo. Your Tester follows the same patterns. Three projects, one consistent team. - -Without a personal squad, you'd re-explain your conventions every time you start a new project. With one, the onboarding is already done. - ---- - -## 5. Use Case: Code Review Across Repos - -Your Lead agent reviews every pull request in every project. But more than that — they remember architectural decisions from *other* repos. - -``` -> Michael, review this new endpoint. -``` - -``` -🏗️ Michael — reviewing /api/users endpoint - -Looks good. Two notes: - - In your CLI tool, you standardized on cursor-based pagination. - This endpoint uses offset pagination. Intentional? - - Missing rate limiting. We added this as a skill after the - auth module in your web app. -``` - -Michael isn't just reviewing code in isolation. He's cross-referencing decisions and skills from your other projects. That's the personal squad at work — knowledge that spans repos. - -The more projects you connect, the richer these reviews become. Your Lead builds a mental model of how *you* build software, not just how one project works. - ---- - -## 6. Use Case: Learning a New Codebase - -You join an open-source project. New repo, unfamiliar code. But your agents already know *you*. - -For projects you don't own — OSS contributions, client work, temporary collaborations — use **consult mode**. 
Your team consults invisibly, and the project never knows Squad was there: - -```bash -cd ~/projects/new-oss-contribution -squad consult -``` - -Your personal squad is copied into the project's `.squad/` directory, hidden via `.git/info/exclude`. The agents don't know the codebase yet — they'll learn it. But they already know your preferences: - -- How you like code structured -- What testing patterns you follow -- Your communication style (direct? exploratory? detail-oriented?) - -``` -> Team, help me understand this codebase. Where's the entry point -> and how does routing work? -``` - -Your agents explore the repo with your familiar voice. They ask the questions you'd ask. The codebase is new — but your relationship with your team isn't. - -When you're done, extract the generic learnings back to your personal squad and clean up: - -```bash -squad extract --clean -``` - -It's not a cold start. It's your team meeting a new project. - -> 📖 **Full guide:** [Consult Mode](../features/consult-mode.md) — invisible consulting, learning extraction, license handling. - ---- - -## 7. Use Case: Automating Your Personal Workflow - -Your personal squad isn't just for writing code. It's a workflow tool. - -Set a directive once: - -``` -> Always run linting before marking a task done. -``` - -``` -📌 Captured. Linting required before task completion. -``` - -That directive is now in your personal squad directory — every project, every session. Your agents enforce it everywhere. You set the standard once and it sticks. - -Over time, your personal squad becomes an opinionated workflow engine. Not because you configured it that way — because you worked with it and it learned. - ---- - -## 8. Use Case: Skills That Grow Everywhere - -Skills accumulate in your personal squad directory under `skills/`. Every project contributes. 
- -After a few weeks: - -``` -{personal squad directory}/skills/ - always-use-zod.md - prefer-tailwind.md - cursor-pagination.md - rate-limit-auth-endpoints.md - structured-logging.md - error-boundaries-in-react.md -``` - -These came from different projects. The Zod skill was learned in your API. The Tailwind preference was captured in your web app. The rate-limiting skill came from a code review rejection. - -Now every new project starts with all of that knowledge baked in. Your agents don't make the same mistakes twice — in any repo. - -This is the long game. Early sessions feel similar to project-local squads. But after a few weeks of cross-project work, the difference is real. Your skills directory becomes a personal engineering handbook that your agents actually read. - ---- - -## 9. Where It's Headed - -Honest moment: this is new. Personal squads work, but they're early. - -What works well today: -- Shared team identity across projects -- Skills that accumulate and carry over -- Consistent agent behavior everywhere you work -- **Consult mode** — bring your team to projects you don't own, invisibly ([docs](../features/consult-mode.md)) - -What's still rough: -- No sync mechanism between machines yet — your personal squad directory is local to your machine -- Project keys aren't used for anything yet (that `null` in config.json) -- No UI for browsing your global skills or agent histories (it's files for now) - -We're building in the open. If something feels off, [open an issue](https://github.com/bradygaster/squad/issues). If something feels right, we want to hear about that too. - ---- - -## Tips - -- **Git-first workflow is strongly recommended.** Commit your project `.squad/` directory to git after any session. 
Without version control: - - Session state could be lost on crashes, context resets, or machine reboots - - Project-local decisions and logs are at risk - - Accidental deletions may be unrecoverable - - Cross-project context could vanish - - Think of git as your insurance policy for your project's local state. It's a good habit: after a session ends, run `git add .squad/ && git commit -m "squad: project state"`. (Agent histories, skills, and team identity are stored in your personal squad directory — not in project repos.) - -- **Commit project `.squad/` but not personal squad directory.** The project-local state (decisions, logs) belongs in version control. Your global identity (agents, skills, casting) is personal — keep it out of repos. - ```bash - # DO THIS - git add .squad/ - git commit -m "squad: project state" - - # DON'T commit your personal squad directory (~/.config/squad/ or similar) — it's machine-specific - ``` - -- **Check status anytime.** `squad status` shows your global squad directory and which projects are connected. -- **Skills are the payoff.** The more projects you work across, the more skills accumulate. After a month, your agents have a real knowledge base tailored to how *you* build software. -- **It's just files.** Your personal squad directory is a folder on your machine. You can browse it, edit it, back it up, copy it to another machine manually. No magic, no cloud, no lock-in. But **always back up or version-control your `.squad/` state**. -- **Global install matters.** `npm install -g @bradygaster/squad-cli` gives you the `squad` command everywhere. Without it, you'd need `npx` in each project. Global CLI + global squad = full portability. 
- ---- - -## What to Try Next - -- [**Your First Session**](../get-started/first-session.md) — The full walkthrough from zero to fan-out -- [**Solo Dev Scenario**](../scenarios/solo-dev.md) — Squad for one-person teams -- [**Tips & Tricks**](tips-and-tricks.md) — Patterns that work diff --git a/docs/src/content/docs/guide/sample-prompts.md b/docs/src/content/docs/guide/sample-prompts.md index bbf02fe5c..562318418 100644 --- a/docs/src/content/docs/guide/sample-prompts.md +++ b/docs/src/content/docs/guide/sample-prompts.md @@ -1,34 +1,20 @@ # Sample Prompts - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Copy any of these, open Copilot, select Squad, and paste.** Each one is a ready-to-go project that shows a different Squad capability. - --- - ## Quick Builds - Small enough to ship in a single session. Great for seeing fan-out in action. - ### CLI Pomodoro Timer - ``` I'm building a cross-platform CLI pomodoro timer in Python. It should support: - Configurable work/break intervals with sensible defaults (25/5/15) - A persistent stats tracker that logs completed sessions to a local JSON file - Desktop notifications on macOS, Windows, and Linux - A --report flag that prints weekly stats as a table - Set up the team. I want this done fast — everyone works at once. ``` - **Shows:** Parallel fan-out on a small project. Backend handles timer logic, tester writes test cases from the spec while implementation is in flight. - --- - ### Retro Snake Game - ``` Build a browser-based Snake game using vanilla HTML, CSS, and JavaScript. No frameworks. - Canvas-based rendering at 60fps @@ -36,20 +22,13 @@ Build a browser-based Snake game using vanilla HTML, CSS, and JavaScript. No fra - Score tracking with localStorage high scores - Progressive speed increase every 5 points - A retro CRT-style visual effect using CSS filters - Start building immediately — I want to play this in 20 minutes. 
``` - **Shows:** Fast iteration — frontend, audio, and input handling all built in parallel. Tester writes Playwright tests while the game is being built. - --- - ## Mid-Size Projects - These take a few sessions and show how decisions and memory compound over time. - ### Playwright-Tested Dashboard App - ``` I'm building a React dashboard that shows sales metrics. Stack: React 19, Vite, Tailwind, Node.js backend with Express, SQLite for local dev. Requirements: - Cards showing revenue, orders, and conversion rate @@ -57,16 +36,11 @@ I'm building a React dashboard that shows sales metrics. Stack: React 19, Vite, - A data table with sorting, filtering, and pagination - Dark mode toggle - Playwright E2E tests for every major interaction - Set up the team and start with the backend data layer. ``` - **Shows:** Agents specialize (frontend on Recharts, backend on Express/SQLite, tester on Playwright). Decisions about chart library and data format propagate automatically. - --- - ### Aspire Cloud-Native App - ``` Build a cloud-native distributed app with Aspire. I want: - An AppHost that orchestrates all services @@ -75,70 +49,47 @@ Build a cloud-native distributed app with Aspire. I want: - A Redis cache and PostgreSQL database - Integration tests using Aspire testing support - OpenTelemetry wired up to the Aspire dashboard - Use the latest .NET 9 templates as a starting point. ``` - **Shows:** Full-stack cloud-native development. Agents handle service discovery, container orchestration, and distributed tracing setup in parallel. - --- - ## Feature Showcases - Prompts designed to exercise specific Squad features. - ### Portable Squad — Cross-Platform Habit Tracker - ``` Build a cross-platform habit tracker with a shared Squad config. I want to: 1. Build the backend API first (Node.js + SQLite) 2. Export the squad 3. Import it into a new React Native project for the mobile app 4. Have both projects share the same team memory and decisions - Start with the backend. 
When it's solid, I'll export and we'll start the mobile app. ``` - **Shows:** Export/import, portability, and how decisions persist across projects. - --- - ### Issue-Driven Development - ``` I have 12 open issues on my GitHub repo. I want the team to: 1. Triage all untriaged issues 2. Assign each to the right team member based on labels and content 3. Start working through them in priority order 4. Report progress every 3 rounds - Ralph, go. ``` - **Shows:** Ralph's work monitor loop, GitHub Issues integration, automatic triage and assignment. - --- - ### Full Ceremony Lifecycle - ``` We're building an IoT dashboard for smart home sensors. Before we write any code: 1. Run a design review ceremony — I want the team to debate architecture 2. Write a PRD with acceptance criteria 3. Run a sprint planning ceremony to break work into tasks 4. Then build it — full parallel fan-out - Start with the design review. ``` - **Shows:** Ceremonies, PRD mode, sprint planning, and how they feed into parallel execution. - --- - ## Make Your Own - Template for any project: - ``` I'm building [brief description]. Stack: [language, framework, database] @@ -146,8 +97,6 @@ Key requirements: - [requirement 1] - [requirement 2] - [requirement 3] - Set up the team and start building. ``` - That's it. Squad figures out the team composition, casts names from a universe, and gets to work. After a few sessions, agents know your conventions and stop asking questions they've already answered. diff --git a/docs/src/content/docs/guide/shell.md b/docs/src/content/docs/guide/shell.md deleted file mode 100644 index 8ff557b7c..000000000 --- a/docs/src/content/docs/guide/shell.md +++ /dev/null @@ -1,343 +0,0 @@ -# Interactive Shell Guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- -> 💡 **Recommended: Use GitHub Copilot CLI** — The best way to interact with Squad today is through the [GitHub Copilot CLI](https://docs.github.com/en/copilot/github-copilot-in-the-cli). Run `copilot --agent squad` to start a session with your team. The Copilot CLI provides the richest agent experience, including tool use, parallel execution, and full MCP integration. - -The Squad interactive shell gives you a persistent connection to your team.Instead of spawning short-lived CLI invocations, the shell maintains a real-time session where you can talk to agents, issue commands, and watch work happen. - ---- - -## Getting Started - -### Enter the Shell - -```bash -squad -``` - -With no arguments, `squad` enters the interactive shell. You'll see a prompt: - -``` -squad > -``` - -### Exit the Shell - -``` -squad > /quit -``` - -Or press **Ctrl+C**. - ---- - -## Shell Commands - -All shell commands start with a forward slash `/`. - -### `/status` — Check team status - -Display the current state of your squad: active agents, sessions, and recent work. - -``` -squad > /status -``` - -Output: - -``` -Team Status -──────────────────── -Active Agents: 4/5 - Keaton (lead): idle - McManus (devrel): working (10s) - Verbal (backend): working (25s) - Fenster (tester): idle - Kobayashi (scribe): logging - -Sessions: 5 -Latest decision: "Use React Query for data fetching" (2m ago) -``` - -### `/history` — View recent work - -Display the session log and recent decisions. - -``` -squad > /history -``` - -Shows: -- Last 10 completed tasks -- Decisions made in this session -- Agents that have worked -- Full session transcript (searchable) - -### `/agents` — List team members - -Show all agents on the team with their roles, expertise, and knowledge. - -``` -squad > /agents -``` - -### `/sessions` — List saved sessions - -View past shell sessions. Shows the 10 most recent sessions with their ID prefix, timestamp, and message count. 
- -``` -squad > /sessions -``` - -Output: - -``` -Saved Sessions (3 total) - 1. a1b2c3d4 6/15/2026, 2:30:00 PM (12 messages) - 2. e5f6a7b8 6/14/2026, 10:15:00 AM (8 messages) - 3. c9d0e1f2 6/13/2026, 4:45:00 PM (23 messages) - -Use /resume to restore a session. -``` - -### `/resume ` — Restore a past session - -Resume a previous session by providing the first few characters of its ID. The session's full message history is restored into the current shell. - -``` -squad > /resume a1b2 -✔ Restored session a1b2c3d4 (12 messages) -``` - -Typical workflow — pick up where you left off: - -``` -squad > /sessions -squad > /resume a1b2 -squad > @Keaton, where were we on the auth work? -``` - -### `/clear` — Clear the screen - -Clears terminal output. - -### `/help` — Show all commands - -### `/quit` — Exit the shell - -Close the shell and return to your terminal. - ---- - -## Addressing Agents - -You can talk to specific agents by name: - -### Using `@AgentName` - -``` -squad > @Keaton, analyze the architecture of this project -``` - -### Using natural language - -``` -squad > Keaton, set up the database schema for user authentication -``` - -Or without naming an agent — the coordinator routes to whoever is best suited: - -``` -squad > Write a blog post about our new casting system -``` - ---- - -## Message Routing - -### How Messages Get to Agents - -1. **You type a message** → Shell receives it -2. **Coordinator reads it** → Determines which agent(s) can usefully start -3. **Agents launch in parallel** → All applicable agents work simultaneously -4. **Agents write results** → To `.squad/` (decisions, history, skills, etc.) -5. 
**Shell streams updates** → You see progress in real-time - -### Parallel Execution - -When you give a task that multiple agents can handle: - -``` -squad > Build the login page -``` - -The coordinator might spawn: -- McManus (frontend) → building the UI -- Verbal (backend) → setting up auth endpoints -- Fenster (tester) → writing test cases -- Kobayashi (scribe) → logging everything - -All at once. All in parallel. - ---- - -## Session Management - -### What Is a Session? - -Each agent gets a **persistent session** — a long-lived context where it remembers: -- The task you gave it -- What it's already written to disk -- Previous decisions and learnings -- Its own knowledge base (charter, history) - -Sessions survive crashes. If an agent dies mid-work, it resumes from the exact checkpoint. - -### Viewing Session History - -``` -squad > /history -``` - -Shows full session log with start time, end time, duration, what the agent did, files written, decisions made, and any errors. - -### Resuming Work - -If an agent crashes or times out: - -``` -squad > @Keaton, check on Verbal and resume if needed -``` - ---- - -## Keyboard Shortcuts - -| Shortcut | Action | -|----------|--------| -| `↑` / `↓` | Scroll command history | -| `Ctrl+A` | Jump to start of line | -| `Ctrl+E` | Jump to end of line | -| `Ctrl+U` | Clear to start of line | -| `Ctrl+K` | Clear to end of line | -| `Ctrl+W` | Delete previous word | -| `Ctrl+C` | Exit shell | - ---- - -## Tips and Tricks - -### Check `/status` before big asks - -Before sending a complex task, check team status. If agents are already working, you might want to wait. - -### Reference decisions, not details - -Instead of explaining the whole architecture: - -``` -# Don't: -squad > Build the auth system. Use JWT. Refresh tokens every 1 hour... - -# Do: -squad > Build the auth system. See the auth decision in decisions.md. -``` - -Agents read your decisions — they're shortcuts for complex context. 
- -### Batch work through the coordinator - -``` -squad > @Keaton, here's what needs doing: -1. Set up database schema -2. Build API endpoints -3. Write tests - -Prioritize and route, please. -``` - -The coordinator will decompose, prioritize, and launch agents efficiently. - -### Check `/history` after long waits - -If you step away, run `/history` to see what happened. Every decision is logged, every task is recorded. - -### Name agents explicitly for urgent work - -``` -squad > @Keaton, this is critical: we need the deployment script fixed -``` - -The explicit mention ensures the lead coordinator sees it first. - ---- - -## Advanced Usage - -### Working with Multiple Tasks - -The coordinator queues tasks and parallelizes where possible: - -``` -squad > Write the API spec -squad > Build the React components -squad > Set up the database - -/status # See all three being worked on -``` - -### Asking Agents About Their Work - -``` -squad > @Verbal, what's left on the auth endpoints? -squad > @McManus, show me what you've written so far -squad > @Fenster, are the tests passing? -``` - -Agents respond with status, file paths, and blockers. - -### Custom Agent Chaining - -Instead of asking the coordinator to chain work, set up explicit hand-offs: - -``` -squad > @Keaton, when Verbal finishes the auth API, have him route testing to Fenster -``` - ---- - -## Using the Shell with VS Code - -1. Open an integrated terminal in VS Code -2. Run `squad` to enter the shell -3. Keep it open in a side panel -4. As you edit code, ask agents to review: `@Fenster, test this component` - ---- - -## Troubleshooting - -### Shell Hangs or No Response - -The coordinator might be evaluating a complex task, or an agent might be streaming large output. Press `Ctrl+C` to interrupt, then check `/status`. - -### Agent Not Responding - -Check `/status` and `/history` for blockers. 
Then ask the coordinator to route explicitly: - -``` -squad > @Keaton, route this task to @Verbal and report any blocks -``` - -### Shell Quit Unexpectedly - -Run `squad` again to restart. Check `.squad/log/` for error context. - ---- - -## See Also - -- [CLI Reference](../reference/cli.md) — All CLI commands -- [VS Code](../features/vscode.md) — Squad in VS Code -- [Parallel Execution](../features/parallel-execution.md) — How agents fan out diff --git a/docs/src/content/docs/guide/tips-and-tricks.md b/docs/src/content/docs/guide/tips-and-tricks.md index c110b8c8d..0fd479f2c 100644 --- a/docs/src/content/docs/guide/tips-and-tricks.md +++ b/docs/src/content/docs/guide/tips-and-tricks.md @@ -1,85 +1,52 @@ # Tips & Tricks - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` Team, build the login feature — include UI, API endpoints, and tests ``` - Patterns that make Squad click. Skim the headers, steal what's useful. - --- - ## The Big Three - **Say "team" for parallel work.** The word "team" triggers fan-out — frontend, backend, testing, all at once. - **Name an agent for focused work.** `"Dallas, fix the login bug"` sends work to one specific agent. No team overhead. - **Set rules early.** First session, drop your conventions into a directive. Agents read `decisions.md` before every task — you only say things once. - --- - ## Write Better Prompts - ``` ❌ "Build the auth system" ✅ "Build JWT auth for login/logout/refresh. Redis sessions. Bcrypt passwords. No OAuth yet — that's phase 2." ``` - Be specific about scope. Tell the team what's in, what's out, what's next. Use bullet points for multi-part tasks — agents process lists better than paragraphs. 
- --- - ## Direct vs Team vs General - | When | Do this | Example | |------|---------|---------| | Parallel/cross-functional | Say "Team" | `Team, build the checkout flow` | | Sequential/specialized | Name the agent | `Keaton, review this PR` | | Don't care who | Just describe it | `Add error logging to the API` | - --- - ## Parallel Work — Let It Cook - Don't interrupt parallel work. Squad agents chain automatically — the tester catches failures, the backend fixes them, the tester re-runs. If you jump in after 2 minutes, you break the chain. - When they're done, ask Scribe: ``` What did the team just do? ``` - --- - ## Ralph — Your Work Monitor - Got a backlog? Let Ralph handle it while you focus on the critical path. - ``` Ralph, start monitoring ``` - Ralph triages issues, assigns them, spawns agents, and reports every 3-5 rounds. Say `"Ralph, idle"` to stop. - The `squad-heartbeat` workflow runs Ralph on event-based triggers (issue close, PR merge, manual dispatch). - --- - ## Decisions & Memory - - **Set permanent rules:** `"Always use TypeScript strict mode"` → goes to `decisions.md` - **Capture lessons:** `"Never include passwords in API responses"` → agents remember forever - **Check alignment:** When agents disagree, the decision is probably missing. Add it. - **Commit `.squad/`** — it's your team's brain. Anyone who clones gets the full team. 
- --- - ## Common Pitfalls - | Pitfall | Fix | |---------|-----| | Vague prompt → agents ask questions | Be specific about scope upfront | @@ -88,4 +55,3 @@ The `squad-heartbeat` workflow runs Ralph on event-based triggers (issue close, | Not using Ralph on a full backlog | `Ralph, go` — let the bot grind | | Too many agents | Start with 4-5, add specialists later | | Lost team knowledge | Commit `.squad/` to git | - diff --git a/docs/src/content/docs/reference/api-reference.md b/docs/src/content/docs/reference/api-reference.md index a41ed24bb..c655b9025 100644 --- a/docs/src/content/docs/reference/api-reference.md +++ b/docs/src/content/docs/reference/api-reference.md @@ -1,11 +1,6 @@ # SDK API Reference - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Complete reference for all public exports from `@bradygaster/squad-sdk`. Each section includes types, functions, and usage examples. - ## Overview - ```typescript import { // Resolution @@ -33,74 +28,47 @@ import { initSquadTelemetry, } from '@bradygaster/squad-sdk'; ``` - --- - ## Resolution - Functions to locate Squad directories. - ### `resolveSquad(startPath?: string): string` - Find `.squad/` directory starting from a path and walking up to the project root. Throws if not found. - ```typescript const squadPath = resolveSquad(); const squadPath = resolveSquad('/home/user/project/src'); ``` - ### `resolveGlobalSquadPath(): string` - -Get path to global personal squad. Returns platform-specific path: `~/.config/squad/` on Linux, `~/Library/Application Support/squad/` on macOS, `%APPDATA%\squad\` on Windows. - +Get the platform-specific root used for shared or external Squad state. ### `ensureSquadPath(startPath?: string): string` - Like `resolveSquad()`, but creates the directory if it doesn't exist. - --- - ## Runtime Constants - ### `MODELS: ModelCatalog` - All supported models, organized by tier. 
- ```typescript MODELS.premium; // ['claude-opus-4.6', 'gpt-5.2', ...] MODELS.standard; // ['claude-sonnet-4.5', 'gpt-5.1', ...] MODELS.fast; // ['claude-haiku-4.5', 'gpt-5-mini', ...] ``` - ### `TIMEOUTS: TimeoutConfig` - Standard timeout values for agent operations. - ```typescript TIMEOUTS.agentInitMs; // 30000 (30s) TIMEOUTS.agentExecuteMs; // 300000 (5 min) TIMEOUTS.coordinatorRouteMs; // 5000 (5s) ``` - ### `AGENT_ROLES: Record` - Standard agent roles and their default properties. - --- - ## Configuration - ### `loadConfig(squadPath: string): Promise` - Load configuration asynchronously. Reads `squad.config.ts` (if present), parses routing/model overrides, validates schemas. - ```typescript const config = await loadConfig('./.squad'); console.log(config.team.name); console.log(Object.keys(config.agents)); ``` - **Types:** - ```typescript interface ConfigLoadResult { team: { @@ -112,7 +80,6 @@ interface ConfigLoadResult { routing?: RoutingConfig; models?: ModelConfig; } - interface AgentConfig { role: string; model?: string; @@ -120,19 +87,12 @@ interface AgentConfig { status?: 'active' | 'inactive'; } ``` - ### `loadConfigSync(squadPath: string): ConfigLoadResult` - Synchronous version of `loadConfig()`. - --- - ## Agents & Onboarding - ### `onboardAgent(options: OnboardOptions): Promise` - Create a new agent directory, charter, and history file. - ```typescript const result = await onboardAgent({ teamRoot: './.squad', @@ -143,9 +103,7 @@ const result = await onboardAgent({ userName: 'Alice', }); ``` - **Types:** - ```typescript interface OnboardOptions { teamRoot: string; @@ -156,7 +114,6 @@ interface OnboardOptions { userName?: string; charterTemplate?: string; } - interface OnboardResult { createdFiles: string[]; agentDir: string; @@ -164,39 +121,28 @@ interface OnboardResult { historyPath: string; } ``` - --- - ## Casting - ### `CastingEngine` - Generate agent personas from universe themes. 
- ```typescript const engine = new CastingEngine({ universes: ['The Wire', 'Seinfeld'], activeUniverse: 'The Wire', }); - const members = await engine.castTeam([ { role: 'lead', title: 'Lead Developer' }, { role: 'backend', title: 'Backend Engineer' }, ]); ``` - ### `CastingHistory` - Track all casting decisions over time. - ```typescript const history = new CastingHistory('./.squad/casting'); const records = history.getRecordsByAgent('lead'); const previousCast = history.findByName('Stringer'); ``` - **Types:** - ```typescript interface CastMember { name: string; @@ -205,35 +151,25 @@ interface CastMember { displayName: string; } ``` - --- - ## Coordinator - ### `SquadCoordinator` - Main class for routing work to agents. - ```typescript const coordinator = new SquadCoordinator({ teamRoot: './.squad', enableParallel: true, }); - await coordinator.initialize(); - const decision = await coordinator.route('refactor the API'); console.log(decision.tier); // 'standard' or 'full' console.log(decision.agents); // ['backend', 'tester'] console.log(decision.parallel); // true if multi-agent console.log(decision.rationale); // Explanation of routing choice - await coordinator.execute(decision, 'refactor the API'); await coordinator.shutdown(); ``` - **Types:** - ```typescript interface RoutingDecision { tier: ResponseTier; @@ -241,26 +177,16 @@ interface RoutingDecision { parallel: boolean; rationale: string; } - type ResponseTier = 'direct' | 'lightweight' | 'standard' | 'full'; ``` - ### `selectResponseTier(context: TierContext): TierName` - Choose the right response tier for a task. - ### `getTier(name: TierName): TierDefinition` - Get configuration for a specific tier (max agents, default model, available tools). - --- - ## Tools - ### `defineTool(config: ToolConfig): SquadTool` - Define a new tool with typed parameters. 
- ```typescript const myTool = defineTool<{ query: string }>({ name: 'search_docs', @@ -282,28 +208,20 @@ const myTool = defineTool<{ query: string }>({ }, }); ``` - ### `ToolRegistry` - Manage the built-in tool set. - ```typescript import { ToolRegistry } from '@bradygaster/squad-sdk/tools'; import type { FanOutDependencies } from '@bradygaster/squad-sdk/coordinator'; - const registry = new ToolRegistry('./.squad'); const tools = registry.getTools(); const agentTools = registry.getToolsForAgent(['squad_route', 'squad_decide']); ``` - **Constructor:** `new ToolRegistry(squadRoot?, sessionPoolGetter?, storage?, state?, fanOutDepsGetter?)` - - `fanOutDepsGetter` — Required for `squad_route` to spawn sessions via `spawnParallel`. Returns a `FanOutDependencies` object (from `@bradygaster/squad-sdk/coordinator`). Without it, `squad_route` returns `resultType: 'failure'` with `error: 'fan-out-deps-unavailable'`. - `state` — When provided, `squad_route` validates that the target agent exists in the team roster before spawning. - Agent names must match `/^[a-zA-Z0-9_-]+$/`. Spawn errors are sanitized before being returned to the LLM. - **Built-in tools:** - | Tool | Purpose | |------|---------| | `squad_route` | Route a task to another agent (requires `fanOutDepsGetter`) | @@ -311,94 +229,60 @@ const agentTools = registry.getToolsForAgent(['squad_route', 'squad_decide']); | `squad_memory` | Append to agent history | | `squad_status` | Query session pool state | | `squad_skill` | Read/write agent skills | - --- - ## Observability (OpenTelemetry) - Three-layer observability API for traces, metrics, and telemetry. 
- ### Layer 1: Low-Level Control - ```typescript import { initializeOTel, shutdownOTel, getTracer, getMeter } from '@bradygaster/squad-sdk'; - await initializeOTel({ endpoint: 'http://localhost:4318', serviceName: 'my-squad', }); - const tracer = getTracer('my-component'); const meter = getMeter('my-component'); - await shutdownOTel(); ``` - ### Layer 2: Mid-Level Bridge - ```typescript import { bridgeEventBusToOTel, createOTelTransport } from '@bradygaster/squad-sdk'; - const unsubscribe = bridgeEventBusToOTel(eventBus); const transport = createOTelTransport(); ``` - ### Layer 3: High-Level Convenience - ```typescript import { initSquadTelemetry } from '@bradygaster/squad-sdk'; - const telemetry = await initSquadTelemetry({ endpoint: 'http://localhost:4318', serviceName: 'my-squad', eventBus: myEventBus, }); - await telemetry.shutdown(); ``` - --- - ## Streaming - ### `createReadableStream(response: unknown): ReadableStream` - Convert an agent response to a readable stream. - ```typescript const stream = createReadableStream(agentResponse); const reader = stream.getReader(); let result; - while (!(result = await reader.read()).done) { console.log(result.value); } ``` - --- - ## Upstream Inheritance - ### `readUpstreamConfig(squadPath: string): Promise` - Load upstream sources from `.squad/upstream.json`. - ### `resolveUpstreams(config: UpstreamConfig, squadPath: string): Promise` - Resolve all upstreams and return their inherited content. - ### `buildInheritedContextBlock(resolved: ResolvedUpstream[]): string` - Build a markdown block of all inherited context (for agent charters). - ### `buildSessionDisplay(resolved: ResolvedUpstream[]): string` - Build a human-readable display of upstream sources (for `squad status`). 
- --- - ## Glossary of Exports - | Export | Type | Module | Purpose | |--------|------|--------|---------| | `resolveSquad` | function | resolution | Find .squad directory | @@ -424,11 +308,8 @@ Build a human-readable display of upstream sources (for `squad status`). | `bridgeEventBusToOTel` | function | runtime/otel-bridge | EventBus → OTel | | `createOTelTransport` | function | runtime/otel-bridge | Create OTel transport | | `initSquadTelemetry` | function | runtime/otel-init | One-call setup | - --- - ## See Also - - [SDK Reference](sdk.md) — Quick reference for common SDK usage - [Integration Guide](integration.md) — Connecting to the Copilot SDK - [Tools & Hooks](tools-and-hooks.md) — Custom tools and hook pipeline diff --git a/docs/src/content/docs/reference/cli.md b/docs/src/content/docs/reference/cli.md index ffc9221e3..4b7213a6e 100644 --- a/docs/src/content/docs/reference/cli.md +++ b/docs/src/content/docs/reference/cli.md @@ -1,35 +1,21 @@ # CLI Reference - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -Everything you need to run Squad from the command line — commands, shell interactions, configuration files, and environment variables. - +Everything you need to run Squad from the command line — setup, monitoring, remote access, configuration files, and environment variables. --- - ## Installation - ```bash # Global install (recommended) npm install -g @bradygaster/squad-cli - # One-off with npx npx @bradygaster/squad-cli init - # Latest from GitHub (bleeding edge) squad init ``` - --- - ## CLI Commands (17 commands) - | Command | Description | Requires `.squad/` | |---------|-------------|:------------------:| -| `squad` | **Deprecated** — Enter interactive shell (no args). Use `copilot --agent squad` instead. 
| No | | `squad init` | Initialize Squad in the current repo (idempotent — safe to run multiple times) | No | | `squad init --state-backend <backend>` | Initialize with a specific state backend (`local`, `orphan`, `two-layer`) | No | -| `squad init --global` | Create a personal squad in your platform-specific directory | No | | `squad init --mode remote <path>` | Initialize linked to a remote team root (dual-root mode) | No | | `squad link <path>` | Link project to a remote team root | Yes | | `squad loop` | Run a prompt-driven work loop from `loop.md` | Yes | @@ -54,7 +40,6 @@ squad init | `squad watch --max-concurrent N` | Max parallel issues per round (default: 1) | Yes | | `squad watch --timeout N` | Per-issue timeout in minutes (default: 30) | Yes | | `squad watch --copilot-flags "..."` | Extra flags for Copilot CLI | Yes | -| `squad shell` | **Deprecated** — Launch interactive shell explicitly. Use `copilot --agent squad` instead. | No | | `squad copilot` | Add the @copilot coding agent to the team | Yes | | `squad copilot --off` | Remove @copilot from the team | Yes | | `squad copilot --auto-assign` | Enable auto-assignment for @copilot | Yes | @@ -66,75 +51,52 @@ squad init | `squad aspire` | Launch Aspire dashboard for observability | No | | `squad aspire --docker` | Force Docker mode for Aspire | No | | `squad upstream add\|remove\|list\|sync` | Manage upstream Squad sources | Yes | -| `copilot --agent squad` | Launch interactive shell explicitly | No | +| `copilot --agent squad` | Launch Squad in the GitHub Copilot CLI | No | | `squad nap` | Context hygiene (compress, prune, archive .squad/ state) | Yes | | `squad nap --deep` | Thorough cleanup with recursive descent | Yes | | `squad nap --dry-run` | Preview cleanup actions without changes | Yes | | `squad scrub-emails [directory]` | Remove email addresses from Squad state files (default: `.squad/`) | No | | `squad --version` | Print installed version | No | - ### Remote Init Mode - Use `--mode remote` to link your project 
to a shared team root: - ```bash squad init --mode remote ../team-repo ``` - In dual-root mode, project-specific state lives in your local `.squad/` while team identity (casting, charters, shared decisions) lives in the remote location. This is useful for monorepos or organizations with a shared team definition. - --- - ### squad start - Start Copilot with optional remote access via phone. Spawns Copilot in a PTY and mirrors to your phone via WebSocket + devtunnel. - **Flags:** - - `--tunnel` — Create a devtunnel for remote access (shows QR code for phone scanning). Requires `devtunnel` CLI installed and authenticated (`devtunnel user login`). - `--port <port>` — Specific WebSocket port (default: random). Example: `--port 3456` - `--command <command>` — Run a custom command instead of copilot. Example: `--command powershell` - All copilot flags pass through. Example: `squad start --tunnel --yolo` or `squad start --tunnel --model gpt-4` - **Examples:** - ```bash # Basic local PTY (no phone access) squad start - # With phone access + devtunnel squad start --tunnel # Output: QR Code, URL, Session ID - # Custom port, local only squad start --port 3456 - # Custom command with tunnel squad start --tunnel --command powershell - # Copilot flags pass through squad start --tunnel --yolo squad start --tunnel --model gpt-4 --no-config ``` - -For details on architecture, security, mobile keyboard, and troubleshooting, see [Remote Control Guide](../features/remote-control.md). - +For remote Copilot access, architecture notes, and troubleshooting, see [Squad RC](../features/squad-rc.md). --- - ### squad loop - Run a prompt-driven work loop from a `loop.md` file. Each cycle, Loop sends your prompt to Copilot and loops again at your chosen interval. 
- **Basic usage:** - ```bash squad loop # Run the loop from loop.md squad loop --init # Create a starter loop.md squad loop --file scripts/monitor.md # Run a custom loop file ``` - **Flags:** - - `--init` — Create a starter `loop.md` file in your project - `--file <path>` — Path to loop file (default: `loop.md` in project root) - `--interval <minutes>` — Override loop interval in minutes (default: from frontmatter) @@ -144,42 +106,30 @@ squad loop --file scripts/monitor.md # Run a custom loop file - `--monitor-email` — Scan email for alerts each cycle (requires WorkIQ MCP) - `--monitor-teams` — Scan Teams for action items each cycle (requires WorkIQ MCP) - `--self-pull` — Run `git fetch && git pull` before each cycle - **Frontmatter reference:** - Loop.md requires YAML frontmatter with: - | Field | Type | Description | |-------|------|-------------| | `configured` | boolean | Safety check — must be `true` to run (prevents accidental execution) | | `interval` | number | Minutes between cycles (default: 10) | | `timeout` | number | Max runtime in minutes per cycle (default: 30) | | `description` | string | Human-readable description of the loop | - **Examples:** - ```bash # Create a starter loop squad loop --init - # Edit loop.md, then run it squad loop - # Run with faster interval (overrides frontmatter) squad loop --interval 3 - # Run with monitoring squad loop --monitor-email --monitor-teams - # Run a named loop file squad loop --file scripts/ci-monitor.md - # Run with custom Copilot model squad loop --copilot-flags "--model gpt-4" ``` - **Example loop.md:** - ```markdown --- configured: true @@ -187,75 +137,28 @@ interval: 10 timeout: 20 description: "Monitor failing CI and fix issues" --- - # CI Monitor Loop - Each cycle, you will: - 1. Check GitHub Actions for failures in main branch 2. If failures exist, investigate the top 1-2 3. If fixable, create a PR with the fix 4. Report findings (failures found, fixes created) - Keep cycles to 20 minutes max. 
``` - **MCP auto-injection:** When using the default Copilot agent, `squad loop` automatically injects `--yolo --additional-mcp-config @.mcp.json` into every Copilot invocation. See [Copilot CLI MCP Trust Gate](../features/copilot-mcp-trust.md). - For complete documentation and examples, see [Loop — Prompt-driven work loop](../features/loop.md). - --- - -Enter the shell with `squad` (no arguments). You'll see: - +### Prompting agents through Copilot CLI +Launch Squad with `copilot --agent squad`, then prompt it directly: +```text +@Keaton, analyze the architecture +Keaton, set up the database schema +Build a blog post about our casting system ``` -squad > -``` - -### Shell Commands - -All shell commands start with `/`. - -| Command | What it does | -|---------|-------------| -| `/status` | Show active agents, sessions, recent decisions | -| `/history` | View session log — tasks, decisions, agent work | -| `/agents` | List team members with roles and expertise | -| `/sessions` | List saved sessions | -| `/resume ` | Restore a past session | -| `/version` | Show version | -| `/clear` | Clear terminal output | -| `/help` | Show all commands | -| `/quit` | Exit the shell (also: `Ctrl+C`) | - -### Addressing Agents - -``` -squad > @Keaton, analyze the architecture -squad > Keaton, set up the database schema -squad > Build a blog post about our casting system -``` - Agent name matching is **case-insensitive** — `@keaton`, `@Keaton`, and `@KEATON` all route to the same agent. Name an agent to route directly. Omit the name and the coordinator routes to the best fit. 
- -### Keyboard Shortcuts - -| Shortcut | Action | -|----------|--------| -| `↑` / `↓` | Scroll command history | -| `Ctrl+A` | Jump to start of line | -| `Ctrl+E` | Jump to end of line | -| `Ctrl+U` | Clear to start of line | -| `Ctrl+K` | Clear to end of line | -| `Ctrl+W` | Delete previous word | -| `Ctrl+C` | Exit shell | - --- - ## Configuration Files - ### `.squad/` Directory Structure - ``` .squad/ ├── team.md # Roster — agent names, roles, human members @@ -275,33 +178,23 @@ Agent name matching is **case-insensitive** — `@keaton`, `@Keaton`, and `@KEAT │ └── ... └── history-archive/ # Archived old session logs ``` - ### `team.md` - Defines the roster. Squad generates this during init, but you can edit it: - ```markdown ## Team - 🏗️ Neo — Lead Scope, decisions, code review ⚛️ Trinity — Frontend Dev React, TypeScript, UI 🔧 Morpheus — Backend Dev Node.js, Express, Prisma 🧪 Tank — Tester Jest, integration tests 📋 Scribe — (silent) Memory, decisions, session logs - ## Human Team Members - - **Sarah** — Senior Backend Engineer - **Jamal** — Frontend Lead ``` - ### `routing.md` - Controls which agent gets which work: - ```markdown # Routing Rules - **Frontend changes** → Trinity **Backend API work** → Morpheus **Database migrations** → Morpheus @@ -309,85 +202,58 @@ Controls which agent gets which work: **Architecture decisions** → Neo **Backend architecture decisions** → Sarah (human) ``` - ### `decisions.md` - Append-only log of architectural decisions. Agents read this before every task: - ```markdown ### 2025-07-15: Use Zod for API validation **By:** Morpheus **What:** All API input validation uses Zod schemas **Why:** Type-safe, composable, generates TypeScript types ``` - ### `directives.md` - Permanent rules agents always follow: - ```markdown - Always use TypeScript strict mode - No any/unknown casts - All database queries through Prisma, no raw SQL ``` - --- - ## Resolution Order - When Squad starts, it looks for `.squad/` in this order: - 1. 
Current directory (`./.squad/`) 2. Parent directories (walk up to project root) -3. Personal squad directory (platform-specific: `~/.config/squad/` on Linux, `~/Library/Application Support/squad/` on macOS, `%APPDATA%\squad\` on Windows) -4. Global CLI default (fallback only) - +3. Linked or external Squad state configured for this workspace +4. Global CLI fallback (when explicitly configured) First match wins. - --- - ## Environment Variables - | Variable | Purpose | Values | |----------|---------|--------| | `SQUAD_CLIENT` | Detected client platform | `cli`, `vscode` | | `COPILOT_TOKEN` | Copilot auth token (SDK usage) | Token string | - --- - --- - ## Troubleshooting with `squad doctor` - When something isn't working, run: - ```bash squad doctor ``` - This performs a comprehensive diagnostic check of your Squad setup, validating: - - `.squad/` directory structure - Required configuration files (team.md, routing.md, etc.) - Agent definitions and capabilities - File permissions and integrity - Integration with GitHub and Copilot - ### Usage Examples - ```bash # Run diagnostics on the current project squad doctor - # Quick check after upgrading Squad squad upgrade && squad doctor - # Verify setup after cloning a repo with a squad git clone my-project && cd my-project && squad doctor ``` - ### Example Output - ``` ✓ .squad/ directory exists ✓ team.md is readable and valid @@ -395,24 +261,17 @@ git clone my-project && cd my-project && squad doctor ⚠ skills/ directory is empty — consider adding documentation ✓ .gitattributes rules applied ``` - The doctor always exits cleanly (no error code) because it's a diagnostic tool, not a gate. Use it to troubleshoot setup issues, validate team state, or run before opening an issue on GitHub. 
- --- - ## Version Management - ```bash squad --version # Check version npm install -g @bradygaster/squad-cli@latest # Update npm install -g @bradygaster/squad-cli@1.2.3 # Pin version npm install -g @bradygaster/squad-cli@insider # Dev-channel prerelease builds ``` - --- - ## See Also - - [SDK Reference](./sdk.md) — Programmatic API - [Recipes & Advanced Scenarios](../cookbook/recipes.md) — Prompt-driven cookbook - [Adding Squad to an Existing Repo](../scenarios/existing-repo.md) — Getting started walkthrough diff --git a/docs/src/content/docs/reference/config.md b/docs/src/content/docs/reference/config.md index eb4654c21..4aa675110 100644 --- a/docs/src/content/docs/reference/config.md +++ b/docs/src/content/docs/reference/config.md @@ -1,20 +1,12 @@ # Configuration Reference - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` squad init ``` That's it. Squad works out of the box. Everything below is optional. - --- - ## squad.config.ts - For type-safe SDK-First configuration, create this at your project root: - ```typescript import { defineSquad, @@ -22,7 +14,6 @@ import { defineAgent, defineRouting, } from '@bradygaster/squad-sdk'; - export default defineSquad({ version: '1.0.0', team: defineTeam({ @@ -53,15 +44,10 @@ export default defineSquad({ }), }); ``` - Each builder (`defineSquad()`, `defineTeam()`, `defineAgent()`, etc.) validates your config at runtime with type-safe error messages. Edit your `.ts` file, then run `squad build` to generate `.squad/` markdown. - **Or start with markdown:** `squad init` creates a markdown-only squad with no config file needed. - --- - ## .squad/ Directory - ``` .squad/ ├── team.md # Who's on the team @@ -80,19 +66,12 @@ Each builder (`defineSquad()`, `defineTeam()`, `defineAgent()`, etc.) validates ├── log/ # Session logs └── orchestration-log/ # Coordinator state ``` - Commit this directory. It's your team's brain. 
Anyone who clones the repo gets the full team with all their knowledge. - --- - ## .squad/ — Required vs Optional Files - `squad init` creates a working team. Here's what's required and what's optional. - ### Required Files - These are always created by `squad init`. The loader expects them. - | File | Purpose | Can You Edit? | |------|---------|---------------| | `.squad/team.md` | Team roster — loader requires it | Yes | @@ -105,42 +84,29 @@ These are always created by `squad init`. The loader expects them. | `.squad/identity/now.md` | Current team focus | Auto-updated | | `.squad/identity/wisdom.md` | Accumulated team patterns | Auto-updated | | `.gitattributes` | Merge drivers for append-only files | Merge rules only | - ### Optional Files - These are created only when you opt in during init. - - **`.squad/templates/`** — SDK templates, overwritten on upgrade - **`.github/workflows/*.yml`** — CI/CD workflows (opt-in: `--include-workflows`) - **`.copilot/mcp-config.json`** — MCP server config (opt-in: `--include-mcp-config`) - > ⚠️ **Hard rule:** Squad NEVER writes temp files, logs, or memory to your repo root. All team state lives in `.squad/` only. Your project tree stays clean. - ### Quick Recovery - ```bash squad doctor # Check for issues rm -rf .squad && squad init # Full reset (back up agents/decisions first) ``` - --- - ## Routing Rules - Control which agent gets which work. Edit `.squad/routing.md` or configure in `squad.config.ts`: - ```markdown # Routing Rules - **Frontend changes** → Trinity **Backend API work** → Morpheus **Database migrations** → Morpheus **Test writing** → Tank **Architecture decisions** → Neo ``` - Or programmatically: - ```typescript routing: { workTypes: [ @@ -153,55 +119,37 @@ routing: { ], } ``` - --- - ## Model Configuration - 17 models across three tiers. 
Squad picks the right one, or you override: - | Tier | Models | Use Case | |------|--------|----------| | **premium** | claude-opus-4, gpt-4.1 | Architecture, code review | | **standard** | claude-sonnet-4, gpt-4.1 | Most work | | **fast** | claude-haiku-3.5, gpt-4.1-mini | Triage, logging, quick tasks | - Per-agent overrides in `model-config.json`: - ```json { "neo": "claude-opus-4", "tank": "claude-haiku-3.5" } ``` - Resolution order: user override → charter → task auto-select → config default. - --- - ## Resolution Order - Squad finds `.squad/` by walking up: - 1. Current directory (`./.squad/`) 2. Parent directories (up to project root) -3. Personal squad directory (platform-specific: `~/.config/squad/` on Linux, `~/Library/Application Support/squad/` on macOS, `%APPDATA%\squad\` on Windows) -4. Global CLI default (fallback) - +3. Linked or external Squad state configured for this workspace +4. Global CLI fallback First match wins. - --- - ## Environment Variables - | Variable | Purpose | |----------|---------| | `SQUAD_CLIENT` | Detected client (`cli` or `vscode`) | | `COPILOT_TOKEN` | Auth token for SDK usage | - --- - ## See Also - - [CLI Reference](cli.md) — Commands, configuration files, and environment variables - [SDK Reference](sdk.md) — Programmatic API diff --git a/docs/src/content/docs/reference/glossary.md b/docs/src/content/docs/reference/glossary.md index 5642dd0ba..96114acfd 100644 --- a/docs/src/content/docs/reference/glossary.md +++ b/docs/src/content/docs/reference/glossary.md @@ -1,39 +1,19 @@ -# Glossary - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - -Key terms defined in one sentence each. Alphabetical order. - ---- - -**Agent** — A specialist AI team member with a role, charter, and persistent memory that handles specific types of work. - -**Casting** — The process of forming your team by proposing agents, confirming roles, and writing their charters to `.squad/`. 
- -**Ceremony** — A scheduled team event like retrospectives, reviews, or planning sessions defined in `.squad/ceremonies.md`. - -**Coordinator** — Squad's routing engine that reads your request, checks routing rules, and spawns the right agents. - -**Decisions** — Architectural choices, conventions, and directives captured in `.squad/decisions.md` that all agents read before working. - -**Directive** — A persistent rule or convention you give the team (like "Always use Zod for validation") that gets written to `decisions.md`. - -**History** — Each agent's memory of past work, stored in `.squad/agents/{name}/history.md` and read before every spawn. - -**Memory** — All persistent team state stored in the `.squad/` directory, including roster, routing rules, decisions, and agent histories. - -**Ralph** — The silent work monitor agent that watches your GitHub or GitLab issues and tracks work in progress. - -**Routing** — Rules in `.squad/routing.md` that define which agent handles which type of work, read by the coordinator before spawning. - -**Scribe** — The silent agent that tracks decisions and logs sessions, merging proposals from all agents into `.squad/decisions.md`. - -**Skill** — A reusable capability stored in `.copilot/skills/` that agents can learn and execute. - -**Spawn** — The act of starting an agent as an independent subprocess with its own context window, tools, and memory. - -**Squad** — Your AI development team, coordinated through the Squad framework. - -**.squad/ directory** — The root directory containing all team state: roster, routing, decisions, agent charters and histories, and ceremony config. - -**Team** — The collection of agents working on your project, defined in `.squad/team.md`. +# Glossary +Key terms defined in one sentence each. Alphabetical order. +--- +**Agent** — A specialist team member with a role, charter, and persistent memory that handles specific types of work. 
+**Casting** — The process of forming your team by proposing agents, confirming roles, and writing their charters to `.squad/`. +**Ceremony** — A scheduled team event like retrospectives, reviews, or planning sessions defined in `.squad/ceremonies.md`. +**Coordinator** — Squad's routing engine that reads your request, checks routing rules, and spawns the right agents. +**Decisions** — Architectural choices, conventions, and directives captured in `.squad/decisions.md` that all agents read before working. +**Directive** — A persistent rule or convention you give the team (like "Always use Zod for validation") that gets written to `decisions.md`. +**History** — Each agent's memory of past work, stored in `.squad/agents/{name}/history.md` and read before every spawn. +**Memory** — All persistent team state stored in the `.squad/` directory, including roster, routing rules, decisions, and agent histories. +**Ralph** — The silent work monitor agent that watches your GitHub or GitLab issues and tracks work in progress. +**Routing** — Rules in `.squad/routing.md` that define which agent handles which type of work, read by the coordinator before spawning. +**Scribe** — The silent agent that tracks decisions and logs sessions, merging proposals from all agents into `.squad/decisions.md`. +**Skill** — A reusable capability stored in `.copilot/skills/` that agents can learn and execute. +**Spawn** — The act of starting an agent as an independent subprocess with its own context window, tools, and memory. +**Squad** — Your AI development team, coordinated through the Squad framework. +**.squad/ directory** — The root directory containing all team state: roster, routing, decisions, agent charters and histories, and ceremony config. +**Team** — The collection of agents working on your project, defined in `.squad/team.md`. 
diff --git a/docs/src/content/docs/reference/integration.md b/docs/src/content/docs/reference/integration.md index d50f823dc..94427dbdd 100644 --- a/docs/src/content/docs/reference/integration.md +++ b/docs/src/content/docs/reference/integration.md @@ -1,76 +1,49 @@ # SDK Integration Guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - This guide covers connecting to the Copilot SDK via Squad's adapter layer, managing sessions, handling events, and recovering from errors. - --- - ## SquadClient Setup - `SquadClient` wraps `@github/copilot-sdk` with lifecycle management and auto-reconnection: - ```typescript import { SquadClient } from '@bradygaster/squad-sdk'; - const client = new SquadClient({ port: 3000, auth: { token: process.env.COPILOT_TOKEN }, reconnection: { maxRetries: 5, backoffMs: 1000 }, }); - await client.connect(); ``` - The client tracks connection state via `SquadConnectionState`: `disconnected → connecting → connected → reconnecting → error`. Auto-reconnection uses exponential backoff with jitter. - --- - ## Session Management - Use `SquadClientWithPool` for production workloads — it composes `SquadClient`, `SessionPool`, and `EventBus`: - ```typescript import { SquadClientWithPool } from '@bradygaster/squad-sdk'; - const squad = new SquadClientWithPool({ client: clientOptions, pool: { maxConcurrent: 10, idleTimeout: 60_000 }, }); - const session = await squad.createSession({ agent: 'backend' }); const response = await session.sendMessage('Implement the /users endpoint'); await session.destroy(); ``` - `SessionPool` enforces concurrency limits, runs health checks, and reaps idle sessions automatically. `SessionStatus` tracks each session through `creating → active → idle → error → destroyed`. 
- --- - ## Event Handling - `EventBus` provides typed pub/sub for session lifecycle events: - ```typescript squad.events.on('session.created', (event) => { console.log(`Session ${event.sessionId} started`); }); - squad.events.on('session.status_changed', (event) => { if (event.payload.status === 'error') { // handle degraded session } }); ``` - Events include `session.created`, `session.destroyed`, `session.status_changed`, and tool execution events. - --- - ## Error Handling - All SDK errors are wrapped in `SquadError` subtypes with severity, category, and recoverability: - ```typescript try { await client.connect(); @@ -82,9 +55,7 @@ try { } } ``` - Error classes: - | Class | Description | |-------|-------------| | `SDKConnectionError` | Connection failures (retryable) | @@ -96,19 +67,12 @@ Error classes: | `RateLimitError` | Rate limit exceeded | | `RuntimeError` | General runtime errors | | `ValidationError` | Input validation failures | - Use `ErrorFactory` to wrap raw SDK errors with Squad context. - --- - ## Telemetry - `TelemetryCollector` tracks operation latency and error rates. `HealthMonitor` runs periodic connection checks returning `HealthCheckResult` with status (`healthy | degraded | unhealthy`) and response time. - --- - ## See Also - - [SDK API Reference](api-reference.md) — Full type and function reference - [Tools & Hooks](tools-and-hooks.md) — Custom tools and hook pipeline - [SDK Reference](sdk.md) — Quick reference diff --git a/docs/src/content/docs/reference/sdk.md b/docs/src/content/docs/reference/sdk.md index efacaec9a..33ae5d388 100644 --- a/docs/src/content/docs/reference/sdk.md +++ b/docs/src/content/docs/reference/sdk.md @@ -1,66 +1,43 @@ # SDK Reference - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Complete reference for `@bradygaster/squad-sdk` — the programmatic API for Squad. 
- > **See also:** [API Reference](api-reference.md) — Complete auto-generated reference with full type signatures for all exports. - ```bash npm install @bradygaster/squad-sdk ``` - All imports work from the barrel export: - ```typescript import { resolveSquad, loadConfig, SquadCoordinator, defineTool } from '@bradygaster/squad-sdk'; ``` - --- - ## Resolution - Find `.squad/` directories on disk. - | Function | Description | |----------|-------------| | `resolveSquad(startPath?)` | Find `.squad/` walking up from `startPath` (throws if not found) | -| `resolveGlobalSquadPath()` | Get personal squad directory path (platform-specific) | +| `resolveGlobalSquadPath()` | Get the platform-specific Squad data root | | `ensureSquadPath(startPath?)` | Like `resolveSquad`, but creates `.squad/` if missing | - ```typescript const squadPath = resolveSquad(); // '/home/user/project/.squad' -const globalPath = resolveGlobalSquadPath(); // Platform-specific: ~/.config/squad/ (Linux), ~/Library/Application Support/squad/ (macOS), %APPDATA%\squad\ (Windows) +const globalPath = resolveGlobalSquadPath(); // Platform-specific Squad data root const safePath = ensureSquadPath(); // Creates if needed ``` - --- - ## Configuration - ### `loadConfig(squadPath): Promise<ConfigLoadResult>` - Load and validate Squad configuration asynchronously. - ```typescript const config = await loadConfig('./.squad'); config.team.name; // Team name Object.keys(config.agents); // Agent names config.routing.workTypes; // Routing rules ``` - ### `loadConfigSync(squadPath): ConfigLoadResult` - Synchronous version for scripts and CLI tools. 
- ### `defineConfig(partial): SquadConfig` - Create a typed config with defaults and editor autocomplete: - ```typescript // squad.config.ts import { defineConfig } from '@bradygaster/squad-sdk'; - export default defineConfig({ team: { name: 'my-squad', root: '.squad' }, agents: { @@ -81,9 +58,7 @@ export default defineConfig({ }, }); ``` - ### Key Types - ```typescript interface ConfigLoadResult { team: { name: string; root: string; description?: string }; @@ -91,7 +66,6 @@ interface ConfigLoadResult { routing?: RoutingConfig; models?: ModelConfig; } - interface AgentConfig { role: string; model?: string; @@ -99,21 +73,13 @@ interface AgentConfig { status?: 'active' | 'inactive'; } ``` - --- - ## Builder Functions (SDK-First Mode) - Type-safe team configuration with runtime validation. Each builder accepts a config object, validates it, and returns the typed value. - > **New in Phase 1** — SDK-First Mode lets you define teams in TypeScript instead of manually maintaining markdown. Run `squad build` to generate `.squad/` files. - See [SDK-First Mode Guide](../sdk-first-mode.md) for comprehensive documentation and examples. - ### `defineTeam(config): TeamDefinition` - Define team metadata, members, and project context. - ```typescript const team = defineTeam({ name: 'Platform Squad', @@ -122,9 +88,7 @@ const team = defineTeam({ members: ['@edie', '@mcmanus', '@fenster'], }); ``` - **Type:** - ```typescript interface TeamDefinition { readonly name: string; @@ -133,13 +97,9 @@ interface TeamDefinition { readonly members: readonly string[]; } ``` - --- - ### `defineAgent(config): AgentDefinition` - Define a single agent with role, tools, model, and capabilities. 
- ```typescript const edie = defineAgent({ name: 'edie', @@ -153,9 +113,7 @@ const edie = defineAgent({ status: 'active', }); ``` - **Type:** - ```typescript interface AgentDefinition { readonly name: string; @@ -166,19 +124,14 @@ interface AgentDefinition { readonly capabilities?: readonly AgentCapability[]; readonly status?: 'active' | 'inactive' | 'retired'; } - interface AgentCapability { readonly name: string; readonly level: 'expert' | 'proficient' | 'basic'; } ``` - --- - ### `defineRouting(config): RoutingDefinition` - Define routing rules with pattern matching and tier assignment. - ```typescript const routing = defineRouting({ rules: [ @@ -189,16 +142,13 @@ const routing = defineRouting({ fallback: 'coordinator', }); ``` - **Type:** - ```typescript interface RoutingDefinition { readonly rules: readonly RoutingRule[]; readonly defaultAgent?: string; readonly fallback?: 'ask' | 'default-agent' | 'coordinator'; } - interface RoutingRule { readonly pattern: string; readonly agents: readonly string[]; @@ -206,13 +156,9 @@ interface RoutingRule { readonly priority?: number; } ``` - --- - ### `defineCeremony(config): CeremonyDefinition` - Define ceremonies (standups, retros, etc.) with schedule and participants. - ```typescript const standup = defineCeremony({ name: 'standup', @@ -222,9 +168,7 @@ const standup = defineCeremony({ agenda: 'Yesterday / Today / Blockers', }); ``` - **Type:** - ```typescript interface CeremonyDefinition { readonly name: string; @@ -235,13 +179,9 @@ interface CeremonyDefinition { readonly hooks?: readonly string[]; } ``` - --- - ### `defineHooks(config): HooksDefinition` - Define governance hooks — write paths, blocked commands, PII scrubbing. 
- ```typescript const hooks = defineHooks({ allowedWritePaths: ['src/**', 'test/**', '.squad/**'], @@ -251,9 +191,7 @@ const hooks = defineHooks({ reviewerLockout: true, }); ``` - **Type:** - ```typescript interface HooksDefinition { readonly allowedWritePaths?: readonly string[]; @@ -263,13 +201,9 @@ interface HooksDefinition { readonly reviewerLockout?: boolean; } ``` - --- - ### `defineCasting(config): CastingDefinition` - Define casting configuration — universe allowlists and overflow behavior. - ```typescript const casting = defineCasting({ allowlistUniverses: ['The Usual Suspects', 'Breaking Bad'], @@ -277,9 +211,7 @@ const casting = defineCasting({ capacity: { 'The Usual Suspects': 8 }, }); ``` - **Type:** - ```typescript interface CastingDefinition { readonly allowlistUniverses?: readonly string[]; @@ -287,13 +219,9 @@ interface CastingDefinition { readonly capacity?: Readonly<Record<string, number>>; } ``` - --- - ### `defineTelemetry(config): TelemetryDefinition` - Define OpenTelemetry configuration for observability. - ```typescript const telemetry = defineTelemetry({ enabled: true, @@ -303,9 +231,7 @@ const telemetry = defineTelemetry({ aspireDefaults: true, }); ``` - **Type:** - ```typescript interface TelemetryDefinition { readonly enabled?: boolean; @@ -315,13 +241,9 @@ interface TelemetryDefinition { readonly aspireDefaults?: boolean; } ``` - --- - ### `defineSquad(config): SquadSDKConfig` - Compose all builders into a single SDK config. - ```typescript export default defineSquad({ version: '1.0.0', @@ -330,9 +252,7 @@ export default defineSquad({ routing: defineRouting({ /* ... */ }), }); ``` - **Type:** - ```typescript interface SquadSDKConfig { readonly version?: string; @@ -345,113 +265,77 @@ interface SquadSDKConfig { readonly telemetry?: TelemetryDefinition; } ``` - --- - ## SquadClient - Wraps `@github/copilot-sdk` with lifecycle management and auto-reconnection. 
- ```typescript import { SquadClient } from '@bradygaster/squad-sdk'; - const client = new SquadClient({ port: 3000, auth: { token: process.env.COPILOT_TOKEN }, reconnection: { maxRetries: 5, backoffMs: 1000 }, }); - await client.connect(); ``` - **Connection states:** `disconnected → connecting → connected → reconnecting → error` - ### SquadClientWithPool - Production-ready client composing `SquadClient`, `SessionPool`, and `EventBus`: - ```typescript import { SquadClientWithPool } from '@bradygaster/squad-sdk'; - const squad = new SquadClientWithPool({ client: clientOptions, pool: { maxConcurrent: 10, idleTimeout: 60_000 }, }); - const session = await squad.createSession({ agent: 'backend' }); const response = await session.sendMessage('Implement the /users endpoint'); await session.destroy(); ``` - **Session states:** `creating → active → idle → error → destroyed` - --- - ## Coordinator - Central routing and orchestration engine. - ### `SquadCoordinator` - ```typescript import { SquadCoordinator } from '@bradygaster/squad-sdk'; - const coordinator = new SquadCoordinator({ teamRoot: './.squad', enableParallel: true }); await coordinator.initialize(); - const decision = await coordinator.route('refactor the API'); // decision.tier: 'direct' | 'lightweight' | 'standard' | 'full' // decision.agents: ['backend', 'tester'] // decision.parallel: true // decision.rationale: 'Backend refactor with test coverage' - await coordinator.execute(decision, 'refactor the API'); await coordinator.shutdown(); ``` - ### `selectResponseTier(context): TierName` - ```typescript const tier = selectResponseTier({ complexity: 'high', budget: 10, userTeam: true }); // → 'standard' or 'full' ``` - ### `getTier(name): TierDefinition` - ```typescript const tier = getTier('standard'); tier.maxAgents; // Max parallel agents tier.defaultModel; // Default model tier.toolset; // Available tools ``` - --- - ## Event Handling - Typed pub/sub for session lifecycle events: - ```typescript 
squad.events.on('session.created', (event) => { console.log(`Session ${event.sessionId} started`); }); - squad.events.on('session.status_changed', (event) => { if (event.payload.status === 'error') { /* handle */ } }); ``` - **Events:** `session.created`, `session.destroyed`, `session.status_changed`, tool execution events. - --- - ## Tools & Hooks - ### `defineTool(config): SquadTool` - ```typescript import { defineTool } from '@bradygaster/squad-sdk'; - const myTool = defineTool<{ query: string }>({ name: 'search_docs', description: 'Search project documentation', @@ -466,26 +350,19 @@ const myTool = defineTool<{ query: string }>({ }), }); ``` - ### `ToolRegistry` - ```typescript import { ToolRegistry } from '@bradygaster/squad-sdk/tools'; import type { FanOutDependencies } from '@bradygaster/squad-sdk/coordinator'; - const registry = new ToolRegistry('./.squad'); registry.getTools(); // All tools registry.getToolsForAgent(['squad_route', 'squad_decide']); // Agent-specific registry.getTool('squad_route'); // Single lookup ``` - **Constructor:** `new ToolRegistry(squadRoot?, sessionPoolGetter?, storage?, state?, fanOutDepsGetter?)` - - `fanOutDepsGetter` — Required for `squad_route` to create sessions via `spawnParallel`. Returns a `FanOutDependencies` object (from `@bradygaster/squad-sdk/coordinator`). Without it, `squad_route` returns `error: 'fan-out-deps-unavailable'`. - `state` — When provided, `squad_route` validates that the target agent exists in the roster before spawning. 
- **Built-in tools:** - | Tool | Purpose | |------|---------| | `squad_route` | Route a task to another agent (requires `fanOutDepsGetter`) | @@ -493,33 +370,22 @@ registry.getTool('squad_route'); // Single lookup | `squad_memory` | Append to agent history | | `squad_status` | Query session pool state | | `squad_skill` | Read/write agent skills | - ### HookPipeline - Intercept tool calls before (`PreToolUseHook`) and after (`PostToolUseHook`) execution: - ```typescript import { HookPipeline, type PreToolUseHook } from '@bradygaster/squad-sdk'; - const auditHook: PreToolUseHook = async (toolName, params, context) => { console.log(`Agent ${context.agentId} calling ${toolName}`); return { action: 'allow' }; }; - const pipeline = new HookPipeline(); pipeline.addPreHook(auditHook); ``` - **Hook actions:** `allow`, `block`, `modify` - **Built-in policies:** ReviewerLockout, File Guards, Shell Restrictions, Rate Limits, PII Filters. - --- - ## Agents & Casting - ### `onboardAgent(options): Promise` - ```typescript const result = await onboardAgent({ teamRoot: './.squad', @@ -530,12 +396,9 @@ const result = await onboardAgent({ }); // result.agentDir, result.charterPath, result.historyPath ``` - ### `CastingEngine` - ```typescript import { CastingEngine } from '@bradygaster/squad-sdk'; - const engine = new CastingEngine({ universes: ['The Wire'], activeUniverse: 'The Wire' }); const members = await engine.castTeam([ { role: 'lead', title: 'Lead Developer' }, @@ -543,83 +406,56 @@ const members = await engine.castTeam([ ]); // members[0].name → 'Stringer', members[0].universe → 'The Wire' ``` - --- - ## Runtime Constants - ```typescript import { MODELS, TIMEOUTS, AGENT_ROLES } from '@bradygaster/squad-sdk'; - MODELS.premium; // ['claude-opus-4.6', 'gpt-5.2', ...] MODELS.standard; // ['claude-sonnet-4.5', 'gpt-5.1', ...] MODELS.fast; // ['claude-haiku-4.5', 'gpt-5-mini', ...] 
- TIMEOUTS.agentInitMs; // 30000 TIMEOUTS.agentExecuteMs; // 300000 TIMEOUTS.coordinatorRouteMs; // 5000 ``` - --- - ## Upstream Inheritance - Share skills, decisions, and routing across teams. - ```typescript import { readUpstreamConfig, resolveUpstreams, buildInheritedContextBlock } from '@bradygaster/squad-sdk'; - const config = await readUpstreamConfig('./.squad'); const resolved = await resolveUpstreams(config, './.squad'); const contextBlock = buildInheritedContextBlock(resolved); ``` - **Upstream types:** `local`, `git`, `export` - --- - ## Observability (OpenTelemetry) - ### Quick Setup - ```typescript import { initSquadTelemetry } from '@bradygaster/squad-sdk'; - const telemetry = await initSquadTelemetry({ endpoint: 'http://localhost:4318', serviceName: 'my-squad', eventBus: myEventBus, }); - // ... run agents ... await telemetry.shutdown(); ``` - ### Low-Level Control - ```typescript import { initializeOTel, shutdownOTel, getTracer, getMeter } from '@bradygaster/squad-sdk'; - await initializeOTel({ endpoint: 'http://localhost:4318' }); - const tracer = getTracer('my-component'); const span = tracer.startSpan('my-work'); // ... do work ... 
span.end(); - const meter = getMeter('my-component'); const counter = meter.createCounter('requests_total'); counter.add(1); - await shutdownOTel(); ``` - --- - ## Error Classes - All errors extend `SquadError` with severity, category, and recoverability: - | Error | When | |-------|------| | `SDKConnectionError` | Connection failures (retryable) | @@ -630,11 +466,8 @@ All errors extend `SquadError` with severity, category, and recoverability: | `ConfigurationError` | Invalid config (includes field + reason) | | `RateLimitError` | Too many requests | | `ValidationError` | Schema validation failures | - --- - ## Exports at a Glance - | Export | Type | Module | |--------|------|--------| | `resolveSquad` | function | resolution | @@ -652,10 +485,7 @@ All errors extend `SquadError` with severity, category, and recoverability: | `initializeOTel` / `shutdownOTel` | function | runtime/otel | | `getTracer` / `getMeter` | function | runtime/otel | | `initSquadTelemetry` | function | runtime/otel-init | - --- - ## See Also - - [CLI Reference](./cli.md) — Shell commands and config files - [Recipes & Advanced Scenarios](../cookbook/recipes.md) — Prompt-driven cookbook diff --git a/docs/src/content/docs/reference/tools-and-hooks.md b/docs/src/content/docs/reference/tools-and-hooks.md index 3de91366c..a57cc8179 100644 --- a/docs/src/content/docs/reference/tools-and-hooks.md +++ b/docs/src/content/docs/reference/tools-and-hooks.md @@ -1,20 +1,11 @@ # Custom Tools & Hooks Guide - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - Squad ships with 5 built-in tools and a hook pipeline for policy enforcement. This guide covers extending both. - --- - ## ToolRegistry API - `ToolRegistry` manages tool definitions. 
Each tool has a name, JSON schema, and async handler: - ```typescript import { ToolRegistry, defineTool } from '@bradygaster/squad-sdk'; - const registry = new ToolRegistry(); - const myTool = defineTool({ name: 'search-docs', description: 'Search internal documentation', @@ -30,14 +21,10 @@ const myTool = defineTool({ return { success: true, data: results }; }, }); - registry.register(myTool); ``` - The handler returns a `ToolResult` with `success` flag and `data` payload. - **Built-in tools:** - | Tool | Purpose | |------|---------| | `route` | Dispatch to another agent | @@ -45,31 +32,21 @@ The handler returns a `ToolResult` with `success` flag and `data` payload. | `memory` | Agent history | | `status` | Session pool query | | `skill` | Read/write skills | - --- - ## HookPipeline - `HookPipeline` intercepts tool calls at two points: before execution (`PreToolUseHook`) and after (`PostToolUseHook`). Hooks return a `HookAction`: `allow`, `block`, or `modify`. - ```typescript import { HookPipeline, PreToolUseHook } from '@bradygaster/squad-sdk'; - const auditHook: PreToolUseHook = async (toolName, params, context) => { console.log(`Agent ${context.agentId} calling ${toolName}`); return { action: 'allow' }; }; - const pipeline = new HookPipeline(); pipeline.addPreHook(auditHook); ``` - --- - ## Writing Custom Hooks - Custom hooks receive the tool name, parameters, and agent context. Use them for logging, validation, or transformation: - ```typescript const sanitizeHook: PreToolUseHook = async (toolName, params, context) => { if (toolName === 'shell' && params.command.includes('rm -rf')) { @@ -78,23 +55,16 @@ const sanitizeHook: PreToolUseHook = async (toolName, params, context) => { return { action: 'allow' }; }; ``` - Post-tool hooks inspect results and can trigger follow-up actions like notifications or audit logging. - --- - ## Built-in Policies - Squad ships 5 policies configured via `PolicyConfig`: - 1. 
**ReviewerLockoutHook** — Agents cannot edit files they are reviewing 2. **File guards** — Restrict write access to sensitive paths 3. **Shell restrictions** — Block dangerous shell commands 4. **Rate limits** — Cap tool invocations per agent per interval 5. **PII filters** — Redact sensitive data before model calls - Configure policies in `squad.config.ts` under the `hooks` key: - ```typescript export default defineConfig({ hooks: { @@ -104,11 +74,8 @@ export default defineConfig({ }, }); ``` - --- - ## See Also - - [SDK API Reference](api-reference.md) — Full type and function reference - [Integration Guide](integration.md) — Connecting to the Copilot SDK - [Config Reference](config.md) — Configuration file options diff --git a/docs/src/content/docs/reference/vscode-troubleshooting.md b/docs/src/content/docs/reference/vscode-troubleshooting.md index 1747516c4..422d134ca 100644 --- a/docs/src/content/docs/reference/vscode-troubleshooting.md +++ b/docs/src/content/docs/reference/vscode-troubleshooting.md @@ -1,71 +1,39 @@ # VS Code Troubleshooting - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - This page covers known issues and mitigations when running Squad inside VS Code's integrated terminal. - --- - ## Symptom: VS Code closes unexpectedly during Squad execution - **Reported in:** [Issue #259](https://github.com/bradygaster/squad/issues/259), [Discussion #174](https://github.com/bradygaster/squad/discussions/174) - VS Code (especially Insiders/Nightly builds) may close without a crash dialog while Squad tasks are executing. This is most often caused by resource pressure — not a bug in Squad itself — but Squad's runtime patterns can contribute to the problem. - --- - ## Root cause analysis - An audit of the Squad codebase (SDK and CLI) identified the following resource-pressure vectors: - ### 1. 
Unbounded in-memory collections (fixed) - **Streaming usage history** (`streaming.ts`) and **telemetry queue** (`telemetry.ts`) previously grew without limit during long-running sessions. In multi-agent sessions with heavy token throughput, these arrays could accumulate tens of thousands of entries over hours. - **Fix applied:** Both collections now enforce FIFO eviction caps — 1,000 events for usage history and 500 for telemetry. This bounds worst-case memory contribution to a predictable ceiling. - ### 2. File watcher scope (fixed) - The `SquadObserver` watches the `.squad/` directory with `recursive: true`. On projects with large orchestration logs, this could generate a high volume of `fs.watch` events. Combined with VS Code's own file watchers on the same workspace, the total watcher count can approach OS limits. - **Fix applied:** The observer now filters out `orchestration-log/` and `.git/` subdirectories at the watcher callback level, reducing event volume significantly. - ### 3. Stream buffer accumulation (already mitigated) - The CLI shell accumulates per-agent streaming content in `Map` buffers. The existing `MemoryManager` enforces a 1 MB per-stream cap and trims the active message list to 200 entries. No additional changes needed. - ### 4. Process spawning (no issues found) - All child processes (`node-pty` for Copilot, `devtunnel`, Docker, .NET) are properly tracked and cleaned up via signal handlers (`SIGINT`, `SIGTERM`). No process leak patterns were detected. - ### 5. Terminal output rate (low risk) - Squad uses the Ink framework for rendering, which batches React state updates. Direct `process.stdout.write` calls are rate-limited by the model's token generation speed. No fire-hose output patterns were found. - ### 6. Synchronous I/O (startup only) - `existsSync` and `readFileSync` calls exist in config loading and shell initialization, but these run only at startup — not in hot paths or event loops. 
- --- - ## Recommended mitigations - ### For users - 1. **Use VS Code Stable instead of Insiders/Nightly.** Nightly builds may have unresolved memory or renderer bugs that amplify resource pressure from terminal-heavy workloads. - 2. **Increase the file watcher limit** (Linux): ```bash # Check current limit cat /proc/sys/fs/inotify/max_user_watches - # Increase temporarily sudo sysctl fs.inotify.max_user_watches=524288 - # Increase permanently echo 'fs.inotify.max_user_watches=524288' | sudo tee -a /etc/sysctl.conf ``` - 3. **Exclude `.squad/orchestration-log/` from VS Code's file watcher** by adding to `.vscode/settings.json`: ```json { @@ -74,32 +42,24 @@ Squad uses the Ink framework for rendering, which batches React state updates. D } } ``` - 4. **Monitor resource usage** during Squad sessions: - **Windows:** Task Manager → Details tab, watch `node.exe` memory - **macOS/Linux:** `top -p $(pgrep -f squad)` or Activity Monitor - If memory climbs steadily past 1 GB, restart the Squad shell - 5. **Close unused terminals** in VS Code before starting a Squad session. Each terminal consumes renderer memory. - 6. **Disable terminal GPU acceleration** if you see flickering (related: [#254](https://github.com/bradygaster/squad/issues/254)): ```json { "terminal.integrated.gpuAcceleration": "off" } ``` - ### For developers - - When adding new in-memory collections, enforce size caps with FIFO eviction (see `StreamingPipeline.MAX_USAGE_EVENTS` pattern). - When adding file watchers, always filter high-churn directories in the callback. - Prefer `async` file I/O in any code path that runs during active sessions. - Test long-running sessions (2+ hours) to verify memory stays bounded. 
- --- - ## Related issues - | Issue | Description | Status | |-------|-------------|--------| | [#259](https://github.com/bradygaster/squad/issues/259) | VS Code crash during Squad execution | This investigation | diff --git a/docs/src/content/docs/scenarios/aspire-dashboard.md b/docs/src/content/docs/scenarios/aspire-dashboard.md index c7879e897..84d05623a 100644 --- a/docs/src/content/docs/scenarios/aspire-dashboard.md +++ b/docs/src/content/docs/scenarios/aspire-dashboard.md @@ -1,43 +1,27 @@ # Using Squad with the Aspire Dashboard - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - > 📌 **Squad CLI only** — The Aspire dashboard integration requires the Squad CLI (`squad aspire`). It is not available when using GitHub Copilot CLI directly. Only Squad CLI commands emit OpenTelemetry data to the dashboard. - **Try this:** ``` squad aspire ``` - Aspire is a free, open-source dashboard for observing any OpenTelemetry app — traces, metrics, logs, all in one place. Squad ships with an Aspire integration that streams all your telemetry (agent spawns, token usage, session metrics, errors) to the dashboard in real time. - --- - ## 1. What Is Aspire? - Aspire is not a .NET thing — it's a **standalone dashboard for any app that speaks OpenTelemetry**. You can run it in Docker, point any OTLP client at it, and watch telemetry flow in: - - **Traces** — see every agent spawn, task execution, and error with timing - **Metrics** — counters (agents spawned, tokens consumed), histograms (latency), gauges (active sessions) - **Resources** — grouping by service and environment - Squad's OTel integration exports OTLP/gRPC (the only protocol Aspire understands), so you get instant visibility into what your agents are doing. - --- - ## 2. Launch the Aspire Container - The easiest way is the built-in `squad aspire` command: - ```bash squad aspire ``` - This will: 1. 
Pull the Aspire dashboard image: `mcr.microsoft.com/dotnet/aspire-dashboard:latest` 2. Start the container on **port 18888** (UI) and **port 4317** (OTLP gRPC) 3. Print the dashboard URL (usually `http://localhost:18888`) - **Behind the scenes**, the container runs with: ```bash docker run -d \ @@ -48,11 +32,8 @@ docker run -d \ -e DASHBOARD__OTLP__AUTHMODE=Unsecured \ mcr.microsoft.com/dotnet/aspire-dashboard:latest ``` - > ⚠️ **Both `AUTHMODE=Unsecured` flags are required for local dev.** Without `DASHBOARD__OTLP__AUTHMODE=Unsecured`, the OTLP endpoint rejects connections with: `API key from 'x-otlp-api-key' header is missing`. Without `DASHBOARD__FRONTEND__AUTHMODE=Unsecured`, the UI requires a login token. - If you started the container yourself (without `squad aspire`) and you're seeing auth errors, stop it and re-run with the flags above. Or, if you prefer to keep API key auth, set a key on both sides: - ```bash # Container side — set the expected API key docker run -d \ @@ -64,63 +45,43 @@ docker run -d \ -e DASHBOARD__OTLP__PRIMARYAPIKEY=my-dev-key \ mcr.microsoft.com/dotnet/aspire-dashboard:latest ``` - ```bash # Client side — tell the OTLP exporter to send the key export OTEL_EXPORTER_OTLP_HEADERS="x-otlp-api-key=my-dev-key" ``` - For local dev, unsecured mode is simplest. For shared environments, use an API key. - --- - ## 3. Connect Squad to Aspire - When you run Squad (via the CLI or SDK), set the OTLP endpoint: - ### Option A: CLI (standalone) - ```powershell $env:OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" squad run ``` - ```bash export OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" squad run "your prompt here" ``` - ### Option B: SDK (programmatic) - ```typescript import { initSquadTelemetry, EventBus } from '@bradygaster/squad-sdk'; - const bus = new EventBus(); const telemetry = await initSquadTelemetry({ endpoint: 'http://localhost:4317', eventBus: bus, }); - // … run your squad … - await telemetry.shutdown(); ``` - That's it. 
Squad will automatically: 1. Initialize OpenTelemetry providers (tracing + metrics) 2. Export all agent spawns, token usage, session metrics, and errors to Aspire 3. Flush telemetry on shutdown - --- - ## 4. What You'll See in the Dashboard - Open **http://localhost:18888** and navigate to: - ### **Traces** (`/traces`) - You'll see a list of spans for each operation. Examples: - ``` squad.init 3ms ✓ squad.agent.spawn 250ms ✓ (agent-name: "Lead") @@ -128,76 +89,56 @@ squad.agent.spawn 180ms ✓ (agent-name: "Backend") squad.agent.error 5ms ✗ (error: "timeout") squad.run 2100ms ✓ ``` - Each span has attributes: - `agent.name` — the agent that ran - `session.id` — which session this belongs to - `mode` — "sync" or "async" - `status` — success or error - Click a span to see full details (attributes, events, timing). - ### **Metrics** (`/metrics`) - You'll see gauges, counters, and histograms: - **Counters:** - `squad.tokens.input` — total input tokens consumed - `squad.tokens.output` — total output tokens produced - `squad.agent.spawns` — total agents spawned - `squad.sessions.created` — total sessions created - **Gauges:** - `squad.agent.active` — currently active agent sessions - `squad.sessions.active` — currently active sessions - `squad.sessions.idle` — pooled sessions waiting for reuse - **Histograms:** - `squad.agent.duration` — agent task duration (ms) - `squad.response.ttft` — time to first token (ms) - `squad.response.duration` — total response duration (ms) - ### Rework Rate Metrics (5th DORA) - PR rework rate instruments, exported alongside the core metrics above: - | Instrument | Type | Unit | Description | |-----------|------|------|-------------| | `squad.rework.rate` | Gauge | % | Current rework rate percentage | | `squad.rework.cycles` | Histogram | — | Review cycles per PR | | `squad.rework.rejection_rate` | Gauge | % | Percentage of PRs with changes requested | | `squad.rework.time_ms` | Histogram | ms | Time spent in rework | - ### **Resources** - 
Aspire groups all telemetry by service. You'll see: - `service.name` — "squad-cli" or your custom app name - `squad.version` — which version of Squad you're running - --- - ## 5. Example Workflow - ### 1. Start Aspire ```bash squad aspire ``` - ### 2. Run Squad with telemetry ```bash export OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 squad run "Implement user registration with email verification" ``` - ### 3. Watch in real time - Refresh the Aspire dashboard. You'll see: - **Traces** section fills with `squad.agent.spawn`, `squad.init`, etc. - **Metrics** show counters ticking up for tokens consumed, agents spawned - **Latency** histogram shows how long agents took - ### 4. Click a span - Click `squad.agent.spawn` to see: ``` Duration: 250ms @@ -206,15 +147,10 @@ Attributes: session.id: "abc-123" mode: "sync" ``` - --- - ## 6. Troubleshooting - ### "API key from 'x-otlp-api-key' header is missing" - This is the most common issue. It means your Aspire container is running with API key auth enabled (the default). Fix it one of two ways: - **Option A: Restart with unsecured OTLP (recommended for local dev)** ```bash docker stop aspire-dashboard && docker rm aspire-dashboard @@ -224,7 +160,6 @@ docker run -d --name aspire-dashboard \ -e DASHBOARD__OTLP__AUTHMODE=Unsecured \ mcr.microsoft.com/dotnet/aspire-dashboard:latest ``` - **Option B: Send the API key from Squad** ```bash # Set both in your .env or shell: @@ -232,88 +167,63 @@ OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317 OTEL_EXPORTER_OTLP_HEADERS=x-otlp-api-key=your-key-here ``` (The key must match `DASHBOARD__OTLP__PRIMARYAPIKEY` on the container.) - ### Quick Debug Checklist - If no telemetry appears in the Aspire dashboard, walk through this list: - 1. **Is the container running?** ```bash docker ps | grep aspire-dashboard ``` You should see ports `18888` and `4317` mapped. - 2. 
**Is the OTLP endpoint set correctly?** ```bash echo $OTEL_EXPORTER_OTLP_ENDPOINT # Must be: http://localhost:4317 (include http://) ``` Port 4317 on the host maps to the Aspire OTLP/gRPC listener on container port 18889. Do **not** use the dashboard UI port (18888). The `http://` prefix is required. - 3. **Is OTLP auth disabled?** Check the container logs: ```bash docker logs aspire-dashboard 2>&1 | grep -i auth ``` If you see `OtlpComposite was not authenticated` or `API key... is missing`, see the auth fix above. - 4. **Is the protocol correct?** Squad exports OTLP/gRPC. Aspire only accepts gRPC on port 18889. If you see `UNIMPLEMENTED` or connection errors, confirm you're not accidentally using an OTLP/HTTP endpoint (port 4318). - 5. **Firewall / network:** Port 4317 must be reachable between your app and the Docker host. On Docker Desktop (Windows/Mac), `localhost:4317` should work. - 6. **Wait for batching:** OTel batches span exports. Traces may take 1–2 seconds to appear. Metrics export every 60 seconds by default (set `OTEL_METRIC_EXPORT_INTERVAL_MILLIS=1000` for faster feedback). - ### Dashboard shows no traces - - **Check the container is running:** `docker ps | grep aspire-dashboard` - **Verify the endpoint:** `OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317` - **Wait a moment:** Aspire batches exports — it may take 1–2 seconds for traces to appear - **Check firewall:** Port 4317 needs to be open between your app and Docker - ### Dashboard is slow or unresponsive - - **Restart the container:** `squad aspire` (auto-stops and restarts) - **Check Docker resources:** Aspire needs ~500MB RAM - **Look at logs:** `docker logs aspire-dashboard` - ### OTLP/gRPC connection refused - - **Ensure port 4317 is mapped:** The docker run command above maps `-p 4317:18889` - **On Windows/Mac:** If using Docker Desktop, localhost:4317 should work. If not, try `host.docker.internal:4317` - **Custom endpoint?** Set `OTEL_EXPORTER_OTLP_ENDPOINT` explicitly - --- - ## 7. 
Stop Aspire - ```bash squad aspire --stop ``` - Or manually: ```bash docker stop aspire-dashboard docker rm aspire-dashboard ``` - --- - ## 8. Pro Tips - - **Export metrics frequently:** Set `OTEL_METRIC_EXPORT_INTERVAL_MILLIS=1000` for near-real-time metric updates (default is 60s) - **Tag your service:** Customize the service name with `OTEL_SERVICE_NAME=my-app` - **Batch size:** Adjust `OTEL_BSP_MAX_QUEUE_SIZE` if you're emitting tons of spans - **Only export what you need:** If Squad is a tiny part of your app, filter traces by service name in Aspire UI - --- - ## 9. Learn More - - [Aspire Documentation](https://aspire.dev) - [OpenTelemetry Protocol (OTLP)](https://opentelemetry.io/docs/specs/otel/protocol/) - [Squad SDK Reference](../reference/sdk.md) — detailed API documentation - Aspire pairs perfectly with Squad: **watch your agents work in real time, catch performance issues early, and prove to yourself (and your team) that AI agents are deterministic and safe.** diff --git a/docs/src/content/docs/scenarios/ci-cd-integration.md b/docs/src/content/docs/scenarios/ci-cd-integration.md index 1f6358929..017cde14b 100644 --- a/docs/src/content/docs/scenarios/ci-cd-integration.md +++ b/docs/src/content/docs/scenarios/ci-cd-integration.md @@ -1,30 +1,18 @@ # Squad in CI/CD Pipelines - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` Enable Ralph's heartbeat workflow to triage issues automatically ``` - Ralph runs periodically via GitHub Actions to handle housekeeping between Copilot sessions — triage new issues, apply squad labels, check stale branches, archive old decisions. - --- - ## 1. 
The Heartbeat Workflow — Ralph Between Sessions - Ralph (the manager agent) runs via GitHub Actions on event-based triggers: - - Triage new issues - Apply squad labels based on routing rules - Check for stale branches - Archive old decisions - The workflow is in `.github/workflows/squad-heartbeat.yml` and runs on issue close, PR merge, and manual dispatch (or via `squad watch` for local polling). - **You don't have to do anything** — it's installed automatically (along with 9 other workflows) when you run `squad`. - ```yaml name: Ralph Heartbeat on: @@ -33,7 +21,6 @@ on: pull_request: types: [closed] workflow_dispatch: - jobs: heartbeat: runs-on: ubuntu-latest @@ -42,79 +29,58 @@ jobs: - name: Run Ralph run: squad heartbeat ``` - > ⚡ **Cron is permanently disabled.** Squad's workflows no longer use scheduled cron triggers in GitHub Actions, to reduce costs. The heartbeat workflow runs on event-based triggers: when issues are closed, PRs are merged, or you manually trigger via `workflow_dispatch`. For periodic polling without events, use `squad watch` in a separate terminal (local, no GitHub Actions cost). - Ralph reads `.squad/routing.md`, looks at open issues, and applies labels: - ``` Issue #42: "Add Stripe payment integration" → squad:morpheus (backend work, routed to Morpheus) → type:feature → priority:high ``` - Now when you open Copilot, you see issues pre-triaged. - --- - ## 2. 
Label-Driven Automation - Squad uses GitHub labels to drive workflows: - **Routing labels** (auto-applied by Ralph): - `squad:neo` — routed to Neo (Lead) - `squad:trinity` — routed to Trinity (Frontend Dev) - `squad:morpheus` — routed to Morpheus (Backend Dev) - `squad:tank` — routed to Tank (Tester) - **Control labels**: - `go:neo` — tells Copilot to auto-assign this issue to Neo - `go:trinity` — auto-assign to Trinity - `go:morpheus` — auto-assign to Morpheus - `go:tank` — auto-assign to Tank - **Type labels** (for filtering): - `type:feature` - `type:bug` - `type:refactor` - `type:docs` - **Priority labels**: - `priority:critical` - `priority:high` - `priority:medium` - `priority:low` - **Release labels**: - `release:next` — include in the next release - `release:backlog` — not scheduled yet - -Ralph applies `squad:*` and `type:*` labels automatically. You apply `go:*` labels manually when you want autonomous processing. - +Ralph applies `squad:*` and `type:*` labels automatically. You apply `go:*` labels manually when you want background processing with review gates. --- - -## 3. @copilot Auto-Assign for Autonomous Issue Processing - +## 3. @copilot Auto-Assign for Background Issue Processing When you add a `go:*` label to an issue, the `@copilot` automation picks it up: - 1. Ralph labels issue #42 with `squad:morpheus` (backend work) 2. You review the issue and add `go:morpheus` (approval to proceed) 3. GitHub Actions triggers the `@copilot` workflow 4. Copilot session spawns Morpheus to handle the issue 5. Morpheus reads the issue, implements the feature, opens a PR 6. PR is tagged for human review - -**This is autonomous issue processing.** You don't open Copilot manually — the workflow does. - +**This is background issue processing.** You don't open Copilot manually — the workflow does. 
Workflow file: `.github/workflows/copilot-auto-assign.yml`: - ```yaml name: Copilot Auto-Assign on: issues: types: [labeled] - jobs: auto-assign: if: startsWith(github.event.label.name, 'go:') @@ -130,26 +96,17 @@ jobs: run: | copilot --agent squad --message "${{ steps.agent.outputs.agent }}, handle issue #${{ github.event.issue.number }}" ``` - **Note:** This workflow requires GitHub Actions to have access to your Copilot session. See GitHub's docs for `gh copilot` in Actions. - --- - ## 4. What You CAN'T Do: Agents Can't Run in Actions (Yet) - **Squad agents require a live Copilot session.** They can't run in a GitHub Actions runner without Copilot CLI access. - This means: - ❌ You **can't** run `Squad, build the feature` inside a GitHub Actions workflow ✅ You **can** use Ralph to triage and label issues ✅ You **can** trigger Copilot sessions via Actions (if you have `gh copilot` access) -❌ You **can't** have agents autonomously merge PRs without human approval (by design) - +❌ You **can't** have agents merge PRs without human approval (by design) --- - ## 5. Sample Workflow: Issue Filed → Triage → Assign → Build → Review - 1. **User files issue** #42: "Add Stripe payment integration" 2. **Ralph (heartbeat)** runs, reads routing rules, applies `squad:morpheus` and `type:feature` 3. **You review** the issue, decide it's good, add `go:morpheus` label @@ -158,66 +115,45 @@ This means: 6. **Morpheus builds** the Stripe integration, writes tests, opens PR #43 7. **Neo (Lead) reviews** PR #43, approves or requests changes 8. **You merge** PR #43 after human review - Steps 2, 4, 5, 6, 7 are **automated**. You only do steps 3 and 8. - --- - ## 6. 
Workflow Templates Ship with Squad - When you run `squad`, these workflow templates are installed: - - `.squad/templates/workflows/squad-heartbeat.yml` → Ralph runs every 6 hours - `.squad/templates/workflows/copilot-auto-assign.yml` → Triggers Copilot on `go:*` labels - `.squad/templates/workflows/pr-review-reminder.yml` → Reminds you of open PRs needing review - To activate them: - ```bash cp .squad/templates/workflows/*.yml .github/workflows/ git add .github/workflows/ git commit -m "Enable Squad workflows" git push ``` - Now they're live. - --- - ## 7. Sample Prompts for CI-Adjacent Workflows - **Trigger Ralph manually:** - ```bash squad heartbeat ``` - **Check what Ralph would do (dry run):** - ```bash squad heartbeat --dry-run ``` - **Have agents work on labeled issues:** - ``` > Team, review all open issues labeled squad:morpheus and tell me > which ones are ready to work on. ``` - -**Autonomous issue pickup:** - +**Background issue pickup:** ``` > Ralph, triage the 10 newest issues and apply squad labels. > If any are ready to start, let me know. ``` - --- - ## Tips - - **Ralph is your assistant between sessions.** It triages issues, applies labels, and keeps things organized while you're not in Copilot. -- **`go:*` labels mean "approved to proceed."** Don't add them to every issue — only the ones you've reviewed and want agents to handle autonomously. +- **`go:*` labels mean "approved to proceed."** Don't add them to every issue — only the ones you've reviewed and want agents to handle in the background. - **Agents still need human review.** PRs created by agents should be reviewed by a human before merging. - **Workflows are templates.** Customize `.squad/templates/` to match your CI/CD setup, then copy to `.github/workflows/`. - **Heartbeat frequency is configurable.** Edit `squad-heartbeat.yml` to change from every 6 hours to daily, hourly, etc. 
diff --git a/docs/src/content/docs/scenarios/existing-repo.md b/docs/src/content/docs/scenarios/existing-repo.md index 09932181c..dd54c2678 100644 --- a/docs/src/content/docs/scenarios/existing-repo.md +++ b/docs/src/content/docs/scenarios/existing-repo.md @@ -1,26 +1,16 @@ # Adding Squad to an Existing Repo - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` This is a SaaS app with React frontend, Node.js API, and PostgreSQL — set up Squad ``` - Squad discovers your existing project — stack, patterns, conventions — and proposes a team that fits. Agents explore the codebase first, then start working with full context. - --- - ## 1. Install Squad - Navigate to your existing repo and run: - ```bash cd ~/projects/my-saas-app squad ``` - ``` ✅ .github/agents/squad.agent.md (v0.2.0) ✅ .github/workflows/ (10 workflows) @@ -28,59 +18,41 @@ squad ✅ .copilot/skills/ (starter skills) ✅ .squad/ceremonies.md ✅ .gitattributes (merge=union rules) - Squad is ready. - Next steps: 1. Open Copilot: copilot 2. Select Squad from the /agent list (CLI) or /agents (VS Code) 3. Tell it what you're building ``` - If you already have a `.gitattributes` file, Squad appends `merge=union` rules for append-only team state files. Your existing rules are untouched. - --- - ## 2. Open Copilot and Describe What Exists - ```bash copilot ``` - Select **Squad** from the `/agent` list (CLI) or `/agents` (VS Code). When Squad asks what you're building, describe the project as it exists today: - ``` > This is a SaaS invoicing app. React frontend, Node.js/Express API, > PostgreSQL database. We use TypeScript everywhere, Jest for testing, > and Prisma as the ORM. The app has been in production for 8 months. 
``` - Squad proposes a team based on your stack: - ``` Here's your team: - 🏗️ Neo — Lead Scope, decisions, code review ⚛️ Trinity — Frontend Dev React, TypeScript, UI 🔧 Morpheus — Backend Dev Node.js, Express, Prisma, PostgreSQL 🧪 Tank — Tester Jest, integration tests, edge cases 📋 Scribe — (silent) Memory, decisions, session logs - Look right? Say yes, add someone, or change a role. ``` - --- - ## 3. Let the Team Explore First - Before giving implementation tasks, let agents discover your codebase: - ``` > Team, review the codebase and tell me what you find. ``` - Agents explore in parallel — each from their own perspective: - ``` 🏗️ Neo — reviewing architecture and project structure ⚛️ Trinity — exploring frontend: components, routing, state management @@ -88,45 +60,34 @@ Agents explore in parallel — each from their own perspective: 🧪 Tank — reviewing test suite: coverage, patterns, fixtures 📋 Scribe — logging findings ``` - Results come back labeled by agent: - ``` 🏗️ Neo — Architecture overview: Monorepo with /client and /server directories. 12 API routes, 34 React components, 89 tests. Prisma schema has 8 models. Auth uses passport-jwt. - ⚛️ Trinity — Frontend findings: React 18 with React Router v6. State via Zustand. Component pattern: each in its own directory with index.tsx, styles.module.css, and ComponentName.test.tsx. - 🔧 Morpheus — Backend findings: Express with middleware chain: auth → validate → handler. All routes in /server/routes/, each file exports a router. Prisma client instantiated in /server/lib/prisma.ts. - 🧪 Tank — Test suite review: 89 tests across 23 files. Jest with ts-jest preset. ~72% coverage. No integration tests for the payment flow. Test fixtures in /server/test/fixtures/. ``` - These findings land in each agent's `history.md`. From now on, agents know your project. - --- - ## 4. Give Context About Conventions - Feed agents information they can't discover from code alone: - ``` > We always use Zod for API validation. 
Never use any/unknown casts > in TypeScript. All database queries go through Prisma — no raw SQL. > The payment integration with Stripe is fragile, be careful around it. ``` - ``` 📌 Captured. 4 directives added to decisions.md: - Zod for all API validation @@ -134,42 +95,28 @@ Feed agents information they can't discover from code alone: - All queries through Prisma, no raw SQL - Stripe payment integration is fragile — handle with care ``` - These directives persist across every future session. Every agent reads them before working. - --- - ## 5. Start Working - Now give a real task: - ``` > Morpheus, add a recurring invoices feature to the API. > We need a cron job that generates invoices from recurring templates. ``` - ``` 🔧 Morpheus — building recurring invoices feature - Morpheus is reading existing invoice models and API patterns before starting implementation. ``` - Because Morpheus already explored the codebase, he knows the Prisma schema, the router pattern, and the validation conventions. No guessing. - --- - ## 6. Commit Your Team - ```bash git add .squad/ .github/ .gitattributes git commit -m "Add Squad team" ``` - --- - ## Tips - - **Explore first, build second.** The initial codebase review pays for itself immediately. - **Share your conventions.** Agents can't discover team norms from code alone. Tell them. - **Mention fragile areas.** Agents will be more cautious and add extra test coverage. diff --git a/docs/src/content/docs/scenarios/issue-driven-dev.md b/docs/src/content/docs/scenarios/issue-driven-dev.md index c19d94b6a..78d7cc4bc 100644 --- a/docs/src/content/docs/scenarios/issue-driven-dev.md +++ b/docs/src/content/docs/scenarios/issue-driven-dev.md @@ -1,42 +1,26 @@ # Issue-Driven Development with Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. 
- - **Try this:** ``` Connect to myorg/recipe-app ``` - End-to-end workflow: connect to a repo, pick issues from backlog, let agents create branches, implement, open PRs, handle review feedback, and close issues. See also [GitHub Issues Mode](../features/github-issues.md) for detailed guidance. - **Prerequisite:** The `gh` CLI must be installed and authenticated (`gh auth login`). - --- - ## 1. Connect to the Repository - Tell Squad which repo to track: - ``` > Connect to myorg/recipe-app ``` - ``` ✅ Issue source stored: myorg/recipe-app Using gh CLI for issue tracking. ``` - Squad stores the issue source in team state. From now on, it can read issues, create branches, and open PRs against this repo. - --- - ## 2. View the Backlog - ``` > Show the backlog ``` - ``` ┌─────┬──────────────────────────────────────────┬───────────┬────────────┐ │ # │ Title │ Labels │ Assignee │ @@ -49,33 +33,22 @@ Squad stores the issue source in team state. From now on, it can read issues, cr └─────┴──────────────────────────────────────────┴───────────┴────────────┘ 5 open issues ``` - --- - ## 3. Assign an Issue - Pick an issue for an agent to work on: - ``` > Work on #15 ``` - Squad reads the issue, routes it to the right agent based on domain, and kicks off the full workflow: - ``` 🔧 Dallas — picking up #15 (Fix pagination on recipe list) - Dallas is reading the issue and starting work. ``` - Behind the scenes, the agent: - 1. **Creates a branch** — `15-fix-pagination-on-recipe-list` 2. **Implements the fix** — reads existing code, writes the change, adds tests 3. **Opens a PR** — linked to issue #15 with a description of the fix - When the agent finishes: - ``` 🔧 Dallas — Completed #15 (Fix pagination on recipe list) Branch: 15-fix-pagination-on-recipe-list @@ -84,62 +57,42 @@ When the agent finishes: - src/routes/recipes.ts (fixed cursor calculation) - test/pagination.test.ts (3 new test cases) ``` - --- - ## 4. Handle Review Feedback - A reviewer leaves comments on the PR. 
Tell Squad: - ``` > There's review feedback on PR #22 ``` - Squad routes the feedback to the agent who opened it: - ``` 🔧 Dallas — reading review comments on PR #22 - Dallas is addressing the feedback now. ``` - The agent reads the comments, makes changes, and pushes new commits to the same branch: - ``` 🔧 Dallas — Addressed review feedback on PR #22 - Added boundary check for empty result set (reviewer concern) - Added test case for zero-results pagination - Pushed 2 new commits to 15-fix-pagination-on-recipe-list ``` - No force-pushes, no new branches. Just additional commits on the existing PR. - --- - ## 5. Merge - When the PR is approved: - ``` > Merge PR #22 ``` - ``` ✅ PR #22 merged — "Fix pagination on recipe list (#15)" Issue #15 closed. Branch 15-fix-pagination-on-recipe-list deleted. ``` - The issue closes automatically — the PR body includes a `Closes #15` reference. - --- - ## 6. Check Remaining Work - ``` > What's left? ``` - ``` ┌─────┬──────────────────────────────────────────┬───────────┬────────────┐ │ # │ Title │ Labels │ Assignee │ @@ -151,29 +104,20 @@ The issue closes automatically — the PR body includes a `Closes #15` reference └─────┴──────────────────────────────────────────┴───────────┴────────────┘ 4 open issues ``` - --- - ## 7. Work Multiple Issues in Parallel - Assign several issues at once: - ``` > Work on #12 and #18 ``` - ``` 🔧 Dallas — picking up #12 (Add ingredient search) ⚛️ Ripley — picking up #18 (Add user profile page) 📋 Scribe — logging session ``` - Each agent creates its own branch and works independently. Both PRs open when agents finish. - --- - ## Full Lifecycle at a Glance - ```mermaid graph LR A["Connect
connect to repo"] --> B["Browse
show backlog"] @@ -187,11 +131,8 @@ graph LR style D fill:#e1f5ff style E fill:#f3e5f5 ``` - --- - ## Tips - - **You don't pick the agent.** Squad routes each issue to the agent whose expertise matches. - **Agents name branches with the issue number.** Pattern: `{number}-{slugified-title}`. - **PRs auto-link to issues.** The PR body includes `Closes #N`, so merging closes the issue. diff --git a/docs/src/content/docs/scenarios/monorepo.md b/docs/src/content/docs/scenarios/monorepo.md index eb70f0036..9ef2da543 100644 --- a/docs/src/content/docs/scenarios/monorepo.md +++ b/docs/src/content/docs/scenarios/monorepo.md @@ -1,21 +1,12 @@ # Squad in Monorepos - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` I'm building a microservices platform in a monorepo with 8 services — set up specialists for each domain ``` - One squad per monorepo, not per service. Squad adds specialists per domain and routes work to the right agent based on which service is being modified. - --- - ## 1. The Monorepo Challenge - You have a monorepo with 8 services: - ``` monorepo/ ├── services/ @@ -33,24 +24,17 @@ monorepo/ │ └── config/ └── infra/ ``` - Do you need 8 squads (one per service)? **No. One squad with specialists.** - --- - ## 2. One Squad, Multiple Specialists - Tell Squad about your monorepo structure: - ``` > I'm building a microservices platform in a monorepo. We have 8 services: > auth, payments, notifications, API gateway, analytics, users, products, > and orders. I need specialists for each service domain. ``` - ``` Here's your team: - 🏗️ Michael — Lead Scope, decisions, code review 🔐 Fredo — Auth Specialist Auth service, JWT, sessions 💳 Sonny — Payments Dev Payment service, Stripe, billing @@ -62,27 +46,19 @@ Here's your team: 🛒 Tessio — Orders Dev Order service, checkout, fulfillment 🧪 Hagen — Tester Tests across all services 📋 Scribe — (silent) Memory, decisions, session logs - Look right? 
I can adjust if you want fewer specialists. ``` - That's a **10-agent team** (9 active + Scribe). You can trim it if you want: - ``` > That's too many. Let's have 3 domain specialists: backend (auth, payments, > users), frontend (gateway, notifications), and infra (analytics, products, > orders). Plus a Lead and Tester. ``` - --- - ## 3. Routing Rules Direct Work to the Right Specialist - Edit `.squad/routing.md`: - ```markdown # Routing Rules - **Auth service work** → Fredo **Payment service work** → Sonny **Notification service work** → Tom @@ -91,41 +67,29 @@ Edit `.squad/routing.md`: **User service work** → Vito **Product service work** → Clemenza **Order service work** → Tessio - **Shared utilities** → Michael (Lead decides who takes it) **Infrastructure changes** → Michael **Cross-service refactoring** → Michael **Testing** → Hagen ``` - Now when you give a task: - ``` > Add two-factor authentication to the auth service. ``` - The coordinator routes to Fredo (auth specialist). **Only Fredo** loads the auth service code. - ``` > Add Stripe subscription management to the payment service. ``` - Routes to Sonny (payments specialist). - --- - ## 4. Worktree Awareness — Multiple Services Simultaneously - You can work on multiple services at once: - ``` > Team, we're adding a loyalty points feature. This spans three services: > users (store points balance), orders (award points on purchase), > and products (display points earned per product). ``` - Squad decomposes and routes: - ``` 🏗️ Michael — coordinating cross-service feature 👤 Vito — adding points balance to user service @@ -133,22 +97,14 @@ Squad decomposes and routes: 📦 Clemenza — displaying points in product service 🧪 Hagen — writing integration tests across services ``` - All four work in parallel, each in their own service directory. - --- - ## 5. 
Skills That Span Services - Some patterns apply **across all services**: - `.copilot/skills/service-logging-pattern.md`: - ```markdown # Service Logging Pattern - All services use structured logging with Winston. - Log format: { "timestamp": "ISO 8601", @@ -157,115 +113,79 @@ Log format: "message": "human-readable message", "context": { /* additional fields */ } } - Every service must log: - Request IDs for tracing - User IDs (if authenticated) - Error stack traces ``` - This skill is read by **all agents**, regardless of which service they're working on. Consistent logging across the monorepo. - `.copilot/skills/inter-service-communication.md`: - ```markdown # Inter-Service Communication - Services communicate via HTTP REST APIs (synchronous) or RabbitMQ messages (asynchronous events). - Rules: - Never import code from another service - Use the service's public API only - All inter-service calls must have timeouts and retries - Use circuit breakers for downstream failures ``` - Agents know: **don't tightly couple services**. - --- - ## 6. Shared Code in `/shared` - The `/shared` directory has utilities, types, and config used by all services: - ``` > Kay, refactor the rate limiting utility in /shared/utils/rate-limit.ts. > This is used by 5 services, so be careful. ``` - ``` 🌐 Kay — refactoring rate limiting utility in /shared - Kay is checking which services import this utility before changing it. ``` - Kay knows changes to `/shared` affect multiple services. - --- - ## 7. Sample Prompts for Monorepo Workflows - **Cross-service feature:** - ``` > Team, we're adding real-time notifications. This requires: > notification-service (WebSocket server), api-gateway (WebSocket proxy), > and user-service (notification preferences). Split the work. ``` - **Service-specific task:** - ``` > Sonny, add support for Stripe payment intents in the payment service. > Don't touch other services. 
``` - **Shared utility change:** - ``` > Michael, we need to update the /shared/types/User.ts type. > This affects auth, users, and orders services. Coordinate the change. ``` - **Infrastructure change:** - ``` > All services need to switch from Winston to Pino for logging. > Team, update each service. Use the same Pino config across all services. ``` - **Integration test:** - ``` > Hagen, write an integration test for the checkout flow. It spans > orders, payments, and notifications services. ``` - **Explore a new service:** - ``` > Clemenza, review the product service. We haven't touched it in weeks. > Tell me what's there and what needs work. ``` - --- - ## 8. One Squad, Not Eight - **Why one squad instead of one per service?** - - **Shared knowledge.** Patterns that span services (logging, auth, error handling) are encoded once in skills. - **Cross-service coordination.** Michael (the Lead) sees the whole monorepo, not just one service. - **Fewer exports/imports.** You don't have to export/import squads between services. - **Consistent conventions.** All services follow the same patterns because the same agents work on them. - If you had 8 separate squads, they'd diverge. One squad keeps the monorepo aligned. - --- - ## Tips - - **One squad per monorepo.** Add specialists per service, keep them in one team. - **Routing rules are critical.** Route work to the right specialist so agents don't wander. - **Skills that span services are gold.** Logging, error handling, API conventions — document once, use everywhere. 
diff --git a/docs/src/content/docs/scenarios/new-project.md b/docs/src/content/docs/scenarios/new-project.md index ebaec5e21..d303380ff 100644 --- a/docs/src/content/docs/scenarios/new-project.md +++ b/docs/src/content/docs/scenarios/new-project.md @@ -1,36 +1,26 @@ # Starting a New Project with Squad - **Try this to bootstrap a modern web app:** ``` Set up Squad for a new React + Node.js app ``` - **Try this to initialize with deployment target:** ``` Initialize a team for a task management app that can run in Azure Web App PaaS ``` - Start from zero: new directory, new repo, full team in under a minute. Squad analyzes your description and assembles the right specialists. - --- - ## 1. Create the Repo - ```bash mkdir my-app && cd my-app git init ``` - --- - ## 2. Install Squad - ```bash npm install -g @bradygaster/squad-cli cd ~/projects/new-project squad init ``` - ``` ✅ .github/agents/squad.agent.md (v0.2.0) ✅ .github/workflows/ (10 workflows) @@ -38,83 +28,57 @@ squad init ✅ .ai-team/skills/ (starter skills) ✅ .ai-team/ceremonies.md ✅ .gitattributes (merge=union rules) - Squad is ready. - Next steps: 1. Open Copilot: copilot 2. Select Squad from the /agent list (CLI) or /agents (VS Code) 3. Tell it what you're building ``` - --- - ## 3. Open Copilot and Describe Your Project - ```bash copilot ``` - Select **Squad** from the `/agent` list (CLI) or `/agents` (VS Code). Squad greets you: - ``` Hey Jordan, what are you building? ``` - Describe the project — language, stack, what it does: - ``` > I'm building a CLI tool in Go that monitors AWS costs and sends > Slack alerts when spending exceeds thresholds. ``` - --- - ## 4. Confirm the Team - Squad proposes a team. 
Names come from a fictional universe: - ``` Here's your team: - 🏗️ Danny — Lead Scope, decisions, code review 🔧 Rusty — Backend Dev Go, AWS SDK, infrastructure ⚛️ Linus — Frontend Dev CLI interface, output formatting 🧪 Basher — Tester Tests, edge cases, quality 📋 Scribe — (silent) Memory, decisions, session logs - Look right? Say yes, add someone, or change a role. (Or just give me a task to start!) ``` - You can adjust the team or skip straight to work (which is implicit confirmation): - ``` > Yes. Rusty, set up the Go module and project structure. ``` - Squad creates `.ai-team/` (roster, charters, histories, routing rules, casting state) and spawns Rusty: - ``` 🔧 Rusty — setting up Go module and project structure - Rusty is on it. ``` - --- - ## 5. Give a Bigger Task — Parallel Fan-Out - Use the word "team" to trigger parallel work: - ``` > Team, build the cost monitoring feature. We need an AWS client > that pulls Cost Explorer data and a Slack notifier that sends > alerts when thresholds are exceeded. ``` - Squad decomposes the work and launches agents simultaneously: - ``` 🏗️ Danny — reviewing requirements, defining interfaces 🔧 Rusty — building AWS Cost Explorer client @@ -122,48 +86,33 @@ Squad decomposes the work and launches agents simultaneously: 🧪 Basher — writing test cases from requirements 📋 Scribe — logging session ``` - All five agents work at the same time in separate context windows. - --- - ## 6. Check Decisions - After agents finish, see what they decided: - ``` > Show me the decisions ``` - ``` ### 2025-07-15: AWS Cost Explorer polling interval **By:** Rusty **What:** Poll every 6 hours via cron, not real-time **Why:** Cost Explorer data updates ~3x/day, more frequent polling wastes API calls - ### 2025-07-15: Slack message format **By:** Linus **What:** Use Block Kit for alert messages with cost breakdown table **Why:** Rich formatting, actionable buttons for drill-down ``` - -Every agent reads these decisions before their next task. 
As the list grows, the team self-aligns. - +Every agent reads these decisions before their next task. As the list grows, the team retains context so you don't need to restate decisions. --- - ## 7. Commit Your Team - ```bash git add .ai-team/ .ai-team-templates/ .github/ .gitattributes git commit -m "Add Squad team" ``` - Commit `.ai-team/` — it's your team's brain. Anyone who clones the repo gets the full team with all their accumulated knowledge. - --- - ## Tips - - **First session is the slowest.** Agents have no history yet. After 2–3 sessions, they know your conventions and stop asking redundant questions. - **Agents improve over sessions.** Each agent appends what it learned to its `history.md`. By week 2, they know your file structure, naming patterns, and preferences. - **Say "team" for parallel work.** Naming a specific agent sends work to just that agent. diff --git a/docs/src/content/docs/scenarios/open-source.md b/docs/src/content/docs/scenarios/open-source.md index ad577c2ac..9179d7e9d 100644 --- a/docs/src/content/docs/scenarios/open-source.md +++ b/docs/src/content/docs/scenarios/open-source.md @@ -1,145 +1,102 @@ # Squad for Open Source Projects - **Try this to onboard as a contributor:** ``` Help me contribute to this open source project — review their CONTRIBUTING.md and set up a team ``` - **Try this to automate maintainer tasks:** ``` Enable auto-triage for incoming issues on my OSS repo ``` - **Try this to handle contributor-friendly tasks:** ``` -Handle good-first-issue #42 autonomously +Handle good-first-issue #42 in the background ``` - -Squad helps OSS maintainers with autonomous issue triage, contributor guidance, and architectural knowledge sharing. Reduces maintainer burden while keeping quality high. - +Squad helps OSS maintainers with automatic issue triage, contributor guidance, and architectural knowledge sharing. It reduces maintainer burden while keeping quality high. --- - ## 1. 
The Open Source Maintainer Problem - -Autonomous issue triage, community contributor support, and architectural knowledge sharing. - +Automatic issue triage, community contributor support, and architectural knowledge sharing. You maintain an open source project. Issues pile up. PRs from new contributors need guidance. Questions repeat. Triaging takes hours. - Squad helps: - **Triage incoming issues** automatically - **Guide contributors** with documented patterns -- **Handle good-first-issue tasks** autonomously -- **Keep architecture decisions visible** in `.ai-team/decisions.md` - +- **Handle good-first-issue tasks** in the background +- **Keep architecture decisions visible** in `.squad/decisions.md` --- - ## 2. Install Squad and Set Up Triage - ```bash cd ~/projects/my-open-source-lib npm install -g @bradygaster/squad-cli squad init ``` - Enable the Ralph heartbeat workflow: - ```bash cp .ai-team-templates/squad-heartbeat.yml .github/workflows/ git add .github/workflows/squad-heartbeat.yml git commit -m "Enable Squad auto-triage" git push ``` - Ralph now runs every 6 hours, reading new issues and applying labels: - ``` Issue #142: "Add support for custom themes" → squad:trinity (frontend work) → type:feature → priority:medium - Issue #143: "Documentation typo in README" → squad:scribe (docs work) → type:docs → priority:low → good-first-issue ``` - --- - ## 3. Community Contributors File Issues, Squad Triages - A contributor files an issue: - ``` Issue #144: "Add TypeScript type definitions" ``` - Ralph (heartbeat workflow) reads the issue, applies: - `squad:morpheus` (backend/tooling work) - `type:feature` - `priority:high` - `good-first-issue` (if it's suitable) - You review triaged issues and add `go:morpheus` if you approve. - --- - -## 4. @copilot Picks Up Good-First-Issue Tasks Autonomously - +## 4. 
@copilot Picks Up Good-First-Issue Tasks in the Background Enable the auto-assign workflow: - ```bash cp .ai-team-templates/copilot-auto-assign.yml .github/workflows/ git add .github/workflows/copilot-auto-assign.yml git commit -m "Enable Squad auto-assign" git push ``` - When you add `go:morpheus` to issue #144, GitHub Actions triggers: - ``` GitHub Actions → Copilot session spawns Morpheus Morpheus reads issue #144 → implements TypeScript definitions Morpheus opens PR #145 → "Add TypeScript type definitions" ``` - You review PR #145, approve, merge. Issue #144 closed. - **The contributor filed the issue. Squad handled it.** - --- - ## 5. Skills Document Your Project's Patterns - -After Squad works on your project for a few weeks, `.ai-team/skills/` becomes a **living contributor guide**: - -`.ai-team/skills/testing-conventions.md`: - +After Squad works on your project for a few weeks, `.squad/skills/` becomes a **living contributor guide**: +`.squad/skills/testing-conventions.md`: ```markdown # Testing Conventions - All new features must include tests. Use Jest for unit tests. - Test file naming: `{module}.test.ts` Test structure: describe → it blocks Mock external dependencies with `jest.mock()` ``` - Contributors can **read this file** to understand your testing norms. No need to repeat it in every PR review. - -`.ai-team/skills/api-design-patterns.md`: - +`.squad/skills/api-design-patterns.md`: ```markdown # API Design Patterns - All API endpoints follow RESTful conventions: - GET for read operations - POST for create - PUT for full update - PATCH for partial update - DELETE for removal - Use HTTP status codes correctly: - 200 OK for success - 201 Created for resource creation @@ -147,131 +104,91 @@ Use HTTP status codes correctly: - 404 Not Found for missing resources - 500 Internal Server Error for server issues ``` - **These skills are contributor documentation** that stays up to date because agents use them. - --- - ## 6. 
Decisions.md is Your Architecture Decision Record (ADR) - -`.ai-team/decisions.md` becomes your **public ADR**: - +`.squad/decisions.md` becomes your **public ADR**: ```markdown ### 2025-07-10: Use esbuild instead of Webpack **By:** Neo **What:** Migrated build system from Webpack to esbuild **Why:** 10x faster builds, simpler config, better DX for contributors - ### 2025-07-12: Stick with CommonJS for now **By:** Neo **What:** Not migrating to ESM yet **Why:** Too many compatibility issues with downstream tools Will revisit in 6 months - ### 2025-07-15: Use Zod for runtime validation **By:** Morpheus **What:** All API input validation uses Zod schemas **Why:** Type-safe, composable, generates TypeScript types ``` - Contributors see **why you made decisions**, not just what the code does. - --- - ## 7. Export Your Squad for Forks - When someone forks your project, they can **import your squad**: - ```bash squad export ``` - Share `squad-export-{date}.zip` in your repo's releases or documentation. - Forkers import it: - ```bash git clone https://github.com/forker/my-lib-fork.git cd my-lib-fork squad import squad-export-2025-07-15.zip ``` - Now they have **your team's knowledge** — skills, decisions, conventions. They're not starting from scratch. - --- - ## 8. Sample Prompts for Open Source Workflows - **Triage a batch of issues:** - ``` > Ralph, triage the 15 newest issues. Apply squad labels based on > routing rules. Flag any that are duplicates or need clarification. ``` - -**Check which issues are ready for autonomous work:** - +**Check which issues are ready for background work:** ``` > Show me all issues labeled good-first-issue and squad:morpheus. -> Which ones are clear enough for Morpheus to handle autonomously? +> Which ones are clear enough for Morpheus to handle in the background? ``` - -**Autonomous issue processing:** - +**Background issue processing:** ``` > Issue #152 is labeled go:morpheus. 
Morpheus, implement the feature, > write tests, and open a PR. ``` - **Generate contributor documentation from skills:** - ``` > Scribe, create a CONTRIBUTING.md file based on our accumulated > skills and conventions. Include testing patterns, code style, > and PR guidelines. ``` - **Review contributor PRs:** - ``` > Neo, review PR #160 from @contributor. Check if it follows our > conventions (skills, decisions). If not, suggest changes. ``` - **Handle a repeat question:** - ``` > Issue #175 is asking how to add a custom validator again. > Scribe, write a skill file for this so we can point future > contributors to it. ``` - --- - ## 9. Label Your Repo as Squad-Enabled - Add a badge to your README: - ```markdown ## Contributing - This project uses [Squad](https://github.com/bradygaster/squad) for AI-assisted development. - - **Triaging:** Issues are auto-labeled by Squad's Ralph agent -- **Patterns:** See `.ai-team/skills/` for coding conventions -- **Decisions:** See `.ai-team/decisions.md` for architectural rationale +- **Patterns:** See `.squad/skills/` for coding conventions +- **Decisions:** See `.squad/decisions.md` for architectural rationale - **Import the squad:** `squad import squad-export.zip` ``` - Contributors know what to expect. - --- - ## Tips - - **Ralph triages issues for you.** Run the heartbeat workflow every 6 hours to auto-label new issues. -- **Skills are living contributor docs.** As your squad learns, `.ai-team/skills/` becomes a knowledge base contributors can read. +- **Skills are living contributor docs.** As your squad learns, `.squad/skills/` becomes a knowledge base contributors can read. - **Decisions.md is your ADR.** Architectural decisions are visible and explained, not hidden in Git history. - **Export your squad for forks.** Forkers get your team's accumulated knowledge — skills, conventions, decisions. -- **good-first-issue + go:* = autonomous processing.** Mark issues as safe to auto-process, and Squad handles them. 
+- **good-first-issue + go:* = background processing.** Mark issues as safe to auto-process, and Squad handles them. - **Agents don't merge without approval.** PRs created by agents still require human review before merging. diff --git a/docs/src/content/docs/scenarios/remote-qa.md b/docs/src/content/docs/scenarios/remote-qa.md index 944c31373..5076935ca 100644 --- a/docs/src/content/docs/scenarios/remote-qa.md +++ b/docs/src/content/docs/scenarios/remote-qa.md @@ -1,85 +1,49 @@ -# Remote Q&A with Squad - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - -**Try this:** -``` -@copilot How does authentication work in this project? -``` - -You don't always have the repo cloned locally. Sometimes you want to ask your Squad a question from the browser, the GitHub CLI, or a mobile device — without pulling code. - ---- - -## Current options - -Squad already supports several remote interaction paths. Each trades off convenience, depth, and setup effort. - -### 1. Copilot Chat with squad.agent.md - -If the repo has `.github/agents/squad.agent.md`, GitHub Copilot Chat reads it automatically when you ask questions about the repo. - -**How it works:** -- Open the repo in GitHub.com -- Use Copilot Chat in the browser -- Copilot reads the agent file and answers using your team's context - -**Good for:** Quick questions about architecture, team structure, and project conventions. - -**Limitation:** Copilot reads the default branch only. You can't point it at a feature branch. - -### 2. Assign an issue to @copilot - -Create a GitHub issue and assign it to `@copilot`. If the repo has Squad's issue-assign workflow (`.github/workflows/squad-issue-assign.yml`), the coding agent picks up the issue and works it using your Squad configuration. - -**How it works:** -1. Create an issue describing the question or task -2. Assign it to `@copilot` -3. 
The workflow triggers and Squad processes it - -**Good for:** Tasks that need code changes, research across files, or multi-step investigation. - -**Limitation:** Designed for work items, not conversational Q&A. The workflow runs against the default branch. - -### 3. Use `squad:` labels on issues - -Add a `squad:{member}` label to any issue, and Squad routes it to the right team member. - -**How it works:** -1. Create or label an issue with `squad:fenster` (or any member name) -2. The triage workflow assigns it to the appropriate agent -3. Work proceeds through the normal Squad flow - -**Good for:** Routing specific work to specific team members without cloning. - -**Limitation:** Requires label setup on the repo. Routes work, not questions. - ---- - -## What's not supported yet - -These features don't exist today but would make remote Q&A more powerful: - -### Branch-aware queries - -All current remote paths read the default branch. You can't ask "How does auth work on the `feature/oauth` branch?" and get branch-specific answers. - -**Workaround:** Mention the branch in your question and ask the agent to check out that branch during investigation. - -### GitHub Discussions integration - -A Discussions-based Q&A channel where Squad monitors and answers questions would make remote interaction feel conversational. This would need a new workflow trigger on `discussion` events. - -### Issue comment commands - -A `/squad ask "question"` command in issue comments that triggers Squad to respond inline would enable threaded Q&A without creating new issues. - ---- - -## Tips - -- **Start with Copilot Chat.** It's the lowest-effort path and works today for repos with `squad.agent.md`. -- **Use issues for anything that needs code.** Copilot Chat answers questions; issues drive work. -- **Include context in your question.** Remote paths don't have your local state. Be specific about which files, features, or branches you mean. 
-- **Check the default branch.** All remote paths currently read `main` (or whatever the repo's default branch is). If you're asking about unreleased work, mention the branch explicitly. +# Remote Q&A with Squad +**Try this:** +``` +@copilot How does authentication work in this project? +``` +You don't always have the repo cloned locally. Sometimes you want to ask your Squad a question from the browser, the GitHub CLI, or a mobile device — without pulling code. +--- +## Current options +Squad already supports several remote interaction paths. Each trades off convenience, depth, and setup effort. +### 1. Copilot Chat with squad.agent.md +If the repo has `.github/agents/squad.agent.md`, GitHub Copilot Chat reads it automatically when you ask questions about the repo. +**How it works:** +- Open the repo in GitHub.com +- Use Copilot Chat in the browser +- Copilot reads the agent file and answers using your team's context +**Good for:** Quick questions about architecture, team structure, and project conventions. +**Limitation:** Copilot reads the default branch only. You can't point it at a feature branch. +### 2. Assign an issue to @copilot +Create a GitHub issue and assign it to `@copilot`. If the repo has Squad's issue-assign workflow (`.github/workflows/squad-issue-assign.yml`), the coding agent picks up the issue and works it using your Squad configuration. +**How it works:** +1. Create an issue describing the question or task +2. Assign it to `@copilot` +3. The workflow triggers and Squad processes it +**Good for:** Tasks that need code changes, research across files, or multi-step investigation. +**Limitation:** Designed for work items, not conversational Q&A. The workflow runs against the default branch. +### 3. Use `squad:` labels on issues +Add a `squad:{member}` label to any issue, and Squad routes it to the right team member. +**How it works:** +1. Create or label an issue with `squad:fenster` (or any member name) +2. 
The triage workflow assigns it to the appropriate agent +3. Work proceeds through the normal Squad flow +**Good for:** Routing specific work to specific team members without cloning. +**Limitation:** Requires label setup on the repo. Routes work, not questions. +--- +## What's not supported yet +These features don't exist today but would make remote Q&A more powerful: +### Branch-aware queries +All current remote paths read the default branch. You can't ask "How does auth work on the `feature/oauth` branch?" and get branch-specific answers. +**Workaround:** Mention the branch in your question and ask the agent to check out that branch during investigation. +### GitHub Discussions integration +A Discussions-based Q&A channel where Squad monitors and answers questions would make remote interaction feel conversational. This would need a new workflow trigger on `discussion` events. +### Issue comment commands +A `/squad ask "question"` command in issue comments that triggers Squad to respond inline would enable threaded Q&A without creating new issues. +--- +## Tips +- **Start with Copilot Chat.** It's the lowest-effort path and works today for repos with `squad.agent.md`. +- **Use issues for anything that needs code.** Copilot Chat answers questions; issues drive work. +- **Include context in your question.** Remote paths don't have your local state. Be specific about which files, features, or branches you mean. +- **Check the default branch.** All remote paths currently read `main` (or whatever the repo's default branch is). If you're asking about unreleased work, mention the branch explicitly. diff --git a/docs/src/content/docs/scenarios/solo-dev.md b/docs/src/content/docs/scenarios/solo-dev.md index 340140480..9e07f6999 100644 --- a/docs/src/content/docs/scenarios/solo-dev.md +++ b/docs/src/content/docs/scenarios/solo-dev.md @@ -1,164 +1,104 @@ # Squad for Solo Developers - -> ⚠️ **Experimental** — Squad is alpha software. 
APIs, commands, and behavior may change between releases. - - **Try this:** ``` I'm working alone on a side project — set up a minimal squad to help with code review and testing ``` - Squad gives solo devs what they're missing: code review, test coverage, and architectural second opinions. Start with 3 agents instead of 5. - --- - ## 1. Why Squad Works for Solo Devs - When you're working alone: - - **No code reviews.** Bugs land in production because no one else looked. - **No testing discipline.** You skip tests when you're in a hurry. - **No second opinion.** Architectural decisions go unchallenged. - Squad gives you a **Lead who reviews everything**, a **tester who catches edge cases**, and **specialists who know their domain**. You're one person, but you have a team that remembers everything. - --- - ## 2. Start Small: 3 Agents Instead of 5 - You don't need a full 5-agent team as a solo dev. Customize the roster: - ``` > I'm a solo developer building a Python API. I want a small team: > a Lead for code review, one backend dev, and a tester. No frontend dev. ``` - ``` Here's your team: - 🏗️ Michael — Lead Code review, scope, decisions 🔧 Fredo — Backend Dev Python, FastAPI, database work 🧪 Kay — Tester Tests, edge cases, quality 📋 Scribe — (silent) Memory, decisions, session logs - Look right? ``` - Lean team. Just what you need. - --- - ## 3. The Reviewer Protocol Catches Bugs - When you give a task to an agent: - ``` > Fredo, add user registration with email verification. ``` - Fredo builds it. **Then Michael (the Lead) automatically reviews the code.** - ``` 🏗️ Michael — reviewing Fredo's user registration implementation - Issues found: - Email validation regex allows invalid TLDs - Verification tokens aren't rate-limited — brute force risk - No test for expired token edge case - Rejected. Fredo, address these issues. ``` - This is the safety net you don't have as a solo dev. Michael catches issues before they land. - --- - ## 4. 
Skills Accumulate — You Don't Repeat Mistakes - After Michael rejects Fredo's code for missing rate limiting, **it gets encoded as a skill**: - `.copilot/skills/auth-rate-limiting.md`: - ```markdown # Authentication Endpoints Must Be Rate-Limited - When building user-facing auth flows (login, registration, password reset, email verification), always add rate limiting to prevent brute force attacks. - Use a token bucket or sliding window algorithm. Store counters in Redis or an in-memory cache with TTL. ``` - Next time any agent builds an auth feature, they read this skill first. The mistake doesn't happen again. - --- - ## 5. You're Solo, But Your Team Remembers Everything - As a solo dev, you juggle dozens of context switches. Squad doesn't forget: - - **Decisions** are logged. "Why did I use PostgreSQL instead of MongoDB?" — check `.squad/decisions.md`. - **Skills** capture patterns. "How do I structure FastAPI routes?" — check `.copilot/skills/`. - **Histories** track what each agent learned. Fredo knows your database schema after one session. - You get back to a project after 3 weeks, and the team is still up to speed. - --- - ## 6. Workflow: Solo Dev + Squad - ``` > Fredo, add a /users endpoint with pagination and filtering. ``` - Fredo builds it. Michael reviews. Kay writes tests. - ``` 🔧 Fredo — building /users endpoint with pagination and filtering 🏗️ Michael — reviewing Fredo's implementation 🧪 Kay — writing tests for /users endpoint ``` - All three work in parallel. You give one prompt, get three agents. - When it's done: - ``` ✅ /users endpoint implemented ✅ Code review passed (minor refactor for query optimization) ✅ 8 tests written (pagination, filtering, edge cases) ``` - You're a solo dev, but you shipped like a team. - --- - ## 7. Use the Lead as a Sounding Board - ``` > Michael, I'm deciding between REST and GraphQL for this API. > It's a read-heavy app with a mobile client. Thoughts? 
``` - ``` 🏗️ Michael — evaluating REST vs GraphQL for your use case - REST pros: Simple, cacheable, mobile SDKs abundant GraphQL pros: Flexible queries, reduces round trips - For a read-heavy app with a mobile client, I'd lean REST. You get better HTTP caching (CDN, browser, mobile OS), simpler mobile integration, and fewer surprises at scale. - If your mobile app needs highly dynamic queries, revisit GraphQL. ``` - Michael's not always right, but having a second opinion is invaluable. - --- - ## Tips - - **Start with 3–4 agents.** You don't need a frontend and backend specialist if you're only building one layer. - **The Lead is your reviewer.** Use them to review your own code before committing. - **Skills are YOUR documentation.** After 10 sessions, `.copilot/skills/` is a custom knowledge base. diff --git a/docs/src/content/docs/scenarios/team-of-humans.md b/docs/src/content/docs/scenarios/team-of-humans.md index 7f7e4dc89..dbeba9456 100644 --- a/docs/src/content/docs/scenarios/team-of-humans.md +++ b/docs/src/content/docs/scenarios/team-of-humans.md @@ -1,232 +1,157 @@ # Squad Alongside a Team of Human Developers - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - **Try this:** ``` My team is all human devs — I just want Squad to help with coordination and memory ``` - Mixed AI + human teams: AI agents handle implementation, humans review and make critical decisions. Route work to humans just like AI agents. - --- - ## 1. Add Human Members to the Roster - Tell Squad about your human team members: - ``` > Add two human developers to the roster: > Sarah (senior backend engineer) and Jamal (frontend lead). > They're not AI agents, they're on my team. ``` - ``` ✅ Added human members: 👤 Sarah — Senior Backend Engineer 👤 Jamal — Frontend Lead - Humans appear in the roster but don't get charters. Work can be routed to them via routing rules. 
``` - Squad adds them to `.squad/team.md`: - ```markdown ## Human Team Members - - **Sarah** — Senior Backend Engineer - **Jamal** — Frontend Lead ``` - --- - ## 2. Routing Rules for Mixed Teams - Edit `.squad/routing.md` to route work to humans: - ```markdown # Routing Rules - **Backend architecture decisions** → Sarah (human) **Frontend architectural changes** → Jamal (human) **Payment integration work** → Sarah (human) **UI component library changes** → Jamal (human) - **Backend implementation tasks** → Morpheus (AI agent) **Frontend implementation tasks** → Trinity (AI agent) **Code review** → Neo (AI agent) **Testing** → Tank (AI agent) ``` - Now when you give a task: - ``` > Team, we need to add a real-time notification system. > This is a big architectural change. ``` - The coordinator sees "architectural change" and routes to humans: - ``` 📋 Scribe — routing architectural decision to Sarah and Jamal - ⏸️ Coordinator paused. Waiting for Sarah and Jamal to decide on the notification architecture (WebSockets vs Server-Sent Events vs polling). Resume with their decision. ``` - The AI agents **don't start implementation** until the humans decide. - --- - ## 3. Humans Decide, AI Implements - Sarah and Jamal discuss and decide: WebSockets via Socket.IO. - You relay the decision: - ``` > Sarah and Jamal decided on WebSockets using Socket.IO. > Morpheus, implement the backend. Trinity, integrate on the frontend. ``` - ``` 🔧 Morpheus — building WebSocket server with Socket.IO ⚛️ Trinity — integrating Socket.IO client in React components - Both agents are reading Sarah and Jamal's decision from decisions.md. ``` - Scribe logs the decision: - ```markdown ### 2025-07-15: Real-time notifications via WebSockets **By:** Sarah, Jamal **What:** Use Socket.IO for real-time notifications **Why:** Need bi-directional communication, Socket.IO has better mobile client support than raw WebSockets ``` - AI agents read this before starting. They know the humans already decided. 
- --- - ## 4. AI Agents Work, Humans Review PRs - Agents create pull requests. Humans review them. - ``` 🏗️ Neo — submitting PR #47: WebSocket notification system - PR includes: - Backend: Socket.IO server integration - Frontend: React component with socket connection - Tests: 12 new tests for connection lifecycle - Docs: Socket.IO event protocol - Requesting review from Sarah and Jamal. ``` - Sarah and Jamal review the PR on GitHub. If they request changes, relay that back: - ``` > Sarah flagged an issue in PR #47: the socket reconnection logic > doesn't handle token expiration. Morpheus, fix it. ``` - ``` 🔧 Morpheus — fixing socket reconnection to handle token expiration ``` - Morpheus pushes a fix. The humans approve. Merge. - --- - ## 5. Pausing the Coordinator - When work routes to a human, the coordinator **pauses** — it doesn't guess what the human will decide. - ``` > Team, should we migrate from REST to GraphQL? ``` - ``` 📋 Scribe — routing API architecture decision to Sarah - ⏸️ Coordinator paused. This is a significant architectural change. Waiting for Sarah's input before proceeding. ``` - You ask Sarah. She says no, stick with REST. - ``` > Sarah said no, we're staying with REST. Document that. ``` - ``` 📋 Scribe — logged decision - ### 2025-07-15: Staying with REST **By:** Sarah **What:** Not migrating to GraphQL **Why:** Current REST API works well, migration cost isn't justified ``` - --- - ## 6. Use Agents for the Tedious Work - Humans handle high-level decisions and code review. Agents handle implementation, testing, and grunt work. - ``` > Tank, we just merged PR #47. Run the full test suite and check > for regressions. If you find any, fix them. ``` - ``` 🧪 Tank — running full test suite Checking for regressions after WebSocket integration... ``` - Tank finds a broken test, fixes it, commits. Sarah doesn't have to. - --- - ## 7. Sample Prompts for Mixed Teams - **Route a decision to a human:** - ``` > This payment gateway change is sensitive. 
Route it to Sarah > for approval before implementing. ``` - **Delegate implementation after human approval:** - ``` > Sarah approved the Stripe integration plan. Morpheus, implement it. > Follow the plan Sarah outlined in issue #23. ``` - **Have an agent assist a human:** - ``` > Jamal is working on the new dashboard UI. Trinity, help him by > building the data-fetching hooks and TypeScript types. ``` - **Agent-led PR, human review:** - ``` > Neo, create a PR for the caching layer work. Assign it to Sarah > for review. ``` - --- - ## Tips - - **Humans in the roster, not as agents.** Humans don't get charters or histories, but they appear in routing rules. - **Use routing rules to protect critical paths.** Route payment logic, security changes, and architectural decisions to humans. - **Agents don't guess.** If a task routes to a human, the coordinator pauses until you relay the human's decision. diff --git a/docs/src/content/docs/scenarios/team-state-storage.md b/docs/src/content/docs/scenarios/team-state-storage.md index a2dbe4057..481d1f14b 100644 --- a/docs/src/content/docs/scenarios/team-state-storage.md +++ b/docs/src/content/docs/scenarios/team-state-storage.md @@ -1,329 +1,135 @@ -# Keeping Your Squad Where You Want It - -Your `.ai-team/` directory contains everything—team rosters, skills, decisions, agent histories. The question isn't whether to track it, but *how* and *where* to track it. Here are the real options, with honest tradeoffs. - +# Keeping Squad State Where You Want It +Squad's current state directory is **`.squad/`**, not `.ai-team/`. +If you're deciding where team state should live, there are two different mechanisms in the product today: +1. **External state location** — `squad externalize` / `squad internalize` +2. **State backends** — `local`, `orphan`, and `two-layer` +Those shipped mechanisms are the options this page focuses on. --- - -## 1. 
Committed to Main (The Default) - -**What it is:** `.ai-team/` is tracked in git, committed alongside your code. Anyone who clones the repo gets the full team with all accumulated knowledge. - -```bash -git add .ai-team/ -git commit -m "Add Squad team" -``` - -### Pros - -- **Simplest setup.** No configuration, no branching strategy. -- **Portable.** Clone the repo anywhere, and the team knowledge travels with it. -- **Shared context.** Every collaborator sees the same team definitions, skills, and decisions. -- **Git history.** You can trace how decisions evolved, view old skills, recover deleted files. -- **GitHub Actions work out of the box.** Workflows (heartbeat, triage, label sync) access `.ai-team/` immediately. - -### Cons - -- **PR noise.** Every team change—new skill, updated decision, agent history—shows up in PR diffs. Some people find this distracting. -- **`decisions.md` grows.** Over time, your decisions file accumulates hundreds of entries. Git history is there, but the current file gets long. -- **Some orgs don't allow it.** Enterprise policies sometimes forbid AI artifacts in source repositories. Check before you commit. - -### When to Use This - -- Solo dev or small team, private repo. -- Open source project—your contributors should see how the team works. -- You want maximum portability and zero configuration. 
- +## What ships today +| Option | How you enable it | What it stores | Best fit | +|---|---|---|---| +| **Local working tree** | default | `.squad/` files in the repo | simplest workflow | +| **External state location** | `squad externalize` | mutable `.squad/` state in a platform-specific app-data directory | branch-switch safety without Git plumbing | +| **Orphan backend** | `squad init --state-backend orphan` or `squad upgrade --state-backend orphan` | mutable state on the `squad-state` orphan branch | clean working tree, Git history | +| **Two-layer backend** | `squad init --state-backend two-layer` or `squad upgrade --state-backend two-layer` | durable state on `squad-state`, plus best-effort git notes annotations | recommended team backend | +> `stateBackend: "external"` is **not** a real backend today. The SDK accepts the value for compatibility, warns that it is a stub, and falls back to `local`. --- - -## 2. Gitignored (Local-Only) - -**What it is:** Add `.ai-team/` to `.gitignore`. Team state lives locally on each dev machine, never committed. - -```bash -echo ".ai-team/" >> .gitignore -git add .gitignore -git commit -m "Gitignore squad team state" -``` - -### Pros - -- **Zero repo noise.** No PR diffs, no git history clutter. -- **No policy concerns.** Enterprise orgs with AI artifact policies sleep easy. -- **Clean main branch.** Code and team are completely separated. - -### Cons - -- **Team knowledge is not portable.** If you delete `.ai-team/`, it's gone. No git history to recover it. -- **Collaborators don't share state.** Your teammate clones the repo and gets a fresh, empty `.ai-team/`. Their team doesn't match yours. -- **No git history for recovery.** You can't `git log` to find an old decision or see when a skill was added. -- **⚠️ GitHub Actions workflows can't access `.ai-team/`.** Actions only see committed files. Triage routing rules in `team.md` won't work in CI/CD. 
(Label sync and other API-based workflows still function, but the team-based routing logic is silent.) - -### When to Use This - -- Team doesn't need shared state (unlikely for Squad). -- Enterprise policy strictly forbids AI artifacts in repos. -- You're experimenting and don't want to commit yet. - +## 1. Local working tree (default) +This is the default behavior. Squad reads and writes regular files under `.squad/` in your working tree. +**Pros** +- Easiest to understand and inspect +- Works well when you want team state committed with the repo +- No special Git setup required +**Cons** +- State can show up in diffs and PRs +- Uncommitted state is vulnerable to branch switches and cleanup commands +- Shared editing can create merge conflicts in files like `decisions.md` +**Good fit when** you want the repo itself to be the source of truth. --- - -## 3. Separate Branch (e.g., `squad-state`) - -**What it is:** Keep `.ai-team/` on a dedicated branch (`squad-state`, `team-config`, etc.), not on `main`. Use `git worktree` to mount it locally. - -### Setup - -```bash -# Create and push the squad-state branch (if it doesn't exist) -git checkout --orphan squad-state -git rm -rf . -echo "# Squad State Branch\nThis branch tracks .ai-team/ configuration." > README.md -git add README.md -git commit -m "Initial squad-state branch" -git push origin squad-state - -# Back on main -git checkout main - -# Mount squad-state in a worktree -git worktree add .ai-team-worktree squad-state -ln -s .ai-team-worktree/.ai-team .ai-team -git add .gitignore -echo ".ai-team-worktree/" >> .gitignore -git commit -m "Add squad worktree" -``` - -On Windows: - +## 2. External state location (`squad externalize`) +External state location moves mutable state out of the working tree and into a platform-specific Squad home directory. 
```bash -# Use mklink instead of ln -s (requires admin or Developer Mode) -git worktree add .ai-team-worktree squad-state -mklink /D .ai-team .ai-team-worktree\.ai-team +squad externalize ``` - -### Pros - -- **Clean main branch.** `.ai-team/` never appears in `main` or in PR diffs. -- **Full git history.** The `squad-state` branch has complete history of all team changes. -- **Shareable with collaborators.** They can check out `squad-state` and pull your team setup. -- **GitHub Actions can access it.** Workflows can check out both `main` and `squad-state` if needed. - -### Cons - -- **Complex setup.** Requires knowledge of `git worktree` and branch management. -- **Merge conflicts.** If multiple people work on `squad-state` simultaneously, conflicts happen. -- **Worktree management overhead.** You need to remember to update the worktree, and it can get stale. -- **Collaborators must set up the worktree.** They can't just clone; they need to run the setup commands. - -### When to Use This - -- Team that values clean main branch but wants shared team state. -- You want full git history of team evolution but don't want PR noise. -- You're already comfortable with `git worktree` or branching strategies. 
- +What actually happens today: +- Mutable `.squad/` entries are copied to the external directory +- Local-only bootstrap files such as `.squad/config.json`, `manifest.json`, `workstreams.json`, `upstream.json`, `squad-registry.json`, and `_upstream_repos/` stay local +- Squad ensures **`.squad/config.json`** is in `.gitignore` +- `squad internalize` copies the externalized entries back, but does **not** remove the `.gitignore` entry +**Pros** +- Keeps mutable team state out of PRs +- Branch switches no longer destroy the externalized state +- No orphan branch or Git hooks required +**Cons** +- The external directory is machine-local unless you back it up yourself +- Not every file under `.squad/` moves out; bootstrap metadata stays local +- This is separate from the `stateBackend` system +**Good fit when** you want clean code branches but do not need Git-native history for team state. +See also: [External State Storage](/squad/docs/features/external-state/). --- - -## 4. Git Submodule - -**What it is:** `.ai-team/` as a separate Git repository, added as a submodule to your main repo. - -### Setup - -```bash -# Create a separate repository for your squad (e.g., on GitHub) -# Then add it as a submodule -git submodule add https://github.com/you/my-squad-state .ai-team -git commit -m "Add squad state as submodule" -git push -``` - -Collaborators clone with: - -```bash -git clone --recurse-submodules https://github.com/you/my-project -``` - -Or after cloning normally: - +## 3. Orphan backend (`squad-state` branch) +The **orphan** backend stores mutable state on a dedicated `squad-state` branch using Git plumbing commands. The branch is never checked out as your working branch. ```bash -git submodule init -git submodule update +squad init --state-backend orphan +# or +squad upgrade --state-backend orphan ``` - -### Pros - -- **Completely separate history.** The submodule repo has its own git log, independent of your main project. 
-- **Shareable across repos.** Use the same submodule in multiple projects. -- **Clean main branch.** `.ai-team/` is external; no PR diffs in your project. -- **Full git features.** Submodule repo has branches, tags, and full history. - -### Cons - -- **Submodules are complex.** Widely disliked by the git community. Conflicts, merge issues, and confusion are common. -- **Everyone must remember `--recurse-submodules`.** Collaborators who forget get an empty `.ai-team/` directory. -- **CI/CD needs extra setup.** Your workflows must initialize submodules explicitly: - ```bash - git submodule init && git submodule update - ``` -- **Updating the submodule can cause conflicts.** If two people push to the submodule simultaneously, merging back is painful. - -### When to Use This - -- You're already using submodules elsewhere in your org (they're familiar with the pain). -- You want to share the same squad configuration across 3+ repositories. -- Your team is comfortable with advanced git workflows. - -**Honest take:** Submodules work, but the git community almost universally dislikes them. They're powerful tools for specific use cases, but most teams regret using them. Only reach for submodules if you truly need them. - +What the SDK actually ships: +- `OrphanBranchBackend` stores files as blobs on `squad-state` +- Reads use Git object lookups such as `git show squad-state:` +- Writes create commits on the orphan branch +- The CLI installs Git hooks to help keep the branch synchronized +**Pros** +- Clean working tree +- Full Git history for squad state +- Easy to inspect with normal Git commands +**Cons** +- More Git machinery than local or external state +- Single-writer coordination is still helpful during concurrent updates +**Good fit when** you want Git-versioned squad state without mixing it into normal code commits. --- - -## 5. 
Symlink to External Directory - -**What it is:** Keep `.ai-team/` somewhere else on your filesystem (e.g., `~/my-squads/my-project-squad/`), then symlink it into your repo. - -### Setup - -On macOS/Linux: - -```bash -mkdir -p ~/my-squads/my-project-squad -ln -s ~/my-squads/my-project-squad .ai-team -``` - -On Windows (requires admin or Developer Mode): - -```bash -mkdir C:\Users\you\my-squads\my-project-squad -mklink /D .ai-team C:\Users\you\my-squads\my-project-squad -``` - -Add `.ai-team` to `.gitignore`: - -```bash -echo ".ai-team" >> .gitignore -``` - -### Pros - -- **Share state across repos.** Point multiple projects to the same squad directory. -- **No git noise.** The symlink itself isn't tracked; `.ai-team/` is ignored. -- **Maximum flexibility.** You can move the squad, reorganize it, or swap it out. - -### Cons - -- **Not portable.** Symlinks are machine-specific. Collaborators need the exact same filesystem layout or the symlink breaks. -- **Windows compatibility is fragile.** Symlinks on Windows require admin privileges or Developer Mode; many orgs disable this. -- **Easy to break.** If the external directory is deleted, the symlink points to nothing. -- **No git history.** Team state changes aren't tracked in your project repo; you're on your own for backups. - -### When to Use This - -- You maintain multiple repositories with the same squad. -- Everyone on your team has the same filesystem layout (rare in practice). -- You're on macOS/Linux and control your development environment. - -**Caveat:** This breaks for most teams sharing code. Collaborators' symlinks will be broken, external contractors can't participate, and CI/CD usually fails. - ---- - -## 6. Dev Branch Only (The Squad Project's Own Approach) - -**What it is:** `.ai-team/` is committed, but *only* on dev/feature branches. On `main`, it's gitignored. When you create a feature branch, you remove `.ai-team/` from `.gitignore` so the team travels with your work. 
- -### Setup - -On `main`: - -```bash -echo ".ai-team/" >> .gitignore -git add .gitignore -git commit -m "Ignore squad team on main" -``` - -When you start a feature branch: - +## 4. Two-layer backend (recommended for teams) +The **two-layer** backend combines the orphan branch with best-effort Git notes. ```bash -git checkout -b feature/my-feature -git rm .ai-team/ # if it exists from a previous branch -# Remove .ai-team/ from .gitignore -git edit .gitignore -# (remove the .ai-team/ line) -git add .gitignore -git commit -m "Track squad team on this branch" +squad init --state-backend two-layer +# or +squad upgrade --state-backend two-layer ``` - -Agents work with the full `.ai-team/` context while you develop. When you merge back to `main`, the PR shows the `.ai-team/` changes, but `main` stays clean. - -### Pros - -- **Clean main branch.** `main` is pure code, no squad artifacts. -- **Full context on feature branches.** Agents have the team history while you work. -- **Git history preserved.** Team changes are committed on feature branches and visible in git log. -- **Collaborators get team state.** Anyone checking out your feature branch gets `.ai-team/`. -- **GitHub Actions can work both ways.** On `main`, workflows use GitHub API (label sync, heartbeat). On feature branches, they can use team-based routing if needed. - -### Cons - -- **Merge conflicts when syncing branches.** If `main` has `.ai-team/` gitignored but your branch commits it, merging is messy. -- **Easy to forget the pattern.** Developers forget to remove `.ai-team/` from `.gitignore` when creating feature branches (or forget to add it back when switching back to `main`). -- **PR diffs include team changes.** PRs from feature branches show all `.ai-team/` modifications, which some teams find noisy. - -### When to Use This - -- Small team that's aware of the pattern. -- You want clean main but team context on feature branches. -- You're OK with remembering to toggle `.gitignore` per branch. 
- +What the SDK actually ships: +- `TwoLayerBackend` reads durable state from the orphan branch +- It also attempts note writes through `GitNotesBackend` for commit-scoped annotations +- If the notes layer fails, the durable orphan-layer write still succeeds +- `git-notes` as a standalone backend is deprecated and normalized to `two-layer` +**Pros** +- Clean working tree +- Durable, per-file state on `squad-state` +- Better team story than plain orphan or historical git-notes-only storage +**Cons** +- Most operationally complex option +- Requires Git repository semantics and hook setup +**Good fit when** multiple people or agents need a branch-safe, team-oriented backend. +See also: [State Backends](/squad/docs/features/state-backends/). --- - -## Decision Matrix - -| Scenario | Option | Why | -|----------|--------|-----| -| Solo dev, private repo | **1. Committed** | Simplest, portable, full history | -| Team, shared state, no PR concerns | **1. Committed** | Everyone gets same team | -| Team, clean main, no Actions workflows | **2. Gitignored** | No policy issues, no PR noise | -| Team, clean main, need Actions workflows | **3. Separate Branch** | Full history, shared state, Actions can access it | -| Multiple repos, same squad | **4. Submodule** or **5. Symlink** | Submodule if you need git; symlink if portable | -| Enterprise, AI artifact policy | **2. Gitignored** or **4. Submodule** | Keep AI stuff out of main repo | -| Open source | **1. Committed** | Contributors should see how the team works | - +## What the SDK exports today +If you're building on the SDK, there are two public surfaces to know about. 
+### State backend surface +From `@bradygaster/squad-sdk`, the current public backend API includes: +- `resolveStateBackend()` +- `WorktreeBackend` +- `GitNotesBackend` (kept for compatibility; standalone use is deprecated) +- `OrphanBranchBackend` +- `TwoLayerBackend` +- `StateBackendStorageAdapter` +- `verifyStateBackend()` +### Typed state facade surface +The `./state` export provides a typed facade over `.squad/` state, including: +- `SquadState` +- `AgentsCollection` +- `DecisionsCollection` +- `RoutingCollection` +- `TeamCollection` +- `SkillsCollection` +- `TemplatesCollection` +- `ConfigCollection` +- `LogCollection` +Use this when you want typed access to Squad state without dealing with raw file paths yourself. --- - -## Tips - -- **GitHub Actions and Gitignored `.ai-team/`:** If you choose option 2 (gitignore), remember that Actions workflows see committed files only. Label sync and heartbeat workflows (which use GitHub API) still work. But `squad.agent.md` triage rules won't see `.ai-team/decisions.md` during automated runs. Workaround: Copy critical decisions to a committed file or pass them as workflow env vars. -- **Merge conflicts on `decisions.md`:** If multiple people are committing to `.ai-team/` at the same time, `decisions.md` and agent histories conflict frequently. Use the `.gitattributes merge=union` rules that Squad sets up. Check the file after merge to ensure it looks reasonable. -- **Backup your team.** If you're gitignoring `.ai-team/`, make sure you have backups. A deleted `.ai-team/` directory with no git history is gone forever. -- **Communicate the pattern to your team.** Whatever you choose, document it. Add a line to your `CONTRIBUTING.md` or `README.md` explaining where the squad lives and how to interact with it. -- **Start simple, migrate later.** Commit `.ai-team/` initially (option 1). If PR noise becomes a real problem, migrate to option 2 or 3. Changing strategies later is possible but requires care. 
- +## Important distinction: location vs backend +These are related, but not the same thing: +- **External state location** changes where mutable state lives on disk (`stateLocation: "external"` in `.squad/config.json`) +- **State backends** change how mutable state is persisted (`stateBackend: "local" | "orphan" | "two-layer"`) +If you are choosing a strategy, decide first whether you want: +1. plain files, +2. an external directory, or +3. Git-native storage. --- - -## Sample Prompts - -Use these prompts with Squad to implement specific strategies: - -- **"Keep .ai-team/ out of my main branch."** - - Directs you toward option 3 (separate branch) or option 6 (dev-only). - -- **"I want to share my squad across three repos without duplicating the team state."** - - Points to option 4 (submodule) or option 5 (symlink). - -- **"Add .ai-team to .gitignore but make sure GitHub Actions can still route based on team.md."** - - Hybrid: gitignore but keep a committed `squad-routing.md` that Actions reads. - -- **"My enterprise doesn't allow AI artifacts in the main repository."** - - Option 2 (gitignore) or option 4 (submodule in a separate org-controlled repo). - -- **"I deleted .ai-team by accident. How do I recover it?"** - - If committed: `git checkout HEAD~5 .ai-team/` (restore from history). - - If gitignored: No recovery from git. Restore from backup or rebuild the team. - +## Quick guidance +- **Want the simplest setup?** Stay on **local**. +- **Want branch-safe local storage without Git plumbing?** Use **externalize/internalize**. +- **Want Git history but a clean working tree?** Use **orphan**. +- **Want the most team-oriented shipped backend?** Use **two-layer**. --- - ## See Also - -- **[Adding Squad to an Existing Repo](existing-repo.md)** — How to integrate Squad into a project with existing code. -- **[Squad for Solo Developers](solo-dev.md)** — Building alone? Here's how Squad becomes your team. 
-- **[Multiple Squads](multiple-squads.md)** — Managing more than one AI team. -- **[Team Portability](team-portability.md)** — Moving your squad to a new repo or machine. +- [External State Storage](/squad/docs/features/external-state/) +- [State Backends](/squad/docs/features/state-backends/) +- [Adding Squad to an Existing Repo](existing-repo.md) diff --git a/docs/src/content/docs/tour-first-session.md b/docs/src/content/docs/tour-first-session.md index 9fcbe9a54..bdb041dac 100644 --- a/docs/src/content/docs/tour-first-session.md +++ b/docs/src/content/docs/tour-first-session.md @@ -1,100 +1,67 @@ # First Session Walkthrough - A step-by-step tour of your first time using Squad. Follow along in your terminal. - --- - ## 1. Install Squad - Start with a git repo (new or existing): - ```bash mkdir my-app && cd my-app git init npm install -g @bradygaster/squad-cli squad init ``` - You'll see: - ``` ✅ Squad installed. .github/agents/squad.agent.md — coordinator agent .github/workflows/ — 5 GitHub workflows (heartbeat, triage, CI, etc.) .squad/templates/ — 11 template files - Open GitHub Copilot and select Squad from the agent list. ``` - --- - ## 2. Open Copilot — Your Team Forms - Start the Copilot CLI: - ```bash copilot ``` - Select **Squad** from the `/agent` list (CLI) or `/agents` (VS Code). Squad greets you by name (pulled from `git config user.name`): - ``` Hey Brady, what are you building? ``` - Describe your project: - ``` > I'm building a recipe sharing app with React and Node.js. Users can > post recipes, search by ingredient, and save favorites. ``` - Squad proposes a team. Names come from a fictional universe — the exact universe depends on your project shape and history. 
Here's what it might look like: - ``` Here's your team: - 🏗️ Hicks — Lead Scope, decisions, code review ⚛️ Ripley — Frontend Dev React, UI, components 🔧 Dallas — Backend Dev Node.js, APIs, database 🧪 Lambert — Tester Tests, quality, edge cases 📋 Scribe — (silent) Memory, decisions, session logs - Look right? Say yes, add someone, or change a role. (Or just give me a task to start!) ``` - --- - ## 3. Confirm and Give Your First Task - You can say "yes" or skip straight to a task (which is implicit confirmation): - ``` > Yes. Dallas, set up the Express server with basic routing. ``` - Squad creates the `.ai-team/` directory structure (team roster, routing rules, casting state, ceremony config, agent charters and histories — all seeded with your project context). Then it spawns Dallas. - ``` 🔧 Dallas — setting up Express server with routing - Dallas is on it — building the Express server now. ``` - --- - ## 4. Parallel Fan-Out — The Launch Table - Now give a bigger task to see parallel work: - ``` > Team, build the recipe listing page. We need an API endpoint > that returns recipes and a React component that displays them. ``` - Squad decomposes the work and launches agents simultaneously: - ``` 🏗️ Hicks — reviewing requirements, defining API contract ⚛️ Ripley — building RecipeList component @@ -102,28 +69,20 @@ Squad decomposes the work and launches agents simultaneously: 🧪 Lambert — writing test cases from requirements 📋 Scribe — logging session ``` - All five agents are working at the same time in separate context windows. The tester writes test cases from the requirements *while* the implementation is being built — no waiting. - --- - ## 5. Results Labeled by Agent - When agents finish, you see results tagged with each agent's identity: - ``` 🏗️ Hicks — Defined API contract: GET /api/recipes returns { recipes: [{ id, title, ingredients, prepTime }] }. Decided on cursor-based pagination for large result sets. 
- ⚛️ Ripley — Built RecipeList component at src/components/RecipeList.tsx. Uses React Query for data fetching. Card layout with title, prep time, and ingredient count. - 🔧 Dallas — Created GET /api/recipes endpoint at src/routes/recipes.ts. Connected to MongoDB. Supports ?cursor= pagination per Hicks' spec. Also added GET /api/recipes/:id for detail view (proactive). - 🧪 Lambert — Wrote 8 test cases in test/recipes.test.ts: 📌 Proactive: Written from requirements while Dallas was building. May need adjustment once implementation is final. @@ -133,139 +92,97 @@ When agents finish, you see results tagged with each agent's identity: - Detail endpoint returns 404 for missing recipe [+4 more] ``` - Behind the scenes, the Scribe merged decisions from each agent into `decisions.md` and logged the session. - --- - ## 6. Ask "Where Are We?" — Instant Status - At any point, you can check status: - ``` > Where are we? ``` - This is a Direct mode response — no agent spawn, just the coordinator reading recent logs: - ``` Last session: Recipe listing page (15 minutes ago) - Who worked: - Hicks defined the API contract (cursor pagination) - Ripley built RecipeList component - Dallas created recipes endpoint + detail endpoint - Lambert wrote 8 test cases - Key decisions: - Cursor-based pagination for recipe lists - React Query for frontend data fetching - MongoDB as database - 3 decisions in decisions.md, 4 agent history entries updated. ``` - --- - ## 7. Follow-Up Task — Watch Decisions Accumulate - Give a follow-up task: - ``` > Now add a search feature. Users should be able to search > recipes by ingredient name. 
``` - Squad sees this involves multiple domains and fans out again: - ``` 🔧 Dallas — adding search endpoint with ingredient filtering ⚛️ Ripley — building search UI with ingredient input 🧪 Lambert — writing search test cases 📋 Scribe — logging session ``` - After completion, check decisions: - ``` > Show me the decisions ``` - ``` ### 2025-07-15: Cursor-based pagination for recipe lists **By:** Hicks **What:** All list endpoints use cursor-based pagination **Why:** Scales better than offset pagination for growing datasets - ### 2025-07-15: React Query for data fetching **By:** Ripley **What:** All API calls use React Query with stale-while-revalidate **Why:** Built-in caching, refetching, and loading states - ### 2025-07-15: Full-text search via MongoDB text index **By:** Dallas **What:** Search uses $text index on recipe title and ingredients array **Why:** Native MongoDB capability, no external search service needed ``` - -Every agent reads these decisions before working. As the list grows, the team self-aligns — agents respect each other's architectural choices without you repeating them. - +Every agent reads these decisions before working. As the list grows, the team retains context so you don't need to restate architectural choices. --- - ## 8. Set a Directive - You notice something you want to standardize: - ``` > Always use Zod for API input validation ``` - Squad captures this as a directive — no agent spawn needed: - ``` 📌 Captured. All API input validation must use Zod. ``` - This is written to `decisions.md` via the inbox. Every agent will see it in their next spawn. - --- - ## 9. Export Your Squad - After a productive session, export your team for portability: - ```bash squad export ``` - ``` ✅ Exported to squad-export.json 5 agents, 3 skills, 6 decisions ``` - This snapshot contains everything — charters, histories, casting state, skills, and decisions. 
You can import it into another repo later: - ```bash cd ../other-project npm install -g @bradygaster/squad-cli squad init squad import ../my-app/squad-export.json ``` - --- - ## What to Try Next - - **Add a team member:** `"I need a DevOps person"` - **Run a ceremony:** `"Run a design review before we start the auth system"` - **Direct an agent:** `"Lambert, run the test suite and tell me what's failing"` - **Remove someone:** `"Remove the designer — we don't need them anymore"` - **Connect to issues:** `"Connect to myorg/myrepo"` (see [GitHub Issues Walkthrough](tour-github-issues.md)) - --- - ## Tips - - **First session is the slowest.** Agents have no history yet. After 2–3 sessions, they know your conventions and stop asking questions they've answered before. - **Commit `.ai-team/`.** It's your team's brain. Anyone who clones gets the team with all their knowledge. - **Say "team" for big tasks.** The word "team" triggers parallel fan-out across multiple agents. diff --git a/docs/src/content/docs/whatsnew.md b/docs/src/content/docs/whatsnew.md index 86a899876..9aaa86066 100644 --- a/docs/src/content/docs/whatsnew.md +++ b/docs/src/content/docs/whatsnew.md @@ -1,45 +1,27 @@ # What's New - -> ⚠️ **Experimental** — Squad is alpha software. APIs, commands, and behavior may change between releases. - - Full release history for Squad — from beta through the v1 TypeScript replatform. Jump to the version you're looking for, or read top-down to see how the project evolved. 
- --- - ## v0.9.1 — Current Release - - **Shell agent name extraction** — Robust multi-pattern fallback for extracting agent names from shell transcripts (#577) - **Init scaffolding** — `squad init --sdk` now scaffolds typed casting files; silences remote-lookup warnings (#579) -- **Personal squad global mode** — `squad personal init --global` auto-discovers `~/.config/squad/` (#576) - **Release hardening** — CI playbook rewrite, publish policy linting, docs consistency checks (#564, #557) - **Doctor improvements** — Actionable warnings and `squad.agent.md` existence checks (#565, #533) - ## v0.9.0 — Major Feature Release - -**Governance & Personal Squads** -- **Personal Squad concept** — Isolated developer workspace with own team.md, routing.md, and roster (#508) -- **Ambient discovery** — Auto-detect personal squad at `~/.squad/` via environment variables -- **Personal squad CLI** — Commands: `squad personal init`, `list`, `use`, `remove` (#508) -- **Governance isolation** — Hooks, ceremonies, telemetry scoped per personal squad (#508) - +**Governance & Workspace Isolation** **Worktree Spawning & Distributed Work** - **Worktree creation** — Coordinator spawns managed worktrees for parallel agent work (#529) - **Cross-squad orchestration** — Agents coordinate across multiple squads and worktrees (#446) - **Persistent Ralph** — Long-running daemon with watch + heartbeat health monitoring (#443) - **Worktree .git guard** — Regression detection for file vs directory confusion (#521) - **Capability Discovery & Routing** - **Machine capability inference** — Auto-detect available tools, models, hardware specs at session start (#514) - **`needs:*` label routing** — Agents self-route based on discovered capabilities (#514) - **Rate Limiting & Cost Control** - **Cooperative rate limiting** — Predictive circuit breaker with token budget forecasting (#515) - **Economy Mode** — Automatic cheaper-model selection when quality thresholds permit (#500) - **Token usage tracking** 
— Per-agent cost visibility in session UI (#453) - **Rate limit recovery** — Actionable error messages for quota pressure (#464) - **Ralph circuit breaker** — Graceful degradation under model quota limits (#451) - **Telemetry & Infrastructure** - **Auto-wire telemetry** — `initSquadTelemetry()` now self-configures, no manual wiring (#281) - **OpenTelemetry propagation** — Automatic context flow across squad sessions @@ -48,18 +30,15 @@ Full release history for Squad — from beta through the v1 TypeScript replatfor - **GAP analysis verification** — After-work checklist ensures all requirements met before completion (#473) - **Session recovery skill** — Find and resume lost sessions without restart (#442) - **GitHub auth isolation skill** — Multi-account GitHub workflows (#470) - **Docs, Stability & Distribution** - **Astro site enhancements** — Search tuning, section badges, coverage indicators (#524) -- **Autonomous agents guide** — Comprehensive SDK guide for building agents (#492) +- **Background agent pipeline guide** — Comprehensive SDK guide for building background-triggered agents (#492) - **CLI terminal rendering** — Fixed scroll flicker, reduced re-render churn, stabilized component keys - **Upgrade hardening** — Context-aware footers, EPERM handling, gitignore coverage (#544, #549) - **ESM compatibility** — Node 22/24 dual-layer fix, Node 24+ hard-fail with guidance (#449, #502) - **Signal handling** — SIGINT/SIGTERM graceful shutdown with 22+ regression tests (#486) - **npm-only distribution** — Removed GitHub-native channel; standard npm registry install - ## v0.8.2 - - **Version alignment** — CLI (0.8.1) and SDK (0.8.0) snapped to 0.8.2 across all packages - **Published to npm** — `@bradygaster/squad-sdk@0.8.2` and `@bradygaster/squad-cli@0.8.2` - **Init flow improvements** — Ralph now included in the initial agent set during `squad init`; routing templates no longer reference `@copilot` by default (#337, #338, #339) @@ -88,11 +67,8 @@ Full release 
history for Squad — from beta through the v1 TypeScript replatfor - Custom static site generator with markdown-it, frontmatter, search index - **GitHub Pages** — Live docs site with dark mode, client-side search, sidebar nav, beta site UI - **Test baseline** — 2232 tests across 85 test files - ## v0.6.0 — The TypeScript Replatform - The big rewrite. Everything moved to TypeScript with a clean SDK + CLI split. - - **Full rewrite** — JavaScript → TypeScript with strict mode, ESM modules, Node.js ≥20 - **SDK + CLI split** — Two npm packages: `@bradygaster/squad-sdk` (runtime, adapter, resolution) and `@bradygaster/squad-cli` (commands, shell, REPL) - **npm workspace** — Monorepo with `packages/squad-sdk` and `packages/squad-cli` @@ -107,38 +83,28 @@ The big rewrite. Everything moved to TypeScript with a clean SDK + CLI split. - Wave 3: Docs migration, site engine, 5 guides - **CLI entry point** — Moved from `dist/index.js` to `dist/cli-entry.js` - **CRLF normalization** — All 8 parsers normalize line endings; Windows users with `core.autocrlf=true` work correctly - ### Breaking Changes (v0.6.0) - | Change | Migration | |--------|-----------| | Config file: `squad.agent.md` → `squad.config.ts` | Run `squad init` to generate typed config | | Team dir: `.squad/` | Standard directory for all team state | | Routing: markdown rules → typed `RoutingRule[]` | Export existing rules with `squad export` | | Models: string names → tier-based `ModelConfig` | Use `defaultTier` + `fallbackChains` in config | - ## v0.6.0-alpha.0 - - **Initial replatform** — First working TypeScript build - **CLI commands** — init, upgrade, shell, doctor, link - **npm distribution** — `npm install @bradygaster/squad-cli` - **Branch protection** — `main` requires PR + build check - **Changesets** — Infrastructure for independent package versioning - ## v0.5.2 - - **`upgrade --migrate-directory` exits early fix** — The directory rename step no longer calls `process.exit(0)`, so the full upgrade now runs 
in one command - **`.slnx`, `.fsproj`, `.vbproj` not detected as .NET** — Proper Visual Studio solution files and F#/VB.NET project files now detected - **Migrations use detected squad directory** — Migration steps and `.gitattributes` rules now use the detected squad directory - ## v0.5.1 - - **`squad watch` — Local Watchdog** — Persistent polling for unattended work processing. Run `squad watch` to check GitHub every 10 minutes for untriaged squad work; use `--interval` flag to customize polling - **Project type detection** — Squad detects your project's language and stack to intelligently configure workflows - **Git safety rules** — Guardrails enforced based on detected project type - ## v0.5.0 — The `.squad/` Rename Release - - **`.squad/` directory** — Full directory rename with backward-compatible migration utilities. Existing repos continue to work; migration required by v1.0.0. - **Decision lifecycle management** — Archival and versioning support for design decisions - **Identity layer** — New `wisdom.md` and `now.md` files for agent context and temporal awareness @@ -146,22 +112,16 @@ The big rewrite. Everything moved to TypeScript with a clean SDK + CLI split. - **Cold-path extraction** — Refactored coordinator from ~30KB to ~17KB - **Skills export/import verification** — Enhanced validation for agent skill extension - **Email scrubbing** — Automatic PII removal during migration - ## v0.4.2 - - **`/agent` vs `/agents` CLI command fix** — Correctly reference `/agent` (CLI) and `/agents` (VS Code) - **Insider Program infrastructure** — `insider` branch with guard workflow enforcement - **Branch content policy** — Formal decision document for branch safety - **Custom universe support** — Star Trek universe added by community contributor @codebytes - ## v0.4.1 - - **Task spawn UI** — Role emoji for visual consistency (🏗️ Lead, 🔧 Backend, ⚛️ Frontend, 🧪 Tester, etc.) 
- **`squad upgrade --self` command** — Refresh `.squad/` from templates while preserving agent history - **Deprecation banner** — CLI and coordinator warn about the `.squad/` rename - ## v0.4.0 - - **Client Compatibility** — Full platform support matrix for CLI and VS Code - **VS Code Support** — First-class VS Code guide with `runSubagent` parallel spawning - **Project Boards** — GitHub Projects V2 integration with board + Kanban views @@ -171,16 +131,12 @@ The big rewrite. Everything moved to TypeScript with a clean SDK + CLI split. - **Plugin Marketplace** — Discover and install curated agent templates and skills - **Universe Expansion** — 20 → 33 casting universes - **Context Optimization** — decisions.md pruned from ~80K to ~33K tokens; per-agent context usage dropped from 41–46% to 17–23% - ## v0.3.0 - - **Per-Agent Model Selection** — Cost-first routing with 16-model catalog and fallback chains -- **Ralph — Work Monitor** — Built-in squad member that autonomously processes backlogs +- **Ralph — Work Monitor** — Built-in squad member that keeps backlogs moving with your guardrails - **@copilot Coding Agent** — GitHub's Copilot agent as a squad member with three-tier capability profile - **Universe Expansion** — 14 → 20 casting universes - ## v0.2.0 - - **Export & Import CLI** — Portable team snapshots for moving squads between repos - **GitHub Issues Mode** — Issue-driven development with `gh` CLI integration - **PRD Mode** — Product requirements decomposition into work items @@ -188,9 +144,7 @@ The big rewrite. Everything moved to TypeScript with a clean SDK + CLI split. 
- **Skills System** — Earned knowledge with confidence lifecycle - **Tiered Response Modes** — Direct/Lightweight/Standard/Full response depth - **Smart Upgrade** — Version-aware upgrades with migrations - ## v0.1.0 - - **Coordinator agent** — Orchestrates team formation and parallel work - **Init command** — `squad` copies agent file and templates - **Upgrade command** — `squad upgrade` updates Squad-owned files without touching team state diff --git a/docs/src/navigation.ts b/docs/src/navigation.ts index fc85d2b88..986799bc9 100644 --- a/docs/src/navigation.ts +++ b/docs/src/navigation.ts @@ -2,42 +2,35 @@ export interface NavItem { title: string; slug: string; } - export interface NavSection { title: string; dir: string; items: NavItem[]; } - export const NAV_SECTIONS: NavSection[] = [ { title: 'Get Started', dir: 'get-started', items: [ - { title: 'Quick start', slug: 'get-started/five-minute-start' }, + { title: 'Quick Start', slug: 'get-started/five-minute-start' }, { title: 'Installation', slug: 'get-started/installation' }, - { title: 'Choose your path', slug: 'get-started/choosing-your-path' }, { title: 'Your First Session', slug: 'get-started/first-session' }, - { title: 'Migration Guide', slug: 'get-started/migration' }, ], }, { - title: 'Guide', - dir: 'guide', + title: 'Concepts', + dir: 'concepts', items: [ - { title: 'Tips & Tricks', slug: 'guide/tips-and-tricks' }, - { title: 'Sample Prompts', slug: 'guide/sample-prompts' }, - { title: 'Personal Squad', slug: 'guide/personal-squad' }, - { title: 'Interactive Shell', slug: 'guide/shell' }, - { title: 'Extensibility', slug: 'guide/extensibility' }, - { title: 'Building Extensions', slug: 'guide/building-extensions' }, - { title: 'Building Resilient Agents', slug: 'guide/building-resilient-agents' }, - { title: 'Contributing', slug: 'guide/contributing' }, - { title: 'Contributors', slug: 'guide/contributors' }, + { title: 'Architecture', slug: 'concepts/architecture' }, + { title: 'Your Team', slug: 
'concepts/your-team' }, + { title: 'Memory & Knowledge', slug: 'concepts/memory-and-knowledge' }, + { title: 'Parallel Work', slug: 'concepts/parallel-work' }, + { title: 'GitHub Workflow', slug: 'concepts/github-workflow' }, + { title: 'Portability', slug: 'concepts/portability' }, ], }, { - title: 'Features', + title: 'Core Features', dir: 'features', items: [ { title: 'Team Setup', slug: 'features/team-setup' }, @@ -48,47 +41,62 @@ export const NAV_SECTIONS: NavSection[] = [ { title: 'Memory', slug: 'features/memory' }, { title: 'Skills', slug: 'features/skills' }, { title: 'Directives', slug: 'features/directives' }, - { title: 'Ceremonies', slug: 'features/ceremonies' }, - { title: 'Reviewer Protocol', slug: 'features/reviewer-protocol' }, { title: 'GitHub Issues', slug: 'features/github-issues' }, { title: 'GitLab Issues', slug: 'features/gitlab-issues' }, - { title: 'Labels & Triage', slug: 'features/labels' }, - { title: 'PRD Mode', slug: 'features/prd-mode' }, - { title: 'Project Boards', slug: 'features/project-boards' }, { title: 'Ralph — Work Monitor', slug: 'features/ralph' }, { title: '@copilot Coding Agent', slug: 'features/copilot-coding-agent' }, + { title: 'VS Code', slug: 'features/vscode' }, + ], + }, + { + title: 'Advanced Features', + dir: 'features', + items: [ + { title: 'Ceremonies', slug: 'features/ceremonies' }, + { title: 'Reviewer Protocol', slug: 'features/reviewer-protocol' }, { title: 'Human Team Members', slug: 'features/human-team-members' }, { title: 'Consult Mode', slug: 'features/consult-mode' }, - { title: 'Remote Control', slug: 'features/remote-control' }, - { title: 'Storage Provider', slug: 'features/storage-provider' }, - { title: 'VS Code', slug: 'features/vscode' }, - { title: 'Git Worktrees', slug: 'features/worktrees' }, + { title: 'Labels & Triage', slug: 'features/labels' }, + { title: 'PRD Mode', slug: 'features/prd-mode' }, + { title: 'Project Boards', slug: 'features/project-boards' }, { title: 'Export & Import', 
slug: 'features/export-import' }, { title: 'Upstream Inheritance', slug: 'features/upstream-inheritance' }, { title: 'Marketplace', slug: 'features/marketplace' }, { title: 'Plugins', slug: 'features/plugins' }, { title: 'MCP', slug: 'features/mcp' }, { title: 'Notifications', slug: 'features/notifications' }, - { title: 'Enterprise Platforms', slug: 'features/enterprise-platforms' }, - { title: 'Squad RC', slug: 'features/squad-rc' }, { title: 'Streams', slug: 'features/streams' }, + { title: 'Git Worktrees', slug: 'features/worktrees' }, + { title: 'Squad RC', slug: 'features/squad-rc' }, + ], + }, + { + title: 'Infrastructure', + dir: 'features', + items: [ + { title: 'Enterprise Platforms', slug: 'features/enterprise-platforms' }, + { title: 'Storage Provider', slug: 'features/storage-provider' }, { title: 'Distributed Mesh', slug: 'features/distributed-mesh' }, { title: 'Capability Routing', slug: 'features/capability-routing' }, { title: 'Rate Limiting', slug: 'features/rate-limiting' }, { title: 'KEDA Autoscaling', slug: 'features/keda-scaling' }, + { title: 'State Backends', slug: 'features/state-backends' }, + { title: 'Dual-Mode Deployment', slug: 'features/dual-mode-deployment' }, ], }, { - title: 'Reference', - dir: 'reference', + title: 'Guides', + dir: 'guide', items: [ - { title: 'CLI', slug: 'reference/cli' }, - { title: 'SDK', slug: 'reference/sdk' }, - { title: 'SDK API Reference', slug: 'reference/api-reference' }, - { title: 'SDK Integration', slug: 'reference/integration' }, - { title: 'Tools & Hooks', slug: 'reference/tools-and-hooks' }, - { title: 'Config', slug: 'reference/config' }, - { title: 'Glossary', slug: 'reference/glossary' }, + { title: 'Tips & Tricks', slug: 'guide/tips-and-tricks' }, + { title: 'Sample Prompts', slug: 'guide/sample-prompts' }, + { title: 'Recipes', slug: 'cookbook/recipes' }, + { title: 'Building Extensions', slug: 'guide/building-extensions' }, + { title: 'Building Resilient Agents', slug: 
'guide/building-resilient-agents' }, + { title: 'Extensibility', slug: 'guide/extensibility' }, + { title: 'GitHub Auth Setup', slug: 'guide/github-auth-setup' }, + { title: 'Contributing', slug: 'guide/contributing' }, + { title: 'Contributors', slug: 'guide/contributors' }, ], }, { @@ -98,56 +106,49 @@ export const NAV_SECTIONS: NavSection[] = [ { title: 'Existing Repo', slug: 'scenarios/existing-repo' }, { title: 'New Project', slug: 'scenarios/new-project' }, { title: 'Solo Developer', slug: 'scenarios/solo-dev' }, + { title: 'Mid-Project', slug: 'scenarios/mid-project' }, { title: 'Issue-Driven Dev', slug: 'scenarios/issue-driven-dev' }, - { title: 'Monorepo', slug: 'scenarios/monorepo' }, - { title: 'CI/CD Integration', slug: 'scenarios/ci-cd-integration' }, { title: 'Team of Humans', slug: 'scenarios/team-of-humans' }, + { title: 'Multiple Squads', slug: 'scenarios/multiple-squads' }, + { title: 'Monorepo', slug: 'scenarios/monorepo' }, { title: 'Large Codebase', slug: 'scenarios/large-codebase' }, + { title: 'Scaling Workstreams', slug: 'scenarios/scaling-workstreams' }, { title: 'Open Source', slug: 'scenarios/open-source' }, - { title: 'Multiple Squads', slug: 'scenarios/multiple-squads' }, - { title: 'Keep My Squad', slug: 'scenarios/keep-my-squad' }, - { title: 'Mid-Project', slug: 'scenarios/mid-project' }, + { title: 'CI/CD Integration', slug: 'scenarios/ci-cd-integration' }, + { title: 'Release Process', slug: 'scenarios/release-process' }, { title: 'Upgrading', slug: 'scenarios/upgrading' }, - { title: 'Multi-Codespace', slug: 'scenarios/multi-codespace' }, + { title: 'Migration Guide', slug: 'get-started/migration' }, { title: 'Private Repos', slug: 'scenarios/private-repos' }, { title: 'Cross-Org Auth', slug: 'scenarios/cross-org-auth' }, { title: 'Team Portability', slug: 'scenarios/team-portability' }, { title: 'Team State Storage', slug: 'scenarios/team-state-storage' }, + { title: 'Multi-Codespace', slug: 'scenarios/multi-codespace' }, { 
title: 'Switching Models', slug: 'scenarios/switching-models' }, - { title: 'Release Process', slug: 'scenarios/release-process' }, - { title: 'Scaling Workstreams', slug: 'scenarios/scaling-workstreams' }, - { title: 'Client Compatibility', slug: 'scenarios/client-compatibility' }, { title: 'Remote Q&A', slug: 'scenarios/remote-qa' }, { title: 'Disaster Recovery', slug: 'scenarios/disaster-recovery' }, - { title: 'Troubleshooting', slug: 'scenarios/troubleshooting' }, + { title: 'Client Compatibility', slug: 'scenarios/client-compatibility' }, { title: 'Aspire Dashboard', slug: 'scenarios/aspire-dashboard' }, + { title: 'Keep My Squad', slug: 'scenarios/keep-my-squad' }, + { title: 'Troubleshooting', slug: 'scenarios/troubleshooting' }, ], }, { - title: 'Concepts', - dir: 'concepts', - items: [ - { title: 'Architecture', slug: 'concepts/architecture' }, - { title: 'Your Team', slug: 'concepts/your-team' }, - { title: 'Memory & Knowledge', slug: 'concepts/memory-and-knowledge' }, - { title: 'Parallel Work', slug: 'concepts/parallel-work' }, - { title: 'GitHub Workflow', slug: 'concepts/github-workflow' }, - { title: 'Portability', slug: 'concepts/portability' }, - ], - }, - { - title: 'Cookbook', - dir: 'cookbook', + title: 'Reference', + dir: 'reference', items: [ - { title: 'Recipes', slug: 'cookbook/recipes' }, + { title: 'CLI', slug: 'reference/cli' }, + { title: 'SDK', slug: 'reference/sdk' }, + { title: 'SDK API Reference', slug: 'reference/api-reference' }, + { title: 'SDK Integration', slug: 'reference/integration' }, + { title: 'Tools & Hooks', slug: 'reference/tools-and-hooks' }, + { title: 'Config', slug: 'reference/config' }, + { title: 'Glossary', slug: 'reference/glossary' }, ], }, ]; - export const STANDALONE_PAGES = [ { title: "What's New", slug: 'whatsnew' }, { title: 'SDK-First Mode', slug: 'sdk-first-mode' }, { title: 'Community', slug: 'community' }, { title: 'Insider Program', slug: 'insider-program' }, ]; - diff --git 
a/docs/tests/build-output.test.mjs b/docs/tests/build-output.test.mjs index 029587ee4..b575750ce 100644 --- a/docs/tests/build-output.test.mjs +++ b/docs/tests/build-output.test.mjs @@ -1,231 +1,206 @@ -/** - * Build output tests for Phase 1 search improvements. - * Validates that pagefind attributes are correctly applied in the built HTML. - * Run with: node --test tests/build-output.test.mjs - */ -import { describe, it } from 'node:test'; -import assert from 'node:assert/strict'; -import fs from 'node:fs'; -import path from 'node:path'; - -const DOCS_ROOT = path.resolve(import.meta.dirname, '..'); -const DIST = path.join(DOCS_ROOT, 'dist'); - -// Collect HTML files from dist for reuse across tests -function findHtmlFiles(dir, files = []) { - for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { - const full = path.join(dir, entry.name); - if (entry.isDirectory() && entry.name !== '_astro' && entry.name !== 'pagefind') { - findHtmlFiles(full, files); - } else if (entry.isFile() && entry.name.endsWith('.html')) { - files.push(full); - } - } - return files; -} - -const allHtmlFiles = findHtmlFiles(DIST); - -// Exclude top-level pages (404, home, blog index) which use different layouts -const EXCLUDED_PAGES = ['404.html', `blog${path.sep}index.html`, 'index.html']; -function isContentPage(file) { - const rel = path.relative(DIST, file); - return !EXCLUDED_PAGES.some(exc => rel === exc || rel === exc.replace(/\//g, path.sep)); -} - -const docsHtmlFiles = allHtmlFiles.filter(f => - f.includes(`${path.sep}docs${path.sep}`) && isContentPage(f) -); -const blogHtmlFiles = allHtmlFiles.filter(f => - f.includes(`${path.sep}blog${path.sep}`) && isContentPage(f) -); - -// ── pagefind.yml ────────────────────────────────────────────────────────────── - -describe('pagefind.yml configuration', () => { - it('exists in the docs root', () => { - assert.ok( - fs.existsSync(path.join(DOCS_ROOT, 'pagefind.yml')), - 'pagefind.yml should exist at docs root' - ); - }); - - 
it('contains exclude_selectors for nav, footer, aside, pre, .astro-code', () => { - const content = fs.readFileSync(path.join(DOCS_ROOT, 'pagefind.yml'), 'utf-8'); - for (const selector of ['nav', 'footer', 'aside', 'pre', '.astro-code']) { - assert.ok( - content.includes(`"${selector}"`) || content.includes(`'${selector}'`) || content.includes(`- ${selector}`) || content.includes(`- "${selector}"`), - `pagefind.yml should exclude selector: ${selector}` - ); - } - }); -}); - -// ── Pagefind index output ───────────────────────────────────────────────────── - -describe('pagefind build output', () => { - it('pagefind directory exists in dist/', () => { - assert.ok( - fs.existsSync(path.join(DIST, 'pagefind')), - 'dist/pagefind/ directory should exist after build' - ); - }); - - it('pagefind.js is present in pagefind directory', () => { - assert.ok( - fs.existsSync(path.join(DIST, 'pagefind', 'pagefind.js')), - 'dist/pagefind/pagefind.js should exist' - ); - }); -}); - -// ── data-pagefind-body on article elements ──────────────────────────────────── - -describe('data-pagefind-body attribute', () => { - it('docs pages contain data-pagefind-body on article elements', () => { - assert.ok(docsHtmlFiles.length > 0, 'Should have docs HTML files to test'); - const missing = []; - for (const file of docsHtmlFiles) { - const html = fs.readFileSync(file, 'utf-8'); - if (!html.includes('data-pagefind-body')) { - missing.push(path.relative(DIST, file)); - } - } - assert.equal(missing.length, 0, `These docs pages lack data-pagefind-body: ${missing.join(', ')}`); - }); - - it('blog post pages contain data-pagefind-body on article elements', () => { - assert.ok(blogHtmlFiles.length > 0, 'Should have blog HTML files to test'); - for (const file of blogHtmlFiles) { - const html = fs.readFileSync(file, 'utf-8'); - assert.ok( - html.includes('data-pagefind-body'), - `Blog page missing data-pagefind-body: ${path.relative(DIST, file)}` - ); - } - }); -}); - -// ── data-pagefind-meta with 
section values ──────────────────────────────────── - -describe('data-pagefind-meta section attribute', () => { - it('docs pages have data-pagefind-meta with a section value', () => { - assert.ok(docsHtmlFiles.length > 0, 'Should have docs HTML files to test'); - const sectionPattern = /data-pagefind-meta="section:([^"]+)"/; - const missing = []; - for (const file of docsHtmlFiles) { - const html = fs.readFileSync(file, 'utf-8'); - if (!sectionPattern.test(html)) { - missing.push(path.relative(DIST, file)); - } - } - assert.equal( - missing.length, 0, - `These docs pages lack data-pagefind-meta section: ${missing.join(', ')}` - ); - }); - - it('section values match expected categories', () => { - const knownSections = new Set([ - 'Get Started', 'Guide', 'Features', 'Reference', - 'Scenarios', 'Concepts', 'Cookbook', 'Blog', 'Docs', 'Community' - ]); - const sectionPattern = /data-pagefind-meta="section:([^"]+)"/g; - const foundSections = new Set(); - for (const file of [...docsHtmlFiles, ...blogHtmlFiles]) { - const html = fs.readFileSync(file, 'utf-8'); - for (const match of html.matchAll(sectionPattern)) { - foundSections.add(match[1]); - } - } - assert.ok(foundSections.size > 0, 'Should find at least one section value'); - for (const section of foundSections) { - assert.ok( - knownSections.has(section), - `Unexpected section value "${section}" — expected one of: ${[...knownSections].join(', ')}` - ); - } - }); - - it('blog pages have section:Blog', () => { - assert.ok(blogHtmlFiles.length > 0, 'Should have blog HTML files to test'); - for (const file of blogHtmlFiles) { - const html = fs.readFileSync(file, 'utf-8'); - assert.ok( - html.includes('data-pagefind-meta="section:Blog"'), - `Blog page missing section:Blog — ${path.relative(DIST, file)}` - ); - } - }); -}); - -// ── data-pagefind-weight on headings ────────────────────────────────────────── - -describe('data-pagefind-weight on headings', () => { - it('docs pages have data-pagefind-weight="2" on h2 or h3 
elements inside article', () => { - const weightPattern = /<h[23][^>]*data-pagefind-weight="2"/; - // Only check pages that have h2/h3 inside a prose article (standard content pages) - const articleHeadingPattern = /data-pagefind-body[\s\S]*?<h[23][\s>]/; - const pagesWithArticleHeadings = docsHtmlFiles.filter(f => { - const html = fs.readFileSync(f, 'utf-8'); - return articleHeadingPattern.test(html); - }); - assert.ok(pagesWithArticleHeadings.length > 0, 'Should have docs pages with h2/h3 inside article'); - - const missing = []; - for (const file of pagesWithArticleHeadings) { - const html = fs.readFileSync(file, 'utf-8'); - if (!weightPattern.test(html)) { - missing.push(path.relative(DIST, file)); - } - } - assert.equal( - missing.length, 0, - `These pages with article headings lack data-pagefind-weight="2": ${missing.join(', ')}` - ); - }); -}); - -// ── data-pagefind-ignore on nav, footer, pre ────────────────────────────────── - -describe('data-pagefind-ignore attributes', () => { - // Use one representative docs page that has all elements - const sampleFile = docsHtmlFiles.find(f => f.includes('built-in-roles')); - const sampleHtml = sampleFile ? 
fs.readFileSync(sampleFile, 'utf-8') : ''; - - it('nav elements have data-pagefind-ignore', () => { - assert.ok(sampleFile, 'Sample file (built-in-roles) should exist'); - const navTags = [...sampleHtml.matchAll(/<nav[^>]*>/g)].map(m => m[0]); - assert.ok(navTags.length > 0, 'Page should contain nav elements'); - for (const tag of navTags) { - assert.ok( - tag.includes('data-pagefind-ignore'), - `nav element missing data-pagefind-ignore: ${tag.substring(0, 80)}` - ); - } - }); - - it('footer elements have data-pagefind-ignore', () => { - assert.ok(sampleFile, 'Sample file should exist'); - const footerTags = [...sampleHtml.matchAll(/<footer[^>]*>/g)].map(m => m[0]); - assert.ok(footerTags.length > 0, 'Page should contain footer elements'); - for (const tag of footerTags) { - assert.ok( - tag.includes('data-pagefind-ignore'), - `footer element missing data-pagefind-ignore: ${tag.substring(0, 80)}` - ); - } - }); - - it('pre (code block) elements have data-pagefind-ignore', () => { - assert.ok(sampleFile, 'Sample file should exist'); - const preTags = [...sampleHtml.matchAll(/<pre[^>]*>/g)].map(m => m[0]); - assert.ok(preTags.length > 0, 'Page should contain pre elements'); - for (const tag of preTags) { - assert.ok( - tag.includes('data-pagefind-ignore'), - `pre element missing data-pagefind-ignore: ${tag.substring(0, 80)}` - ); - } - }); -}); +/** + * Build output tests for Phase 1 search improvements. + * Validates that pagefind attributes are correctly applied in the built HTML. 
+ * Run with: node --test tests/build-output.test.mjs + */ +import { describe, it } from 'node:test'; +import assert from 'node:assert/strict'; +import fs from 'node:fs'; +import path from 'node:path'; +const DOCS_ROOT = path.resolve(import.meta.dirname, '..'); +const DIST = path.join(DOCS_ROOT, 'dist'); +// Collect HTML files from dist for reuse across tests +function findHtmlFiles(dir, files = []) { + for (const entry of fs.readdirSync(dir, { withFileTypes: true })) { + const full = path.join(dir, entry.name); + if (entry.isDirectory() && entry.name !== '_astro' && entry.name !== 'pagefind') { + findHtmlFiles(full, files); + } else if (entry.isFile() && entry.name.endsWith('.html')) { + files.push(full); + } + } + return files; +} +const allHtmlFiles = findHtmlFiles(DIST); +// Exclude top-level pages (404, home, blog index) which use different layouts +const EXCLUDED_PAGES = ['404.html', `blog${path.sep}index.html`, 'index.html']; +function isContentPage(file) { + const rel = path.relative(DIST, file); + return !EXCLUDED_PAGES.some(exc => rel === exc || rel === exc.replace(/\//g, path.sep)); +} +const docsHtmlFiles = allHtmlFiles.filter(f => + f.includes(`${path.sep}docs${path.sep}`) && isContentPage(f) +); +const blogHtmlFiles = allHtmlFiles.filter(f => + f.includes(`${path.sep}blog${path.sep}`) && isContentPage(f) +); +// ── pagefind.yml ────────────────────────────────────────────────────────────── +describe('pagefind.yml configuration', () => { + it('exists in the docs root', () => { + assert.ok( + fs.existsSync(path.join(DOCS_ROOT, 'pagefind.yml')), + 'pagefind.yml should exist at docs root' + ); + }); + it('contains exclude_selectors for nav, footer, aside, pre, .astro-code', () => { + const content = fs.readFileSync(path.join(DOCS_ROOT, 'pagefind.yml'), 'utf-8'); + for (const selector of ['nav', 'footer', 'aside', 'pre', '.astro-code']) { + assert.ok( + content.includes(`"${selector}"`) || content.includes(`'${selector}'`) || content.includes(`- 
${selector}`) || content.includes(`- "${selector}"`), + `pagefind.yml should exclude selector: ${selector}` + ); + } + }); +}); +// ── Pagefind index output ───────────────────────────────────────────────────── +describe('pagefind build output', () => { + it('pagefind directory exists in dist/', () => { + assert.ok( + fs.existsSync(path.join(DIST, 'pagefind')), + 'dist/pagefind/ directory should exist after build' + ); + }); + it('pagefind.js is present in pagefind directory', () => { + assert.ok( + fs.existsSync(path.join(DIST, 'pagefind', 'pagefind.js')), + 'dist/pagefind/pagefind.js should exist' + ); + }); +}); +// ── data-pagefind-body on article elements ──────────────────────────────────── +describe('data-pagefind-body attribute', () => { + it('docs pages contain data-pagefind-body on article elements', () => { + assert.ok(docsHtmlFiles.length > 0, 'Should have docs HTML files to test'); + const missing = []; + for (const file of docsHtmlFiles) { + const html = fs.readFileSync(file, 'utf-8'); + if (!html.includes('data-pagefind-body')) { + missing.push(path.relative(DIST, file)); + } + } + assert.equal(missing.length, 0, `These docs pages lack data-pagefind-body: ${missing.join(', ')}`); + }); + it('blog post pages contain data-pagefind-body on article elements', () => { + assert.ok(blogHtmlFiles.length > 0, 'Should have blog HTML files to test'); + for (const file of blogHtmlFiles) { + const html = fs.readFileSync(file, 'utf-8'); + assert.ok( + html.includes('data-pagefind-body'), + `Blog page missing data-pagefind-body: ${path.relative(DIST, file)}` + ); + } + }); +}); +// ── data-pagefind-meta with section values ──────────────────────────────────── +describe('data-pagefind-meta section attribute', () => { + it('docs pages have data-pagefind-meta with a section value', () => { + assert.ok(docsHtmlFiles.length > 0, 'Should have docs HTML files to test'); + const sectionPattern = /data-pagefind-meta="section:([^"]+)"/; + const missing = []; + for (const 
file of docsHtmlFiles) { + const html = fs.readFileSync(file, 'utf-8'); + if (!sectionPattern.test(html)) { + missing.push(path.relative(DIST, file)); + } + } + assert.equal( + missing.length, 0, + `These docs pages lack data-pagefind-meta section: ${missing.join(', ')}` + ); + }); + it('section values match expected categories', () => { + const knownSections = new Set([ + 'Get Started', 'Guides', 'Core Features', 'Advanced Features', + 'Infrastructure', 'Reference', 'Scenarios', 'Concepts', + 'Blog', 'Docs', 'Community' + ]); + const sectionPattern = /data-pagefind-meta="section:([^"]+)"/g; + const foundSections = new Set(); + for (const file of [...docsHtmlFiles, ...blogHtmlFiles]) { + const html = fs.readFileSync(file, 'utf-8'); + for (const match of html.matchAll(sectionPattern)) { + foundSections.add(match[1]); + } + } + assert.ok(foundSections.size > 0, 'Should find at least one section value'); + for (const section of foundSections) { + assert.ok( + knownSections.has(section), + `Unexpected section value "${section}" — expected one of: ${[...knownSections].join(', ')}` + ); + } + }); + it('blog pages have section:Blog', () => { + assert.ok(blogHtmlFiles.length > 0, 'Should have blog HTML files to test'); + for (const file of blogHtmlFiles) { + const html = fs.readFileSync(file, 'utf-8'); + assert.ok( + html.includes('data-pagefind-meta="section:Blog"'), + `Blog page missing section:Blog — ${path.relative(DIST, file)}` + ); + } + }); +}); +// ── data-pagefind-weight on headings ────────────────────────────────────────── +describe('data-pagefind-weight on headings', () => { + it('docs pages have data-pagefind-weight="2" on h2 or h3 elements inside article', () => { + const weightPattern = /<h[23][^>]*data-pagefind-weight="2"/; + // Only check pages that have h2/h3 inside a prose article (standard content pages) + const articleHeadingPattern = /data-pagefind-body[\s\S]*?<h[23][\s>]/; + const pagesWithArticleHeadings = docsHtmlFiles.filter(f => { + const html = fs.readFileSync(f, 
'utf-8'); + return articleHeadingPattern.test(html); + }); + assert.ok(pagesWithArticleHeadings.length > 0, 'Should have docs pages with h2/h3 inside article'); + const missing = []; + for (const file of pagesWithArticleHeadings) { + const html = fs.readFileSync(file, 'utf-8'); + if (!weightPattern.test(html)) { + missing.push(path.relative(DIST, file)); + } + } + assert.equal( + missing.length, 0, + `These pages with article headings lack data-pagefind-weight="2": ${missing.join(', ')}` + ); + }); +}); +// ── data-pagefind-ignore on nav, footer, pre ────────────────────────────────── +describe('data-pagefind-ignore attributes', () => { + // Use one representative docs page that has all elements + const sampleFile = docsHtmlFiles.find(f => f.includes('built-in-roles')); + const sampleHtml = sampleFile ? fs.readFileSync(sampleFile, 'utf-8') : ''; + it('nav elements have data-pagefind-ignore', () => { + assert.ok(sampleFile, 'Sample file (built-in-roles) should exist'); + const navTags = [...sampleHtml.matchAll(/<nav[^>]*>/g)].map(m => m[0]); + assert.ok(navTags.length > 0, 'Page should contain nav elements'); + for (const tag of navTags) { + assert.ok( + tag.includes('data-pagefind-ignore'), + `nav element missing data-pagefind-ignore: ${tag.substring(0, 80)}` + ); + } + }); + it('footer elements have data-pagefind-ignore', () => { + assert.ok(sampleFile, 'Sample file should exist'); + const footerTags = [...sampleHtml.matchAll(/<footer[^>]*>/g)].map(m => m[0]); + assert.ok(footerTags.length > 0, 'Page should contain footer elements'); + for (const tag of footerTags) { + assert.ok( + tag.includes('data-pagefind-ignore'), + `footer element missing data-pagefind-ignore: ${tag.substring(0, 80)}` + ); + } + }); + it('pre (code block) elements have data-pagefind-ignore', () => { + assert.ok(sampleFile, 'Sample file should exist'); + const preTags = [...sampleHtml.matchAll(/<pre[^>]*>/g)].map(m => m[0]); + assert.ok(preTags.length > 0, 'Page should contain pre elements'); + for (const tag of 
preTags) { + assert.ok( + tag.includes('data-pagefind-ignore'), + `pre element missing data-pagefind-ignore: ${tag.substring(0, 80)}` + ); + } + }); +});