From 2839b798fccb1dd8c25158b4153b15416ab15329 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 14:23:42 +0000 Subject: [PATCH 1/4] Rename outside-in-testing skill to qa-team Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .claude/profiles/coding.yaml | 1 + .../e2e-outside-in-test-generator/SKILL.md | 4 +- .claude/skills/outside-in-testing/README.md | 766 +----- .claude/skills/outside-in-testing/SKILL.md | 2045 +--------------- .claude/skills/outside-in-testing/examples | 1 + .claude/skills/outside-in-testing/scripts | 1 + .claude/skills/outside-in-testing/tests | 1 + .claude/skills/qa-team/README.md | 794 +++++++ .claude/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 .../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- .claude/skills/shadow-testing/README.md | 6 +- .claude/skills/shadow-testing/SKILL.md | 6 +- .claude/skills/smart-test/README.md | 2 +- .claude/skills/smart-test/SKILL.md | 4 +- .claude/workflow/DEFAULT_WORKFLOW.md | 2 +- amplifier-bundle/bundle.md | 1 + .../recipes/default-workflow.yaml | 14 +- .../recipes/smart-orchestrator.yaml | 6 +- .../skills/outside-in-testing/README.md | 766 +----- .../skills/outside-in-testing/SKILL.md | 2045 +--------------- .../skills/outside-in-testing/examples | 1 + 
.../skills/outside-in-testing/scripts | 1 + .../skills/outside-in-testing/tests | 1 + amplifier-bundle/skills/qa-team/README.md | 794 +++++++ amplifier-bundle/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 .../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- .../skills/outside-in-testing/README.md | 766 +----- .../claude/skills/outside-in-testing/SKILL.md | 2045 +--------------- .../claude/skills/outside-in-testing/examples | 1 + docs/claude/skills/outside-in-testing/scripts | 1 + docs/claude/skills/outside-in-testing/tests | 1 + docs/claude/skills/qa-team/README.md | 794 +++++++ docs/claude/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 
.../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- src/amplihack/known_skills.py | 1 + tests/skills/test_qa_team_skill.py | 87 + 84 files changed, 8836 insertions(+), 8431 deletions(-) mode change 100644 => 120000 .claude/skills/outside-in-testing/README.md create mode 120000 .claude/skills/outside-in-testing/examples create mode 120000 .claude/skills/outside-in-testing/scripts create mode 120000 .claude/skills/outside-in-testing/tests create mode 100644 .claude/skills/qa-team/README.md create mode 100644 .claude/skills/qa-team/SKILL.md rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-form-validation.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename 
.claude/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename .claude/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) mode change 100644 => 120000 amplifier-bundle/skills/outside-in-testing/README.md create mode 120000 amplifier-bundle/skills/outside-in-testing/examples create mode 120000 amplifier-bundle/skills/outside-in-testing/scripts create mode 120000 amplifier-bundle/skills/outside-in-testing/tests create mode 100644 amplifier-bundle/skills/qa-team/README.md create mode 100644 amplifier-bundle/skills/qa-team/SKILL.md rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => 
qa-team}/examples/tui/tui-form-validation.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) mode change 100644 => 120000 docs/claude/skills/outside-in-testing/README.md create mode 120000 docs/claude/skills/outside-in-testing/examples create mode 120000 docs/claude/skills/outside-in-testing/scripts create mode 120000 docs/claude/skills/outside-in-testing/tests create mode 100644 docs/claude/skills/qa-team/README.md create mode 100644 docs/claude/skills/qa-team/SKILL.md rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename 
docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-form-validation.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) create mode 100644 tests/skills/test_qa_team_skill.py diff --git a/.claude/profiles/coding.yaml b/.claude/profiles/coding.yaml index ff67611fb..567953969 100644 --- a/.claude/profiles/coding.yaml +++ b/.claude/profiles/coding.yaml @@ -42,6 +42,7 @@ components: - "creative" - "research" include: + - "qa-team" - "outside-in-testing" - "design-patterns-expert" diff --git a/.claude/skills/e2e-outside-in-test-generator/SKILL.md b/.claude/skills/e2e-outside-in-test-generator/SKILL.md index 7204ab786..1f9a5893f 100644 --- a/.claude/skills/e2e-outside-in-test-generator/SKILL.md +++ b/.claude/skills/e2e-outside-in-test-generator/SKILL.md @@ -21,7 +21,7 @@ requires: [] invokes: - test-gap-analyzer (test coverage analysis) - shadow-testing (parallel test execution) - - outside-in-testing (methodology validation) + - qa-team (primary methodology validation; outside-in-testing remains an alias) output_location: e2e/ --- @@ -614,7 +614,7 @@ The skill: 3. Runs tests against both environments 4. 
Reports discrepancies -**With outside-in-testing methodology:** +**With qa-team methodology (formerly outside-in-testing):** The skill inherently follows outside-in testing: diff --git a/.claude/skills/outside-in-testing/README.md b/.claude/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/.claude/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. - -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? 
- -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. **Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. 
Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/.claude/skills/outside-in-testing/README.md b/.claude/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/.claude/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/SKILL.md b/.claude/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/.claude/skills/outside-in-testing/SKILL.md +++ b/.claude/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using gadugi-agentic-test framework for CLI, TUI, Web, and Electron apps. 
- Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/.claude/skills/outside-in-testing/examples b/.claude/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/.claude/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/scripts b/.claude/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/.claude/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/tests b/.claude/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/.claude/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/.claude/skills/qa-team/README.md b/.claude/skills/qa-team/README.md new file mode 100644 index 000000000..51e32504a --- /dev/null +++ b/.claude/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA 
Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. **Regression Testing** - Ensure changes don't break existing behavior +4. 
**Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/.claude/skills/qa-team/SKILL.md b/.claude/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/.claude/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a Node.js (TypeScript) framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level.
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one.
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Tracking Issue**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/.claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/.claude/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to .claude/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/.claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/.claude/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to .claude/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/.claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/.claude/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to .claude/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git a/.claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml 
b/.claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to .claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/.claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/.claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to .claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/.claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to .claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/.claude/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to .claude/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/.claude/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to .claude/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/.claude/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from 
.claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to .claude/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/.claude/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to .claude/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/.claude/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to .claude/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/.claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to .claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/.claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to .claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/.claude/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to .claude/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/.claude/skills/qa-team/examples/web/web-visual-regression.yaml 
similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to .claude/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/.claude/skills/outside-in-testing/scripts/check-freshness.py b/.claude/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from .claude/skills/outside-in-testing/scripts/check-freshness.py rename to .claude/skills/qa-team/scripts/check-freshness.py diff --git a/.claude/skills/outside-in-testing/tests/test_skill_examples.py b/.claude/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from .claude/skills/outside-in-testing/tests/test_skill_examples.py rename to .claude/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/.claude/skills/outside-in-testing/tests/test_skill_examples.py +++ b/.claude/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/.claude/skills/shadow-testing/README.md b/.claude/skills/shadow-testing/README.md index 2967fb55b..3f39991a4 100644 --- a/.claude/skills/shadow-testing/README.md +++ b/.claude/skills/shadow-testing/README.md @@ -143,7 +143,7 @@ amplifier-shadow exec test "gadugi-agentic-test run test-scenario.yaml" amplifier-shadow extract test /evidence ./test-evidence ``` -See the `outside-in-testing` skill Level 4 for complete integration examples. 
+See the `qa-team` skill Level 4 for complete integration examples (`outside-in-testing` remains an alias). ## Use Cases @@ -223,7 +223,7 @@ Shadow environments use this architecture: ## Related Skills -- **outside-in-testing** - Agentic behavior-driven tests (enhanced with Level 4 shadow integration) +- **qa-team** - Agentic behavior-driven tests (legacy alias: `outside-in-testing`) - **test-gap-analyzer** - Find untested code paths - **philosophy-guardian** - Verify scripts follow ruthless simplicity @@ -243,7 +243,7 @@ Shadow environments use this architecture: - Generalizable shell scripts for DIY setup - Docker Compose examples for all use cases - Multi-language support (Python, Node, Rust, Go) -- Integration patterns with outside-in-testing +- Integration patterns with qa-team / outside-in-testing alias - Philosophy alignment with ruthless simplicity ## Contributing diff --git a/.claude/skills/shadow-testing/SKILL.md b/.claude/skills/shadow-testing/SKILL.md index 99b140c45..cb3dc8d4c 100644 --- a/.claude/skills/shadow-testing/SKILL.md +++ b/.claude/skills/shadow-testing/SKILL.md @@ -647,7 +647,7 @@ amplifier-shadow exec test "gadugi-agentic-test run test-scenario.yaml" amplifier-shadow extract test /evidence ./test-evidence ``` -See the `outside-in-testing` skill for complete integration examples. +See the `qa-team` skill for complete integration examples (`outside-in-testing` remains an alias). 
## Best Practices [LEVEL 2] @@ -893,7 +893,7 @@ amplifier-shadow extract test /workspace/results ./local-results ## Related Skills [LEVEL 1] -- **outside-in-testing**: Run agentic tests in shadow environments +- **qa-team**: Run agentic tests in shadow environments (legacy name: `outside-in-testing`) - **test-gap-analyzer**: Find untested code paths (complement shadow testing) - **philosophy-guardian**: Verify shadow scripts follow ruthless simplicity @@ -919,7 +919,7 @@ When shadow tests fail: - Shell scripts for standalone usage - Docker Compose examples for CI integration - Complete CLI reference and troubleshooting guide -- Integration patterns with outside-in-testing +- Integration patterns with qa-team / outside-in-testing alias - Philosophy alignment with ruthless simplicity --- diff --git a/.claude/skills/smart-test/README.md b/.claude/skills/smart-test/README.md index 868b90ac9..80e95d3a8 100644 --- a/.claude/skills/smart-test/README.md +++ b/.claude/skills/smart-test/README.md @@ -94,7 +94,7 @@ User: Rebuild test mapping cache ## Related Skills - `test-gap-analyzer`: Find untested code -- `outside-in-testing`: Create E2E tests +- `qa-team`: Create E2E and parity tests (`outside-in-testing` alias supported) - `pre-commit-diagnostic`: Fix hook failures --- diff --git a/.claude/skills/smart-test/SKILL.md b/.claude/skills/smart-test/SKILL.md index 5aa0126c6..17d54a568 100644 --- a/.claude/skills/smart-test/SKILL.md +++ b/.claude/skills/smart-test/SKILL.md @@ -28,7 +28,7 @@ invokes: - type: skill name: test-gap-analyzer - type: skill - name: outside-in-testing + name: qa-team - type: skill name: pre-commit-diagnostic - type: subagent @@ -360,7 +360,7 @@ Works with existing pytest markers from pyproject.toml: ## Complementary Skills - **test-gap-analyzer**: Identifies missing tests -- **outside-in-testing**: Creates E2E test scenarios +- **qa-team**: Creates E2E and parity test scenarios (`outside-in-testing` alias supported) - **tester agent**: Writes new 
tests for gaps - **pre-commit-diagnostic**: Fixes pre-commit failures diff --git a/.claude/workflow/DEFAULT_WORKFLOW.md b/.claude/workflow/DEFAULT_WORKFLOW.md index 04d5aaa4a..05d6fdc79 100644 --- a/.claude/workflow/DEFAULT_WORKFLOW.md +++ b/.claude/workflow/DEFAULT_WORKFLOW.md @@ -771,7 +771,7 @@ Step 13 validates technical functionality locally. Step 19 validates real-world **For CLI/TUI applications:** -- [ ] Use `/outside-in-testing` skill for guided CLI/TUI testing workflow +- [ ] Use `/qa-team` skill for guided CLI/TUI testing workflow (`/outside-in-testing` remains an alias) - [ ] Test in fresh terminal session with production-like environment - [ ] Execute actual commands with various flags and inputs - [ ] Verify output formatting and error messages match expectations diff --git a/amplifier-bundle/bundle.md b/amplifier-bundle/bundle.md index 39607376a..effa306c0 100644 --- a/amplifier-bundle/bundle.md +++ b/amplifier-bundle/bundle.md @@ -86,6 +86,7 @@ skills: microsoft-agent-framework: { path: skills/microsoft-agent-framework/skill.md } module-spec-generator: { path: skills/module-spec-generator/SKILL.md } outside-in-testing: { path: skills/outside-in-testing/SKILL.md } + qa-team: { path: skills/qa-team/SKILL.md } remote-work: { path: skills/remote-work/SKILL.md } skill-builder: { path: skills/skill-builder/SKILL.md } test-gap-analyzer: { path: skills/test-gap-analyzer/SKILL.md } diff --git a/amplifier-bundle/recipes/default-workflow.yaml b/amplifier-bundle/recipes/default-workflow.yaml index 0e228292c..10f5fe0e9 100644 --- a/amplifier-bundle/recipes/default-workflow.yaml +++ b/amplifier-bundle/recipes/default-workflow.yaml @@ -824,7 +824,7 @@ steps: # ========================================================================== # STEP 13: MANDATORY OUTSIDE-IN TESTING - # Use the outside-in-testing skill to test this PR as a user would from + # Use the qa-team skill (outside-in-testing alias supported) to test this PR as a user would from # the PR branch. 
No bash echo — the agent must actually execute the tests. # ========================================================================== - id: "step-13-local-testing" @@ -836,7 +836,7 @@ steps: **Repository:** {{repo_path}} **Branch:** Run `git branch --show-current` to get the current branch name. - You MUST perform outside-in testing using the `outside-in-testing` skill + You MUST perform outside-in testing using the `qa-team` skill (`outside-in-testing` remains an alias) to verify this change as a real user would — from the PR branch, not just the working directory. @@ -852,11 +852,11 @@ steps: git remote get-url origin ``` - 3. **Invoke the `outside-in-testing` skill** to generate and execute + 3. **Invoke the `qa-team` skill** to generate and execute agentic outside-in tests for this change. Pass the PR branch name and repository URL so tests run against the actual branch: ``` - Skill(skill="outside-in-testing") + Skill(skill="qa-team") ``` 4. **Execute at least 2 test scenarios:** @@ -1061,10 +1061,10 @@ steps: ### Phase 1: Run Outside-In Tests - Use the `outside-in-testing` skill to test from the PR branch: + Use the `qa-team` skill to test from the PR branch (`outside-in-testing` alias also works): ``` - Skill(skill="outside-in-testing") + Skill(skill="qa-team") ``` Test using the PR branch: @@ -1139,7 +1139,7 @@ steps: echo "The outside-in testing step (step-13-local-testing) must be completed" && \ echo "before the PR review phase can begin." && \ echo "" && \ - echo "Required: Invoke the outside-in-testing skill and document at least" && \ + echo "Required: Invoke the qa-team skill (outside-in-testing alias also works) and document at least" && \ echo "2 test scenarios in the PR description under 'Step 13: Local Testing Results'." 
&& \ exit 1 ; \ fi && \ diff --git a/amplifier-bundle/recipes/smart-orchestrator.yaml b/amplifier-bundle/recipes/smart-orchestrator.yaml index 2223a2649..f7047143e 100644 --- a/amplifier-bundle/recipes/smart-orchestrator.yaml +++ b/amplifier-bundle/recipes/smart-orchestrator.yaml @@ -554,7 +554,7 @@ steps: For parallel workstreams: inspect the round results for evidence that each workstream completed, and check whether the log references mention outside-in - testing being performed (look for "Step 13", "outside-in-testing", or + testing being performed (look for "Step 13", "qa-team", "outside-in-testing", or "local testing" references in the results). ## Validation Steps @@ -566,7 +566,7 @@ steps: 2. For each workstream, determine whether outside-in testing was performed: - Look for "Step 13: Local Testing Results" evidence - - Look for `outside-in-testing` skill invocation + - Look for `qa-team` or `outside-in-testing` skill invocation - Look for at least 2 test scenario results (PASS/FAIL) 3. Report your findings in this format: @@ -579,7 +579,7 @@ steps: For each workstream: - Workstream: - - outside-in-testing skill invoked: YES / NO / UNKNOWN + - qa-team / outside-in-testing skill invoked: YES / NO / UNKNOWN - Step 13 results documented: YES / NO / UNKNOWN - Test scenarios executed: - VERDICT: PASS / FAIL / CANNOT_VERIFY diff --git a/amplifier-bundle/skills/outside-in-testing/README.md b/amplifier-bundle/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/amplifier-bundle/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. 
- -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? - -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. 
**Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/amplifier-bundle/skills/outside-in-testing/README.md b/amplifier-bundle/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/SKILL.md b/amplifier-bundle/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/amplifier-bundle/skills/outside-in-testing/SKILL.md +++ b/amplifier-bundle/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using 
gadugi-agentic-test framework for CLI, TUI, Web, and Electron apps. - Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/amplifier-bundle/skills/outside-in-testing/examples b/amplifier-bundle/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/scripts b/amplifier-bundle/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/tests b/amplifier-bundle/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/amplifier-bundle/skills/qa-team/README.md b/amplifier-bundle/skills/qa-team/README.md new file mode 100644 index 
000000000..51e32504a --- /dev/null +++ b/amplifier-bundle/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. 
**Regression Testing** - Ensure changes don't break existing behavior +4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/amplifier-bundle/skills/qa-team/SKILL.md b/amplifier-bundle/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/amplifier-bundle/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a Node.js/TypeScript framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level. 
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one. 
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/amplifier-bundle/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/amplifier-bundle/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/amplifier-bundle/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git 
a/amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml b/amplifier-bundle/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to amplifier-bundle/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/amplifier-bundle/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to amplifier-bundle/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/amplifier-bundle/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/amplifier-bundle/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/amplifier-bundle/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to 
amplifier-bundle/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/amplifier-bundle/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/amplifier-bundle/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/amplifier-bundle/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/amplifier-bundle/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/amplifier-bundle/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to amplifier-bundle/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git 
a/amplifier-bundle/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/amplifier-bundle/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to amplifier-bundle/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/amplifier-bundle/skills/qa-team/examples/web/web-visual-regression.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to amplifier-bundle/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/scripts/check-freshness.py b/amplifier-bundle/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/scripts/check-freshness.py rename to amplifier-bundle/skills/qa-team/scripts/check-freshness.py diff --git a/amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py b/amplifier-bundle/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py rename to amplifier-bundle/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py +++ b/amplifier-bundle/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. 
Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/docs/claude/skills/outside-in-testing/README.md b/docs/claude/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/docs/claude/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. - -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? 
- -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. **Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. 
Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/docs/claude/skills/outside-in-testing/README.md b/docs/claude/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/docs/claude/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/SKILL.md b/docs/claude/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/docs/claude/skills/outside-in-testing/SKILL.md +++ b/docs/claude/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using gadugi-agentic-test framework for CLI, 
TUI, Web, and Electron apps. - Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/docs/claude/skills/outside-in-testing/examples b/docs/claude/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/scripts b/docs/claude/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/tests b/docs/claude/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/docs/claude/skills/qa-team/README.md b/docs/claude/skills/qa-team/README.md new file mode 100644 index 000000000..51e32504a --- /dev/null +++ 
b/docs/claude/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. **Regression Testing** - Ensure changes don't break existing behavior +4. 
**Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/docs/claude/skills/qa-team/SKILL.md b/docs/claude/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/docs/claude/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a TypeScript (Node.js) framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level. 
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one. 
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/docs/claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/docs/claude/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to docs/claude/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/docs/claude/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to docs/claude/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/docs/claude/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to docs/claude/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git 
a/docs/claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml b/docs/claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to docs/claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/docs/claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to docs/claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/docs/claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to docs/claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/docs/claude/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to docs/claude/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/docs/claude/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to docs/claude/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git 
a/docs/claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/docs/claude/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to docs/claude/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/docs/claude/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to docs/claude/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/docs/claude/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to docs/claude/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/docs/claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to docs/claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/docs/claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to docs/claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/docs/claude/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from 
docs/claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to docs/claude/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/docs/claude/skills/qa-team/examples/web/web-visual-regression.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to docs/claude/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/docs/claude/skills/outside-in-testing/scripts/check-freshness.py b/docs/claude/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from docs/claude/skills/outside-in-testing/scripts/check-freshness.py rename to docs/claude/skills/qa-team/scripts/check-freshness.py diff --git a/docs/claude/skills/outside-in-testing/tests/test_skill_examples.py b/docs/claude/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from docs/claude/skills/outside-in-testing/tests/test_skill_examples.py rename to docs/claude/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/docs/claude/skills/outside-in-testing/tests/test_skill_examples.py +++ b/docs/claude/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. 
Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/src/amplihack/known_skills.py b/src/amplihack/known_skills.py index 76f2331c6..9971d7bf4 100644 --- a/src/amplihack/known_skills.py +++ b/src/amplihack/known_skills.py @@ -69,6 +69,7 @@ "n-version-workflow", "novelist-analyst", "outside-in-testing", + "qa-team", "pdf", "philosopher-analyst", "philosophy-compliance-workflow", diff --git a/tests/skills/test_qa_team_skill.py b/tests/skills/test_qa_team_skill.py new file mode 100644 index 000000000..4aa1ce721 --- /dev/null +++ b/tests/skills/test_qa_team_skill.py @@ -0,0 +1,87 @@ +"""Regression tests for the qa-team skill rename and alias layout.""" + +from pathlib import Path + +import yaml + + +REPO_ROOT = Path(__file__).resolve().parents[2] +SKILL_MIRRORS = [ + REPO_ROOT / ".claude" / "skills", + REPO_ROOT / "amplifier-bundle" / "skills", + REPO_ROOT / "docs" / "claude" / "skills", +] + + +def load_frontmatter(path: Path) -> dict: + """Load YAML frontmatter from a markdown file.""" + content = path.read_text(encoding="utf-8") + parts = content.split("---", 2) + assert len(parts) >= 3, f"{path} is missing closed YAML frontmatter" + metadata = yaml.safe_load(parts[1]) + assert isinstance(metadata, dict), f"{path} frontmatter should parse to a mapping" + return metadata + + +def test_known_skills_registry_includes_qa_team(): + """The known-skills registry should expose qa-team as a first-class skill.""" + known_skills = (REPO_ROOT / "src" / "amplihack" / "known_skills.py").read_text( + encoding="utf-8" + ) + assert '"qa-team"' in 
known_skills + assert '"outside-in-testing"' in known_skills + + +def test_bundle_registers_both_primary_and_alias(): + """The bundle index should advertise both the new primary skill and alias.""" + bundle = (REPO_ROOT / "amplifier-bundle" / "bundle.md").read_text(encoding="utf-8") + assert "outside-in-testing: { path: skills/outside-in-testing/SKILL.md }" in bundle + assert "qa-team: { path: skills/qa-team/SKILL.md }" in bundle + + +def test_qa_team_primary_skill_is_present_in_all_mirrors(): + """All shipped skill mirrors should contain qa-team with the new frontmatter name.""" + for skills_dir in SKILL_MIRRORS: + skill_file = skills_dir / "qa-team" / "SKILL.md" + metadata = load_frontmatter(skill_file) + content = skill_file.read_text(encoding="utf-8") + + assert metadata["name"] == "qa-team" + assert "--observable" in content + assert "--ssh-target" in content + assert "--shadow-mode" in content + assert "outside-in-testing" in content + + +def test_outside_in_testing_alias_points_to_qa_team_in_all_mirrors(): + """The legacy skill name should remain available as an alias that redirects to qa-team.""" + for skills_dir in SKILL_MIRRORS: + alias_dir = skills_dir / "outside-in-testing" + alias_skill = alias_dir / "SKILL.md" + metadata = load_frontmatter(alias_skill) + content = alias_skill.read_text(encoding="utf-8") + + assert metadata["name"] == "outside-in-testing" + assert "alias for `qa-team`" in content + + for name in ["README.md", "examples", "scripts", "tests"]: + alias_path = alias_dir / name + assert alias_path.is_symlink(), f"{alias_path} should be a symlink" + + +def test_workflow_and_profile_prefer_qa_team_name(): + """Core workflow surfaces should now recommend qa-team for new invocations.""" + default_workflow = ( + REPO_ROOT / "amplifier-bundle" / "recipes" / "default-workflow.yaml" + ).read_text(encoding="utf-8") + coding_profile = (REPO_ROOT / ".claude" / "profiles" / "coding.yaml").read_text( + encoding="utf-8" + ) + generator_skill = ( + 
REPO_ROOT / ".claude" / "skills" / "e2e-outside-in-test-generator" / "SKILL.md" + ).read_text(encoding="utf-8") + + assert 'Skill(skill="qa-team")' in default_workflow + assert "`outside-in-testing` remains an alias" in default_workflow + assert '- "qa-team"' in coding_profile + assert "qa-team (primary methodology validation" in generator_skill From 3c62ee4501b8bf9acfd55ec98d97b81f8063002f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 15:08:03 +0000 Subject: [PATCH 2/4] Sync smart-test qa-team references Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- amplifier-bundle/skills/smart-test/README.md | 2 +- amplifier-bundle/skills/smart-test/SKILL.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/amplifier-bundle/skills/smart-test/README.md b/amplifier-bundle/skills/smart-test/README.md index 868b90ac9..80e95d3a8 100644 --- a/amplifier-bundle/skills/smart-test/README.md +++ b/amplifier-bundle/skills/smart-test/README.md @@ -94,7 +94,7 @@ User: Rebuild test mapping cache ## Related Skills - `test-gap-analyzer`: Find untested code -- `outside-in-testing`: Create E2E tests +- `qa-team`: Create E2E and parity tests (`outside-in-testing` alias supported) - `pre-commit-diagnostic`: Fix hook failures --- diff --git a/amplifier-bundle/skills/smart-test/SKILL.md b/amplifier-bundle/skills/smart-test/SKILL.md index 5aa0126c6..17d54a568 100644 --- a/amplifier-bundle/skills/smart-test/SKILL.md +++ b/amplifier-bundle/skills/smart-test/SKILL.md @@ -28,7 +28,7 @@ invokes: - type: skill name: test-gap-analyzer - type: skill - name: outside-in-testing + name: qa-team - type: skill name: pre-commit-diagnostic - type: subagent @@ -360,7 +360,7 @@ Works with existing pytest markers from pyproject.toml: ## Complementary Skills - **test-gap-analyzer**: Identifies missing tests -- **outside-in-testing**: Creates E2E test scenarios +- **qa-team**: Creates E2E and parity test scenarios (`outside-in-testing` alias supported) - 
**tester agent**: Writes new tests for gaps - **pre-commit-diagnostic**: Fixes pre-commit failures From 2d30edd4b7fcc255d798fd01008274fa738ceb58 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Mar 2026 15:16:08 +0000 Subject: [PATCH 3/4] [skip ci] chore: Auto-bump patch version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0de4dec46..701f70797 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ backend-path = ["."] [project] name = "amplihack" -version = "0.6.20" +version = "0.6.21" description = "Amplifier bundle for agentic coding with comprehensive skills, recipes, and workflows" requires-python = ">=3.11" dependencies = [ From d6a43b34fb5bfacd62de0355e032e03a77101884 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 15:17:05 +0000 Subject: [PATCH 4/4] Sync bundle skill drift blockers Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- amplifier-bundle/skills/common/verification/verify_skill.py | 1 - amplifier-bundle/skills/context-management/automation.py | 1 - .../skills/pm-architect/scripts/generate_daily_status.py | 1 - .../skills/pm-architect/scripts/generate_roadmap_review.py | 1 - amplifier-bundle/skills/pm-architect/scripts/triage_pr.py | 1 - 5 files changed, 5 deletions(-) diff --git a/amplifier-bundle/skills/common/verification/verify_skill.py b/amplifier-bundle/skills/common/verification/verify_skill.py index a7d2db19e..50d5d100e 100755 --- a/amplifier-bundle/skills/common/verification/verify_skill.py +++ b/amplifier-bundle/skills/common/verification/verify_skill.py @@ -33,7 +33,6 @@ def check_python_package(package: str) -> tuple[bool, str]: version = getattr(mod, "__version__", "unknown") return True, f"Installed (v{version})" except ImportError: - print(f"WARNING: {package} not available", file=sys.stderr) return False, "Not installed" diff --git a/amplifier-bundle/skills/context-management/automation.py 
b/amplifier-bundle/skills/context-management/automation.py index 4ca6b5606..8ecc6f5c1 100644 --- a/amplifier-bundle/skills/context-management/automation.py +++ b/amplifier-bundle/skills/context-management/automation.py @@ -20,7 +20,6 @@ TokenMonitor, ) except ImportError: - print("WARNING: context_management not available", file=sys.stderr) # Fallback for when running from hooks from .context_extractor import ContextExtractor from .context_rehydrator import ContextRehydrator diff --git a/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py b/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py index 60f2b9ae4..ea7be0de4 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py +++ b/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False diff --git a/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py b/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py index b1bb36e72..32323e414 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py +++ b/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False diff --git a/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py b/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py index 2cd73d62c..8d25ce07f 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py +++ b/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False