From 2839b798fccb1dd8c25158b4153b15416ab15329 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 14:23:42 +0000 Subject: [PATCH 1/4] Rename outside-in-testing skill to qa-team Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .claude/profiles/coding.yaml | 1 + .../e2e-outside-in-test-generator/SKILL.md | 4 +- .claude/skills/outside-in-testing/README.md | 766 +----- .claude/skills/outside-in-testing/SKILL.md | 2045 +--------------- .claude/skills/outside-in-testing/examples | 1 + .claude/skills/outside-in-testing/scripts | 1 + .claude/skills/outside-in-testing/tests | 1 + .claude/skills/qa-team/README.md | 794 +++++++ .claude/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 .../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- .claude/skills/shadow-testing/README.md | 6 +- .claude/skills/shadow-testing/SKILL.md | 6 +- .claude/skills/smart-test/README.md | 2 +- .claude/skills/smart-test/SKILL.md | 4 +- .claude/workflow/DEFAULT_WORKFLOW.md | 2 +- amplifier-bundle/bundle.md | 1 + .../recipes/default-workflow.yaml | 14 +- .../recipes/smart-orchestrator.yaml | 6 +- .../skills/outside-in-testing/README.md | 766 +----- .../skills/outside-in-testing/SKILL.md | 2045 +--------------- .../skills/outside-in-testing/examples | 1 + 
.../skills/outside-in-testing/scripts | 1 + .../skills/outside-in-testing/tests | 1 + amplifier-bundle/skills/qa-team/README.md | 794 +++++++ amplifier-bundle/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 .../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- .../skills/outside-in-testing/README.md | 766 +----- .../claude/skills/outside-in-testing/SKILL.md | 2045 +--------------- .../claude/skills/outside-in-testing/examples | 1 + docs/claude/skills/outside-in-testing/scripts | 1 + docs/claude/skills/outside-in-testing/tests | 1 + docs/claude/skills/qa-team/README.md | 794 +++++++ docs/claude/skills/qa-team/SKILL.md | 2100 +++++++++++++++++ .../examples/cli/calculator-basic.yaml | 0 .../examples/cli/cli-error-handling.yaml | 0 .../examples/cli/cli-interactive-session.yaml | 0 .../custom-comprehension-agent.yaml | 0 .../custom-reporter-integration.yaml | 0 .../electron/electron-ipc-testing.yaml | 0 .../electron/electron-menu-testing.yaml | 0 .../electron/multi-window-coordination.yaml | 0 .../electron/single-window-basic.yaml | 0 .../examples/tui/file-manager-navigation.yaml | 0 .../examples/tui/tui-form-validation.yaml | 0 .../tui/tui-performance-monitoring.yaml | 0 .../examples/web/dashboard-smoke-test.yaml | 0 .../examples/web/web-authentication-flow.yaml | 0 
.../examples/web/web-visual-regression.yaml | 0 .../scripts/check-freshness.py | 0 .../tests/test_skill_examples.py | 3 +- src/amplihack/known_skills.py | 1 + tests/skills/test_qa_team_skill.py | 87 + 84 files changed, 8836 insertions(+), 8431 deletions(-) mode change 100644 => 120000 .claude/skills/outside-in-testing/README.md create mode 120000 .claude/skills/outside-in-testing/examples create mode 120000 .claude/skills/outside-in-testing/scripts create mode 120000 .claude/skills/outside-in-testing/tests create mode 100644 .claude/skills/qa-team/README.md create mode 100644 .claude/skills/qa-team/SKILL.md rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-form-validation.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename 
.claude/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename .claude/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename .claude/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) mode change 100644 => 120000 amplifier-bundle/skills/outside-in-testing/README.md create mode 120000 amplifier-bundle/skills/outside-in-testing/examples create mode 120000 amplifier-bundle/skills/outside-in-testing/scripts create mode 120000 amplifier-bundle/skills/outside-in-testing/tests create mode 100644 amplifier-bundle/skills/qa-team/README.md create mode 100644 amplifier-bundle/skills/qa-team/SKILL.md rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => 
qa-team}/examples/tui/tui-form-validation.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename amplifier-bundle/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) mode change 100644 => 120000 docs/claude/skills/outside-in-testing/README.md create mode 120000 docs/claude/skills/outside-in-testing/examples create mode 120000 docs/claude/skills/outside-in-testing/scripts create mode 120000 docs/claude/skills/outside-in-testing/tests create mode 100644 docs/claude/skills/qa-team/README.md create mode 100644 docs/claude/skills/qa-team/SKILL.md rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/calculator-basic.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-error-handling.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/cli/cli-interactive-session.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-comprehension-agent.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/custom-agents/custom-reporter-integration.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-ipc-testing.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/electron-menu-testing.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/multi-window-coordination.yaml (100%) rename 
docs/claude/skills/{outside-in-testing => qa-team}/examples/electron/single-window-basic.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/file-manager-navigation.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-form-validation.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/tui/tui-performance-monitoring.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/dashboard-smoke-test.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/web-authentication-flow.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/examples/web/web-visual-regression.yaml (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/scripts/check-freshness.py (100%) rename docs/claude/skills/{outside-in-testing => qa-team}/tests/test_skill_examples.py (98%) create mode 100644 tests/skills/test_qa_team_skill.py diff --git a/.claude/profiles/coding.yaml b/.claude/profiles/coding.yaml index ff67611fb..567953969 100644 --- a/.claude/profiles/coding.yaml +++ b/.claude/profiles/coding.yaml @@ -42,6 +42,7 @@ components: - "creative" - "research" include: + - "qa-team" - "outside-in-testing" - "design-patterns-expert" diff --git a/.claude/skills/e2e-outside-in-test-generator/SKILL.md b/.claude/skills/e2e-outside-in-test-generator/SKILL.md index 7204ab786..1f9a5893f 100644 --- a/.claude/skills/e2e-outside-in-test-generator/SKILL.md +++ b/.claude/skills/e2e-outside-in-test-generator/SKILL.md @@ -21,7 +21,7 @@ requires: [] invokes: - test-gap-analyzer (test coverage analysis) - shadow-testing (parallel test execution) - - outside-in-testing (methodology validation) + - qa-team (primary methodology validation; outside-in-testing remains an alias) output_location: e2e/ --- @@ -614,7 +614,7 @@ The skill: 3. Runs tests against both environments 4. 
Reports discrepancies -**With outside-in-testing methodology:** +**With qa-team methodology (formerly outside-in-testing):** The skill inherently follows outside-in testing: diff --git a/.claude/skills/outside-in-testing/README.md b/.claude/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/.claude/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. - -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? 
- -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. **Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. 
Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/.claude/skills/outside-in-testing/README.md b/.claude/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/.claude/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/SKILL.md b/.claude/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/.claude/skills/outside-in-testing/SKILL.md +++ b/.claude/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using gadugi-agentic-test framework for CLI, TUI, Web, and Electron apps. 
- Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/.claude/skills/outside-in-testing/examples b/.claude/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/.claude/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/scripts b/.claude/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/.claude/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/.claude/skills/outside-in-testing/tests b/.claude/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/.claude/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/.claude/skills/qa-team/README.md b/.claude/skills/qa-team/README.md new file mode 100644 index 000000000..51e32504a --- /dev/null +++ b/.claude/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA 
Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. **Regression Testing** - Ensure changes don't break existing behavior +4. 
**Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/.claude/skills/qa-team/SKILL.md b/.claude/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/.claude/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a Node.js (TypeScript) framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level.
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one.
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Tracking Issue**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/.claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/.claude/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to .claude/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/.claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/.claude/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to .claude/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/.claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/.claude/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to .claude/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git a/.claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml 
b/.claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to .claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/.claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/.claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to .claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/.claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to .claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/.claude/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to .claude/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/.claude/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to .claude/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git a/.claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/.claude/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from 
.claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to .claude/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/.claude/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to .claude/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/.claude/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to .claude/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/.claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/.claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to .claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/.claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to .claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/.claude/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to .claude/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/.claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/.claude/skills/qa-team/examples/web/web-visual-regression.yaml 
similarity index 100% rename from .claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to .claude/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/.claude/skills/outside-in-testing/scripts/check-freshness.py b/.claude/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from .claude/skills/outside-in-testing/scripts/check-freshness.py rename to .claude/skills/qa-team/scripts/check-freshness.py diff --git a/.claude/skills/outside-in-testing/tests/test_skill_examples.py b/.claude/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from .claude/skills/outside-in-testing/tests/test_skill_examples.py rename to .claude/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/.claude/skills/outside-in-testing/tests/test_skill_examples.py +++ b/.claude/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/.claude/skills/shadow-testing/README.md b/.claude/skills/shadow-testing/README.md index 2967fb55b..3f39991a4 100644 --- a/.claude/skills/shadow-testing/README.md +++ b/.claude/skills/shadow-testing/README.md @@ -143,7 +143,7 @@ amplifier-shadow exec test "gadugi-agentic-test run test-scenario.yaml" amplifier-shadow extract test /evidence ./test-evidence ``` -See the `outside-in-testing` skill Level 4 for complete integration examples. 
+See the `qa-team` skill Level 4 for complete integration examples (`outside-in-testing` remains an alias). ## Use Cases @@ -223,7 +223,7 @@ Shadow environments use this architecture: ## Related Skills -- **outside-in-testing** - Agentic behavior-driven tests (enhanced with Level 4 shadow integration) +- **qa-team** - Agentic behavior-driven tests (legacy alias: `outside-in-testing`) - **test-gap-analyzer** - Find untested code paths - **philosophy-guardian** - Verify scripts follow ruthless simplicity @@ -243,7 +243,7 @@ Shadow environments use this architecture: - Generalizable shell scripts for DIY setup - Docker Compose examples for all use cases - Multi-language support (Python, Node, Rust, Go) -- Integration patterns with outside-in-testing +- Integration patterns with qa-team / outside-in-testing alias - Philosophy alignment with ruthless simplicity ## Contributing diff --git a/.claude/skills/shadow-testing/SKILL.md b/.claude/skills/shadow-testing/SKILL.md index 99b140c45..cb3dc8d4c 100644 --- a/.claude/skills/shadow-testing/SKILL.md +++ b/.claude/skills/shadow-testing/SKILL.md @@ -647,7 +647,7 @@ amplifier-shadow exec test "gadugi-agentic-test run test-scenario.yaml" amplifier-shadow extract test /evidence ./test-evidence ``` -See the `outside-in-testing` skill for complete integration examples. +See the `qa-team` skill for complete integration examples (`outside-in-testing` remains an alias). 
## Best Practices [LEVEL 2] @@ -893,7 +893,7 @@ amplifier-shadow extract test /workspace/results ./local-results ## Related Skills [LEVEL 1] -- **outside-in-testing**: Run agentic tests in shadow environments +- **qa-team**: Run agentic tests in shadow environments (legacy name: `outside-in-testing`) - **test-gap-analyzer**: Find untested code paths (complement shadow testing) - **philosophy-guardian**: Verify shadow scripts follow ruthless simplicity @@ -919,7 +919,7 @@ When shadow tests fail: - Shell scripts for standalone usage - Docker Compose examples for CI integration - Complete CLI reference and troubleshooting guide -- Integration patterns with outside-in-testing +- Integration patterns with qa-team / outside-in-testing alias - Philosophy alignment with ruthless simplicity --- diff --git a/.claude/skills/smart-test/README.md b/.claude/skills/smart-test/README.md index 868b90ac9..80e95d3a8 100644 --- a/.claude/skills/smart-test/README.md +++ b/.claude/skills/smart-test/README.md @@ -94,7 +94,7 @@ User: Rebuild test mapping cache ## Related Skills - `test-gap-analyzer`: Find untested code -- `outside-in-testing`: Create E2E tests +- `qa-team`: Create E2E and parity tests (`outside-in-testing` alias supported) - `pre-commit-diagnostic`: Fix hook failures --- diff --git a/.claude/skills/smart-test/SKILL.md b/.claude/skills/smart-test/SKILL.md index 5aa0126c6..17d54a568 100644 --- a/.claude/skills/smart-test/SKILL.md +++ b/.claude/skills/smart-test/SKILL.md @@ -28,7 +28,7 @@ invokes: - type: skill name: test-gap-analyzer - type: skill - name: outside-in-testing + name: qa-team - type: skill name: pre-commit-diagnostic - type: subagent @@ -360,7 +360,7 @@ Works with existing pytest markers from pyproject.toml: ## Complementary Skills - **test-gap-analyzer**: Identifies missing tests -- **outside-in-testing**: Creates E2E test scenarios +- **qa-team**: Creates E2E and parity test scenarios (`outside-in-testing` alias supported) - **tester agent**: Writes new 
tests for gaps - **pre-commit-diagnostic**: Fixes pre-commit failures diff --git a/.claude/workflow/DEFAULT_WORKFLOW.md b/.claude/workflow/DEFAULT_WORKFLOW.md index 04d5aaa4a..05d6fdc79 100644 --- a/.claude/workflow/DEFAULT_WORKFLOW.md +++ b/.claude/workflow/DEFAULT_WORKFLOW.md @@ -771,7 +771,7 @@ Step 13 validates technical functionality locally. Step 19 validates real-world **For CLI/TUI applications:** -- [ ] Use `/outside-in-testing` skill for guided CLI/TUI testing workflow +- [ ] Use `/qa-team` skill for guided CLI/TUI testing workflow (`/outside-in-testing` remains an alias) - [ ] Test in fresh terminal session with production-like environment - [ ] Execute actual commands with various flags and inputs - [ ] Verify output formatting and error messages match expectations diff --git a/amplifier-bundle/bundle.md b/amplifier-bundle/bundle.md index 39607376a..effa306c0 100644 --- a/amplifier-bundle/bundle.md +++ b/amplifier-bundle/bundle.md @@ -86,6 +86,7 @@ skills: microsoft-agent-framework: { path: skills/microsoft-agent-framework/skill.md } module-spec-generator: { path: skills/module-spec-generator/SKILL.md } outside-in-testing: { path: skills/outside-in-testing/SKILL.md } + qa-team: { path: skills/qa-team/SKILL.md } remote-work: { path: skills/remote-work/SKILL.md } skill-builder: { path: skills/skill-builder/SKILL.md } test-gap-analyzer: { path: skills/test-gap-analyzer/SKILL.md } diff --git a/amplifier-bundle/recipes/default-workflow.yaml b/amplifier-bundle/recipes/default-workflow.yaml index 0e228292c..10f5fe0e9 100644 --- a/amplifier-bundle/recipes/default-workflow.yaml +++ b/amplifier-bundle/recipes/default-workflow.yaml @@ -824,7 +824,7 @@ steps: # ========================================================================== # STEP 13: MANDATORY OUTSIDE-IN TESTING - # Use the outside-in-testing skill to test this PR as a user would from + # Use the qa-team skill (outside-in-testing alias supported) to test this PR as a user would from # the PR branch. 
No bash echo — the agent must actually execute the tests. # ========================================================================== - id: "step-13-local-testing" @@ -836,7 +836,7 @@ steps: **Repository:** {{repo_path}} **Branch:** Run `git branch --show-current` to get the current branch name. - You MUST perform outside-in testing using the `outside-in-testing` skill + You MUST perform outside-in testing using the `qa-team` skill (`outside-in-testing` remains an alias) to verify this change as a real user would — from the PR branch, not just the working directory. @@ -852,11 +852,11 @@ steps: git remote get-url origin ``` - 3. **Invoke the `outside-in-testing` skill** to generate and execute + 3. **Invoke the `qa-team` skill** to generate and execute agentic outside-in tests for this change. Pass the PR branch name and repository URL so tests run against the actual branch: ``` - Skill(skill="outside-in-testing") + Skill(skill="qa-team") ``` 4. **Execute at least 2 test scenarios:** @@ -1061,10 +1061,10 @@ steps: ### Phase 1: Run Outside-In Tests - Use the `outside-in-testing` skill to test from the PR branch: + Use the `qa-team` skill to test from the PR branch (`outside-in-testing` alias also works): ``` - Skill(skill="outside-in-testing") + Skill(skill="qa-team") ``` Test using the PR branch: @@ -1139,7 +1139,7 @@ steps: echo "The outside-in testing step (step-13-local-testing) must be completed" && \ echo "before the PR review phase can begin." && \ echo "" && \ - echo "Required: Invoke the outside-in-testing skill and document at least" && \ + echo "Required: Invoke the qa-team skill (outside-in-testing alias also works) and document at least" && \ echo "2 test scenarios in the PR description under 'Step 13: Local Testing Results'." 
&& \ exit 1 ; \ fi && \ diff --git a/amplifier-bundle/recipes/smart-orchestrator.yaml b/amplifier-bundle/recipes/smart-orchestrator.yaml index 2223a2649..f7047143e 100644 --- a/amplifier-bundle/recipes/smart-orchestrator.yaml +++ b/amplifier-bundle/recipes/smart-orchestrator.yaml @@ -554,7 +554,7 @@ steps: For parallel workstreams: inspect the round results for evidence that each workstream completed, and check whether the log references mention outside-in - testing being performed (look for "Step 13", "outside-in-testing", or + testing being performed (look for "Step 13", "qa-team", "outside-in-testing", or "local testing" references in the results). ## Validation Steps @@ -566,7 +566,7 @@ steps: 2. For each workstream, determine whether outside-in testing was performed: - Look for "Step 13: Local Testing Results" evidence - - Look for `outside-in-testing` skill invocation + - Look for `qa-team` or `outside-in-testing` skill invocation - Look for at least 2 test scenario results (PASS/FAIL) 3. Report your findings in this format: @@ -579,7 +579,7 @@ steps: For each workstream: - Workstream: - - outside-in-testing skill invoked: YES / NO / UNKNOWN + - qa-team / outside-in-testing skill invoked: YES / NO / UNKNOWN - Step 13 results documented: YES / NO / UNKNOWN - Test scenarios executed: - VERDICT: PASS / FAIL / CANNOT_VERIFY diff --git a/amplifier-bundle/skills/outside-in-testing/README.md b/amplifier-bundle/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/amplifier-bundle/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. 
- -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? - -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. 
**Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/amplifier-bundle/skills/outside-in-testing/README.md b/amplifier-bundle/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/SKILL.md b/amplifier-bundle/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/amplifier-bundle/skills/outside-in-testing/SKILL.md +++ b/amplifier-bundle/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using 
gadugi-agentic-test framework for CLI, TUI, Web, and Electron apps. - Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/amplifier-bundle/skills/outside-in-testing/examples b/amplifier-bundle/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/scripts b/amplifier-bundle/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/amplifier-bundle/skills/outside-in-testing/tests b/amplifier-bundle/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/amplifier-bundle/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/amplifier-bundle/skills/qa-team/README.md b/amplifier-bundle/skills/qa-team/README.md new file mode 100644 index 
000000000..51e32504a --- /dev/null +++ b/amplifier-bundle/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. 
**Regression Testing** - Ensure changes don't break existing behavior +4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/amplifier-bundle/skills/qa-team/SKILL.md b/amplifier-bundle/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/amplifier-bundle/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a Node.js/TypeScript framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level. 
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one. 
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/amplifier-bundle/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/amplifier-bundle/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/amplifier-bundle/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to amplifier-bundle/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git 
a/amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml b/amplifier-bundle/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to amplifier-bundle/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/amplifier-bundle/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to amplifier-bundle/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/amplifier-bundle/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/amplifier-bundle/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/amplifier-bundle/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to 
amplifier-bundle/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/amplifier-bundle/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to amplifier-bundle/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/amplifier-bundle/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/amplifier-bundle/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/amplifier-bundle/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to amplifier-bundle/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/amplifier-bundle/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to amplifier-bundle/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git 
a/amplifier-bundle/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/amplifier-bundle/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to amplifier-bundle/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/amplifier-bundle/skills/qa-team/examples/web/web-visual-regression.yaml similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to amplifier-bundle/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/amplifier-bundle/skills/outside-in-testing/scripts/check-freshness.py b/amplifier-bundle/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from amplifier-bundle/skills/outside-in-testing/scripts/check-freshness.py rename to amplifier-bundle/skills/qa-team/scripts/check-freshness.py diff --git a/amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py b/amplifier-bundle/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py rename to amplifier-bundle/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/amplifier-bundle/skills/outside-in-testing/tests/test_skill_examples.py +++ b/amplifier-bundle/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. 
Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/docs/claude/skills/outside-in-testing/README.md b/docs/claude/skills/outside-in-testing/README.md deleted file mode 100644 index 9eb17ee84..000000000 --- a/docs/claude/skills/outside-in-testing/README.md +++ /dev/null @@ -1,765 +0,0 @@ -# Outside-In Testing Skill - -## Overview - -The Outside-In Testing Skill helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. - -**Key Benefits**: - -- Tests survive refactoring (implementation changes don't break tests) -- Readable by non-developers (declarative YAML format) -- Platform-agnostic (same structure for CLI, TUI, Web, Electron) -- AI-powered execution (agents handle complex interactions) -- Evidence-based validation (screenshots, logs, output captures) - -## What is Outside-In Testing? 
- -**Traditional Testing** (Inside-Out): - -```python -# Knows internal implementation -def test_user_service(): - service = UserService() - user = service.create_user("test@example.com") - assert user.id is not None - assert user.email == "test@example.com" - assert user.created_at <= datetime.now() # Internal state -``` - -**Outside-In Testing**: - -```yaml -# Only knows external behavior -scenario: - name: "User Registration" - type: web - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "test@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_url - contains: "/welcome" -``` - -The outside-in test verifies the same functionality but: - -- Doesn't depend on internal classes (`UserService`) -- Doesn't check internal state (`created_at`, `id`) -- Tests from user's perspective (what they see and do) -- Remains valid even if implementation completely changes - -## When to Use This Skill - -### Perfect Scenarios - -1. **Smoke Testing** - Quickly verify critical paths work -2. **Acceptance Testing** - Validate features meet requirements -3. **Regression Testing** - Ensure changes don't break existing behavior -4. **Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach -5. **Refactoring Safety** - Tests protect behavior during rewrites -6. **Documentation as Tests** - YAML doubles as executable specifications - -### Complementary to Unit Tests - -Outside-in tests work best alongside unit tests: - -- **Unit Tests** (60%): Internal logic, edge cases, error handling -- **Integration Tests** (30%): Component interactions, API contracts -- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths - -Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. - -## Quick Start - -### 1. 
Install Framework - -**Option A: From GitHub (Recommended - Latest)** - -```bash -# Install globally -npm install -g github:rysweet/gadugi-agentic-test - -# Or use with npx -npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml - -# Or clone and build -git clone https://github.com/rysweet/gadugi-agentic-test -cd gadugi-agentic-test -npm install -npm run build -node dist/cli.js run scenarios/your-test.yaml -``` - -**Option B: From npm (when published)** - -```bash -npm install -g gadugi-agentic-test -gadugi-test run test.yaml -``` - -**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. - -### 2. Create Your First Test - -Save as `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World Test" - description: "Verify application prints greeting" - type: cli - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -### 3. Run the Test - -**If installed globally**: - -```bash -gadugi-test run test-hello.yaml -``` - -**If using from source**: - -```bash -cd /path/to/gadugi-agentic-test -node dist/cli.js run /path/to/test-hello.yaml -``` - -**Run all tests in directory**: - -```bash -node dist/cli.js run -d ./my-test-scenarios -``` - -### 4. 
Review Results - -The framework generates evidence in `./evidence/`: - -- Execution logs -- Output captures -- Screenshots (for TUI/Web/Electron) -- Timing data -- HTML report - -## Supported Application Types - -### CLI (Command-Line Interface) - -Test command-line tools, scripts, and utilities: - -```yaml -scenario: - name: "Git Status Test" - type: cli - steps: - - action: launch - target: "git" - args: ["status"] - - action: verify_output - contains: "On branch" -``` - -**Common Use Cases**: - -- Package managers (npm, pip, cargo) -- Build tools (make, gradle, webpack) -- DevOps tools (docker, kubectl, terraform) -- Custom CLI applications - -### TUI (Terminal User Interface) - -Test interactive terminal applications: - -```yaml -scenario: - name: "TUI Navigation" - type: tui - steps: - - action: launch - target: "./file-manager" - - action: send_keypress - value: "down" - times: 3 - - action: verify_screen - contains: "> documents/" -``` - -**Common Use Cases**: - -- System monitors (htop, top) -- Text editors (vim, nano) -- File managers (ranger, midnight commander) -- Custom TUI dashboards - -### Web Applications - -Test browser-based applications: - -```yaml -scenario: - name: "Web Dashboard Test" - type: web - steps: - - action: navigate - url: "http://localhost:3000" - - action: verify_element - selector: "h1" - contains: "Dashboard" -``` - -**Common Use Cases**: - -- SPAs (React, Vue, Angular apps) -- Admin panels -- E-commerce sites -- SaaS applications - -### Electron Applications - -Test desktop apps built with Electron: - -```yaml -scenario: - name: "Desktop App Test" - type: electron - steps: - - action: launch - target: "./dist/my-app" - - action: verify_window - title: "My Application" -``` - -**Common Use Cases**: - -- Code editors (VS Code-like apps) -- Chat applications (Slack, Discord clones) -- Productivity tools -- Custom desktop applications - -## Progressive Learning Path - -The skill teaches testing in three levels: - -### Level 1: 
Fundamentals (Start Here) - -- Basic test structure (YAML anatomy) -- Single-action tests -- Simple verification -- Smoke tests - -**Examples**: - -- `examples/cli/calculator-basic.yaml` -- `examples/tui/file-manager-navigation.yaml` -- `examples/web/dashboard-smoke-test.yaml` -- `examples/electron/single-window-basic.yaml` - -### Level 2: Intermediate - -- Multi-step workflows -- Conditional logic -- Error handling -- Variables and templating - -**Examples**: - -- `examples/cli/cli-error-handling.yaml` -- `examples/tui/tui-form-validation.yaml` -- `examples/web/web-authentication-flow.yaml` -- `examples/electron/multi-window-coordination.yaml` - -### Level 3: Advanced - -- Custom comprehension agents -- Visual regression testing -- Performance validation -- IPC testing (Electron) - -**Examples**: - -- `examples/tui/tui-performance-monitoring.yaml` -- `examples/electron/electron-ipc-testing.yaml` -- `examples/custom-agents/custom-comprehension-agent.yaml` -- `examples/custom-agents/custom-reporter-integration.yaml` - -## Example Library - -This skill includes **15 complete working examples**: - -### CLI (3 examples) - -- Basic calculator operations [Level 1] -- Error handling and recovery [Level 2] -- Interactive session management [Level 2] - -### TUI (3 examples) - -- File manager navigation [Level 1] -- Form validation [Level 2] -- Performance monitoring [Level 3] - -### Web (3 examples) - -- Dashboard smoke test [Level 1] -- Authentication flow [Level 2] -- Visual regression [Level 2] - -### Electron (4 examples) - -- Single window basics [Level 1] -- Multi-window coordination [Level 2] -- Menu interactions [Level 2] -- IPC testing [Level 3] - -### Custom Agents (2 examples) - -- Domain-specific comprehension [Level 3] -- Custom reporting [Level 3] - -All examples include: - -- Complete working YAML -- Inline documentation -- Expected output -- Prerequisites -- Level indicators - -## Using This Skill in Claude - -### Invoke the Skill - -``` -Claude, use the 
outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for user login. - -Claude, create Electron tests using outside-in-testing for my desktop app. -``` - -### What You'll Receive - -1. **Complete YAML test scenario** matching your requirements -2. **Inline comments** explaining each section -3. **Best practices** applied (timeouts, waits, verification) -4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) -5. **Instructions** for running the test - -### Example Interaction - -**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" - -**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: - -- Navigation to login page -- Form filling (email, password) -- Submit button click -- URL verification (redirected to dashboard) -- Element verification (user profile visible) -- Screenshot capture -- Proper timeouts and waits - -## Integration with Amplihack Philosophy - -This skill embodies amplihack's core principles: - -### Ruthless Simplicity - -- Declarative YAML over complex code -- Minimal boilerplate -- Focus on behavior, not implementation - -### Modular Design (Bricks & Studs) - -- Self-contained test scenarios -- Clear action contracts -- Composable test steps - -### Zero-BS Implementation - -- No stubs or placeholders -- Every example is runnable -- Clear, actionable error messages - -### Outside-In Thinking - -- User perspective first -- Implementation-agnostic tests -- Behavior-driven validation - -## Best Practices - -### 1. Start Simple - -Begin with basic smoke tests, then add complexity: - -```yaml -# Level 1: Smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. 
Use Descriptive Names - -```yaml -# Good -scenario: - name: "User Login - Valid Credentials" - description: "Verifies successful login with email and password" - -# Bad -scenario: - name: "Test 1" -``` - -### 3. Verify Critical Paths Only - -Don't test every detail. Focus on user-facing behavior: - -```yaml -# Good - User-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" - -# Bad - Implementation detail -- action: verify_element - selector: ".cache-status" - contains: "initialized" -``` - -### 4. Always Wait for Dynamic Content - -```yaml -# Good - Wait before verification -- action: click - selector: ".load-data" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" - -# Bad - May fail due to timing -- action: click - selector: ".load-data" -- action: verify_element - selector: ".data-table" # Might not exist yet! -``` - -### 5. Clean Up After Tests - -```yaml -steps: - # Test steps... - -cleanup: - - action: delete_file - path: "./test-data.json" - - action: stop_application -``` - -## Troubleshooting - -### Installation Issues - -**Problem**: `@types/node-pty` not found error - -**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: - -```bash -# Update to latest version -npm install -g github:rysweet/gadugi-agentic-test - -# Or if you cloned, pull latest: -git pull origin main -npm install -npm run build -``` - -**Problem**: `tsc: command not found` when building - -**Solution**: TypeScript not installed - -```bash -npm install # Installs all dependencies including TypeScript -npm run build # Now will work -``` - -### Test Times Out - -**Problem**: Test exceeds timeout and fails - -**Solution**: Increase timeout for slow operations - -```yaml -- action: wait_for_element - selector: ".slow-loading-data" - timeout: 30s # Generous timeout -``` - -### Scenario Format Issues - -**Problem**: "Scenario must have a name" error - -**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: - -```yaml -# WRONG (won't load) -scenario: - name: "My Test" - steps: [...] - -# RIGHT -name: "My Test" -description: "What this tests" -version: "1.0.0" -config: - timeout: 120000 -steps: [...] -``` - -### Element Not Found - -**Problem**: Cannot find element to interact with - -**Solutions**: - -1. Use `wait_for_element` before interaction -2. Verify selector is correct -3. Check if element is in iframe - -```yaml -- action: wait_for_element - selector: ".target" - timeout: 10s -- action: click - selector: ".target" -``` - -### Flaky Tests in CI - -**Problem**: Tests pass locally but fail in CI - -**Solutions**: - -1. Add longer timeouts for CI environments -2. Set explicit viewport sizes -3. Wait for application readiness - -```yaml -scenario: - environment: - viewport: - width: 1920 - height: 1080 - - steps: - - action: wait_for_element - selector: ".app-ready" - timeout: 30s # Generous for CI -``` - -## Framework Version Check - -This skill embeds gadugi-agentic-test version **0.1.0**. 
- -To check for newer versions: - -```bash -python scripts/check-freshness.py -``` - -The script compares the embedded version against the latest GitHub release and notifies you of new features. - -## Related Skills - -- **test-gap-analyzer**: Find untested code paths (unit test focus) -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -## Resources - -### Documentation - -- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation -- **Examples**: `examples/` - 15 complete working examples -- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test - -### Getting Help - -- Review examples in `examples/` directory -- Check `SKILL.md` for detailed explanations -- See troubleshooting section in `SKILL.md` -- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues - -## Quick Reference - -### Basic Test Template - -```yaml -scenario: - name: "Test Name" - description: "What this verifies" - type: cli | tui | web | electron - - prerequisites: - - "Condition 1" - - steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Expected" - - cleanup: - - action: stop_application -``` - -### Common Actions - -**CLI**: - -- `launch` - Start application -- `send_input` - Send text -- `verify_output` - Check output -- `verify_exit_code` - Validate exit code - -**TUI**: - -- `send_keypress` - Send keys -- `verify_screen` - Check screen -- `capture_screenshot` - Save screenshot - -**Web**: - -- `navigate` - Go to URL -- `click` - Click element -- `type` - Type text -- `verify_element` - Check element - -**Electron**: - -- `window_action` - Control windows -- `menu_click` - Click menus -- `dialog_action` - Handle dialogs -- All web actions - -## Success Stories - -Outside-in testing shines when: - -1. 
**Refactoring**: Change implementation without updating tests -2. **Collaboration**: Non-developers can read and understand tests -3. **Documentation**: Tests serve as executable specifications -4. **Regression Prevention**: Catch breaking changes in critical flows -5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron - -Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. - -## Real-World Example: Testing amplihack Guide Agent - -Based on actual testing of amplihack's guide agent, here's a complete working example: - -### Scenario: Naive Student Learning Flow - -```yaml -name: "Guide Agent - Beginner First Question" -description: "Test how guide responds to complete beginner" -version: "1.0.0" - -config: - timeout: 180000 # 3 minutes for AI response - retries: 1 - parallel: false - -agents: - - name: "student-cli" - type: "system" - config: - shell: "bash" - cwd: "/tmp/test-student" - timeout: 180000 - capture_output: true - -steps: - - name: "Student asks: What is amplihack?" - agent: "student-cli" - action: "execute_command" - params: - command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' - expect: - exit_code: 0 - stdout_contains: - - "amplihack" - - "AI" - timeout: 180000 - - - name: "Verify guide gives immediate action" - agent: "student-cli" - action: "execute_command" - params: - command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" - expect: - exit_code: 0 - timeout: 5000 - -metadata: - tags: ["guide-agent", "beginner", "real-world"] - priority: "high" -``` - -### What This Tests - -1. **Installation via uvx** - Tests users can run without installing -2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works -3. **Beginner-friendly response** - Checks for immediate actionable command -4. **Interactive elements** - Looks for TRY IT prompts - -### Running This Test - -```bash -cd gadugi-agentic-test -node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose -``` - -### What We Learned - -**From testing amplihack guide agent**: - -- Long-running AI commands need 180s+ timeouts -- Testing in clean `/tmp` directory avoids state pollution -- Combining `uvx --from git+...` with gadugi tests unreleased branches -- Checking file content (guide.md) verifies features beyond just output -- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/docs/claude/skills/outside-in-testing/README.md b/docs/claude/skills/outside-in-testing/README.md new file mode 120000 index 000000000..40402e77f --- /dev/null +++ b/docs/claude/skills/outside-in-testing/README.md @@ -0,0 +1 @@ +../qa-team/README.md \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/SKILL.md b/docs/claude/skills/outside-in-testing/SKILL.md index 91526566b..891b6fb02 100644 --- a/docs/claude/skills/outside-in-testing/SKILL.md +++ b/docs/claude/skills/outside-in-testing/SKILL.md @@ -1,2045 +1,16 @@ --- name: outside-in-testing description: | - Generates agentic outside-in tests using gadugi-agentic-test framework for CLI, 
TUI, Web, and Electron apps. - Use when you need behavior-driven tests that verify external interfaces without internal implementation knowledge. - Creates YAML test scenarios that AI agents execute, observe, and validate against expected outcomes. - Supports progressive complexity from simple smoke tests to advanced multi-step workflows. -version: 1.0.0 -embedded_framework_version: 0.1.0 -github_repo: https://github.com/rysweet/gadugi-agentic-test -issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + Deprecated compatibility alias for qa-team. + Use when existing recipes or agents still invoke outside-in-testing by name. + Redirects future work to the qa-team skill for outside-in validation and parity loops. +version: 1.1.0 --- -# Outside-In Testing Skill +# outside-in-testing (Alias) -## Purpose [LEVEL 1] +`outside-in-testing` is now a compatibility alias for `qa-team`. -This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. +Use `qa-team` for all new work. This alias remains so existing workflows, recipes, and skills that still invoke `outside-in-testing` continue to resolve cleanly while the rename propagates. -**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. 
- -## When to Use This Skill [LEVEL 1] - -### Perfect For - -- **Smoke Tests**: Quick validation that critical user flows work -- **Behavior-Driven Testing**: Verify features from user perspective -- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron -- **Refactoring Safety**: Tests remain valid when implementation changes -- **AI-Powered Testing**: Let agents handle complex interactions -- **Documentation as Tests**: YAML scenarios double as executable specs - -### Use This Skill When - -- Starting a new project and defining expected behaviors -- Refactoring code and need tests that won't break with internal changes -- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) -- Writing acceptance criteria that can be automatically verified -- Need tests that non-developers can read and understand -- Want to catch regressions in critical user workflows -- Testing complex multi-step interactions - -### Don't Use This Skill When - -- Need unit tests for internal functions (use test-gap-analyzer instead) -- Testing performance or load characteristics -- Need precise timing or concurrency control -- Testing non-interactive batch processes -- Implementation details matter more than behavior - -## Core Concepts [LEVEL 1] - -### Outside-In Testing Philosophy - -**Traditional Inside-Out Testing**: - -```python -# Tightly coupled to implementation -def test_calculator_add(): - calc = Calculator() - result = calc.add(2, 3) - assert result == 5 - assert calc.history == [(2, 3, 5)] # Knows internal state -``` - -**Agentic Outside-In Testing**: - -```yaml -# Implementation-agnostic behavior verification -scenario: - name: "Calculator Addition" - steps: - - action: launch - target: "./calculator" - - action: send_input - value: "add 2 3" - - action: verify_output - contains: "Result: 5" -``` - -**Benefits**: - -- Tests survive refactoring (internal changes don't break tests) -- Readable by non-developers (YAML is declarative) -- 
Platform-agnostic (same structure for CLI/TUI/Web/Electron) -- AI agents handle complexity (navigation, timing, screenshots) - -### The Gadugi Agentic Test Framework [LEVEL 2] - -Gadugi-agentic-test is a Python framework that: - -1. **Parses YAML test scenarios** with declarative steps -2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) -3. **Executes actions** (launch, input, click, wait, verify) -4. **Collects evidence** (screenshots, logs, output captures) -5. **Validates outcomes** against expected results -6. **Generates reports** with evidence trails - -**Architecture**: - -``` -YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine - ↓ - [CLI Agent, TUI Agent, Web Agent, Electron Agent] - ↓ - Observers → Comprehension Agent - ↓ - Evidence Report -``` - -### Progressive Disclosure Levels [LEVEL 1] - -This skill teaches testing in three levels: - -- **Level 1: Fundamentals** - Basic single-action tests, simple verification -- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling -- **Level 3: Advanced** - Custom agents, visual regression, performance validation - -Each example is marked with its level. Start at Level 1 and progress as needed. 
- -## Quick Start [LEVEL 1] - -### Installation - -**Prerequisites (for native module compilation):** - -```bash -# macOS -xcode-select --install - -# Ubuntu/Debian -sudo apt-get install -y build-essential python3 - -# Windows: Install Visual Studio Build Tools with "Desktop development with C++" -``` - -**Install the framework:** - -```bash -# Install globally for CLI access -npm install -g @gadugi/agentic-test - -# Or install locally in your project -npm install @gadugi/agentic-test - -# Verify installation -gadugi-test --version -``` - -### Your First Test (CLI Example) - -Create `test-hello.yaml`: - -```yaml -scenario: - name: "Hello World CLI Test" - description: "Verify CLI prints greeting" - type: cli - - prerequisites: - - "./hello-world executable exists" - - steps: - - action: launch - target: "./hello-world" - - - action: verify_output - contains: "Hello, World!" - - - action: verify_exit_code - expected: 0 -``` - -Run the test: - -```bash -gadugi-test run test-hello.yaml -``` - -Output: - -``` -✓ Scenario: Hello World CLI Test - ✓ Step 1: Launched ./hello-world - ✓ Step 2: Output contains "Hello, World!" 
- ✓ Step 3: Exit code is 0 - -PASSED (3/3 steps successful) -Evidence saved to: ./evidence/test-hello-20250116-093045/ -``` - -### Understanding the YAML Structure [LEVEL 1] - -Every test scenario has this structure: - -```yaml -scenario: - name: "Descriptive test name" - description: "What this test verifies" - type: cli | tui | web | electron - - # Optional metadata - tags: [smoke, critical, auth] - timeout: 30s - - # What must be true before test runs - prerequisites: - - "Condition 1" - - "Condition 2" - - # The test steps (executed sequentially) - steps: - - action: action_name - parameter1: value1 - parameter2: value2 - - - action: verify_something - expected: value - - # Optional cleanup - cleanup: - - action: stop_application -``` - -## Application Types and Agents [LEVEL 2] - -### CLI Applications [LEVEL 1] - -**Use Case**: Command-line tools, scripts, build tools, package managers - -**Supported Actions**: - -- `launch` - Start the CLI program -- `send_input` - Send text or commands via stdin -- `send_signal` - Send OS signals (SIGINT, SIGTERM) -- `wait_for_output` - Wait for specific text in stdout/stderr -- `verify_output` - Check stdout/stderr contains/matches expected text -- `verify_exit_code` - Validate process exit code -- `capture_output` - Save output for later verification - -**Example** (see `examples/cli/calculator-basic.yaml`): - -```yaml -scenario: - name: "CLI Calculator Basic Operations" - type: cli - - steps: - - action: launch - target: "./calculator" - args: ["--mode", "interactive"] - - - action: send_input - value: "add 5 3\n" - - - action: verify_output - contains: "Result: 8" - timeout: 2s - - - action: send_input - value: "multiply 4 7\n" - - - action: verify_output - contains: "Result: 28" - - - action: send_input - value: "exit\n" - - - action: verify_exit_code - expected: 0 -``` - -### TUI Applications [LEVEL 1] - -**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) - -**Supported Actions**: - -- 
`launch` - Start TUI application -- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) -- `wait_for_screen` - Wait for specific text to appear on screen -- `verify_screen` - Check screen contents match expectations -- `capture_screenshot` - Save terminal screenshot (ANSI art) -- `navigate_menu` - Navigate menu structures -- `fill_form` - Fill TUI form fields - -**Example** (see `examples/tui/file-manager-navigation.yaml`): - -```yaml -scenario: - name: "TUI File Manager Navigation" - type: tui - - steps: - - action: launch - target: "./file-manager" - - - action: wait_for_screen - contains: "File Manager v1.0" - timeout: 3s - - - action: send_keypress - value: "down" - times: 3 - - - action: verify_screen - contains: "> documents/" - description: "Third item should be selected" - - - action: send_keypress - value: "enter" - - - action: wait_for_screen - contains: "documents/" - timeout: 2s - - - action: capture_screenshot - save_as: "documents-view.txt" -``` - -### Web Applications [LEVEL 1] - -**Use Case**: Web apps, dashboards, SPAs, admin panels - -**Supported Actions**: - -- `navigate` - Go to URL -- `click` - Click element by selector or text -- `type` - Type into input fields -- `wait_for_element` - Wait for element to appear -- `verify_element` - Check element exists/contains text -- `verify_url` - Validate current URL -- `screenshot` - Capture browser screenshot -- `scroll` - Scroll page or element - -**Example** (see `examples/web/dashboard-smoke-test.yaml`): - -```yaml -scenario: - name: "Dashboard Smoke Test" - type: web - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - - - action: wait_for_element - selector: "h1.dashboard-title" - timeout: 5s - - - action: verify_element - selector: "h1.dashboard-title" - contains: "Analytics Dashboard" - - - action: verify_element - selector: ".widget-stats" - count: 4 - description: "Should have 4 stat widgets" - - - action: click - selector: "button.refresh-data" - - - 
action: wait_for_element - selector: ".loading-spinner" - disappears: true - timeout: 10s - - - action: screenshot - save_as: "dashboard-loaded.png" -``` - -### Electron Applications [LEVEL 2] - -**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) - -**Supported Actions**: - -- `launch` - Start Electron app -- `window_action` - Interact with windows (focus, minimize, close) -- `menu_click` - Click application menu items -- `dialog_action` - Handle native dialogs (open file, save, confirm) -- `ipc_send` - Send IPC message to main process -- `verify_window` - Check window state/properties -- All web actions (since Electron uses Chromium) - -**Example** (see `examples/electron/single-window-basic.yaml`): - -```yaml -scenario: - name: "Electron Single Window Test" - type: electron - - steps: - - action: launch - target: "./dist/my-app" - wait_for_window: true - timeout: 10s - - - action: verify_window - title: "My Application" - visible: true - - - action: menu_click - path: ["File", "New Document"] - - - action: wait_for_element - selector: ".document-editor" - - - action: type - selector: ".document-editor" - value: "Hello from test" - - - action: menu_click - path: ["File", "Save"] - - - action: dialog_action - type: save_file - filename: "test-document.txt" - - - action: verify_window - title_contains: "test-document.txt" -``` - -## Test Scenario Anatomy [LEVEL 2] - -### Metadata Section - -```yaml -scenario: - name: "Clear descriptive name" - description: "Detailed explanation of what this test verifies" - type: cli | tui | web | electron - - # Optional fields - tags: [smoke, regression, auth, payment] - priority: high | medium | low - timeout: 60s # Overall scenario timeout - retry_on_failure: 2 # Retry count - - # Environment requirements - environment: - variables: - API_URL: "http://localhost:8080" - DEBUG: "true" - files: - - "./config.json must exist" -``` - -### Prerequisites - -Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. - -```yaml -prerequisites: - - "./application binary exists" - - "Port 8080 is available" - - "Database is running" - - "User account test@example.com exists" - - "File ./test-data.json exists" -``` - -If prerequisites fail, the test is skipped (not failed). - -### Steps - -Steps execute sequentially. Each step has: - -- **action**: Required - the action to perform -- **Parameters**: Action-specific parameters -- **description**: Optional - human-readable explanation -- **timeout**: Optional - step-specific timeout -- **continue_on_failure**: Optional - don't fail scenario if step fails - -```yaml -steps: - # Simple action - - action: launch - target: "./app" - - # Action with multiple parameters - - action: verify_output - contains: "Success" - timeout: 5s - description: "App should print success message" - - # Continue even if this fails - - action: click - selector: ".optional-button" - continue_on_failure: true -``` - -### Verification Actions [LEVEL 1] - -Verification actions check expected outcomes. They fail the test if expectations aren't met. - -**Common Verifications**: - -```yaml -# CLI: Check output contains text -- action: verify_output - contains: "Expected text" - -# CLI: Check output matches regex -- action: verify_output - matches: "Result: \\d+" - -# CLI: Check exit code -- action: verify_exit_code - expected: 0 - -# Web/TUI: Check element exists -- action: verify_element - selector: ".success-message" - -# Web/TUI: Check element contains text -- action: verify_element - selector: "h1" - contains: "Welcome" - -# Web: Check URL -- action: verify_url - equals: "http://localhost:3000/dashboard" - -# Web: Check element count -- action: verify_element - selector: ".list-item" - count: 5 - -# Electron: Check window state -- action: verify_window - title: "My App" - visible: true - focused: true -``` - -### Cleanup Section - -Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. - -```yaml -cleanup: - - action: stop_application - force: true - - - action: delete_file - path: "./temp-test-data.json" - - - action: reset_database - connection: "test_db" -``` - -## Advanced Patterns [LEVEL 2] - -### Conditional Logic - -Execute steps based on conditions: - -```yaml -steps: - - action: launch - target: "./app" - - - action: verify_output - contains: "Login required" - id: login_check - - # Only run if login_check passed - - action: send_input - value: "login admin password123\n" - condition: login_check.passed -``` - -### Variables and Templating [LEVEL 2] - -Define variables and use them throughout the scenario: - -```yaml -scenario: - name: "Test with Variables" - type: cli - - variables: - username: "testuser" - api_url: "http://localhost:8080" - - steps: - - action: launch - target: "./app" - args: ["--api", "${api_url}"] - - - action: send_input - value: "login ${username}\n" - - - action: verify_output - contains: "Welcome, ${username}!" 
-``` - -### Loops and Repetition [LEVEL 2] - -Repeat actions multiple times: - -```yaml -steps: - - action: launch - target: "./app" - - # Repeat action N times - - action: send_keypress - value: "down" - times: 5 - - # Loop over list - - action: send_input - value: "${item}\n" - for_each: - - "apple" - - "banana" - - "cherry" -``` - -### Error Handling [LEVEL 2] - -Handle expected errors gracefully: - -```yaml -steps: - - action: send_input - value: "invalid command\n" - - # Verify error message appears - - action: verify_output - contains: "Error: Unknown command" - expected_failure: true - - # App should still be running - - action: verify_running - expected: true -``` - -### Multi-Step Workflows [LEVEL 2] - -Complex scenarios with multiple phases: - -```yaml -scenario: - name: "E-commerce Purchase Flow" - type: web - - steps: - # Phase 1: Authentication - - action: navigate - url: "http://localhost:3000/login" - - - action: type - selector: "#username" - value: "test@example.com" - - - action: type - selector: "#password" - value: "password123" - - - action: click - selector: "button[type=submit]" - - - action: wait_for_url - contains: "/dashboard" - - # Phase 2: Product Selection - - action: navigate - url: "http://localhost:3000/products" - - - action: click - text: "Add to Cart" - nth: 1 - - - action: verify_element - selector: ".cart-badge" - contains: "1" - - # Phase 3: Checkout - - action: click - selector: ".cart-icon" - - - action: click - text: "Proceed to Checkout" - - - action: fill_form - fields: - "#shipping-address": "123 Test St" - "#city": "Testville" - "#zip": "12345" - - - action: click - selector: "#place-order" - - - action: wait_for_element - selector: ".order-confirmation" - timeout: 10s - - - action: verify_element - selector: ".order-number" - exists: true -``` - -## Level 3: Advanced Topics [LEVEL 3] - -### Custom Comprehension Agents - -The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. - -**Default Comprehension Agent**: - -- Observes raw output (text, HTML, screenshots) -- Applies general reasoning to verify expectations -- Returns pass/fail with explanation - -**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): - -```yaml -scenario: - name: "Financial Dashboard Test with Custom Agent" - type: web - - # Define custom comprehension logic - comprehension_agent: - model: "gpt-4" - system_prompt: | - You are a financial data validator. When verifying dashboard content: - 1. All monetary values must use proper formatting ($1,234.56) - 2. Percentages must include % symbol - 3. Dates must be in MM/DD/YYYY format - 4. Negative values must be red - 5. Chart data must be logically consistent - - Be strict about formatting and data consistency. - - examples: - - input: "Total Revenue: 45000" - output: "FAIL - Missing currency symbol and comma separator" - - input: "Total Revenue: $45,000.00" - output: "PASS - Correctly formatted" - - steps: - - action: navigate - url: "http://localhost:3000/financial-dashboard" - - - action: verify_element - selector: ".revenue-widget" - use_custom_comprehension: true - description: "Revenue should be properly formatted" -``` - -### Visual Regression Testing [LEVEL 3] - -Compare screenshots against baseline images: - -```yaml -scenario: - name: "Visual Regression - Homepage" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: wait_for_element - selector: ".page-loaded" - - - action: screenshot - save_as: "homepage.png" - - - action: visual_compare - screenshot: "homepage.png" - baseline: "./baselines/homepage-baseline.png" - threshold: 0.05 # 5% difference allowed - highlight_differences: true -``` - -### Performance Validation [LEVEL 3] - -Measure and validate performance metrics: - -```yaml -scenario: - name: "Performance - Dashboard Load Time" - type: web - - performance: - metrics: - 
- page_load_time - - first_contentful_paint - - time_to_interactive - - steps: - - action: navigate - url: "http://localhost:3000/dashboard" - measure_timing: true - - - action: verify_performance - metric: page_load_time - less_than: 3000 # 3 seconds - - - action: verify_performance - metric: first_contentful_paint - less_than: 1500 # 1.5 seconds -``` - -### Multi-Window Coordination (Electron) [LEVEL 3] - -Test applications with multiple windows: - -```yaml -scenario: - name: "Multi-Window Chat Application" - type: electron - - steps: - - action: launch - target: "./chat-app" - - - action: menu_click - path: ["Window", "New Chat"] - - - action: verify_window - count: 2 - - - action: window_action - window: 1 - action: focus - - - action: type - selector: ".message-input" - value: "Hello from window 1" - - - action: click - selector: ".send-button" - - - action: window_action - window: 2 - action: focus - - - action: wait_for_element - selector: ".message" - contains: "Hello from window 1" - timeout: 5s -``` - -### IPC Testing (Electron) [LEVEL 3] - -Test Inter-Process Communication between renderer and main: - -```yaml -scenario: - name: "Electron IPC Communication" - type: electron - - steps: - - action: launch - target: "./my-app" - - - action: ipc_send - channel: "get-system-info" - - - action: ipc_expect - channel: "system-info-reply" - timeout: 3s - - - action: verify_ipc_payload - contains: - platform: "darwin" - arch: "x64" -``` - -### Custom Reporters [LEVEL 3] - -Generate custom test reports: - -```yaml -scenario: - name: "Test with Custom Reporting" - type: cli - - reporting: - format: custom - template: "./report-template.html" - include: - - screenshots - - logs - - timing_data - - video_recording - - email: - enabled: true - recipients: ["team@example.com"] - on_failure_only: true - - steps: - # ... test steps ... 
-``` - -## Framework Integration [LEVEL 2] - -### Running Tests - -**Single test**: - -```bash -gadugi-test run test-scenario.yaml -``` - -**Multiple tests**: - -```bash -gadugi-test run tests/*.yaml -``` - -**With options**: - -```bash -gadugi-test run test.yaml \ - --verbose \ - --evidence-dir ./test-evidence \ - --retry 2 \ - --timeout 60s -``` - -### CI/CD Integration - -**GitHub Actions** (`.github/workflows/agentic-tests.yml`): - -```yaml -name: Agentic Tests - -on: [push, pull_request] - -jobs: - test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - - - name: Install gadugi-agentic-test - run: npm install -g @gadugi/agentic-test - - - name: Run tests - run: gadugi-test run tests/agentic/*.yaml - - - name: Upload evidence - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-evidence - path: ./evidence/ -``` - -### Evidence Collection - -The framework automatically collects evidence for debugging: - -``` -evidence/ - scenario-name-20250116-093045/ - ├── scenario.yaml # Original test scenario - ├── execution-log.json # Detailed execution log - ├── screenshots/ # All captured screenshots - │ ├── step-1.png - │ ├── step-3.png - │ └── step-5.png - ├── output-captures/ # CLI/TUI output - │ ├── stdout.txt - │ └── stderr.txt - ├── timing.json # Performance metrics - └── report.html # Human-readable report -``` - -## Best Practices [LEVEL 2] - -### 1. 
Start Simple, Add Complexity - -Begin with basic smoke tests, then add detail: - -```yaml -# Level 1: Basic smoke test -steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - -# Level 2: Add interaction -steps: - - action: launch - target: "./app" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" - -# Level 3: Add error handling and edge cases -steps: - - action: launch - target: "./app" - - action: send_input - value: "invalid\n" - - action: verify_output - contains: "Error" - - action: send_input - value: "command\n" - - action: verify_output - contains: "Success" -``` - -### 2. Use Descriptive Names and Descriptions - -```yaml -# Bad -scenario: - name: "Test 1" - steps: - - action: click - selector: "button" - -# Good -scenario: - name: "User Login Flow - Valid Credentials" - description: "Verifies user can log in with valid email and password" - steps: - - action: click - selector: "button[type=submit]" - description: "Submit login form" -``` - -### 3. Verify Critical Paths Only - -Don't test every tiny detail. Focus on user-facing behavior: - -```yaml -# Bad - Tests implementation details -- action: verify_element - selector: ".internal-cache-status" - contains: "initialized" - -# Good - Tests user-visible behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome back" -``` - -### 4. Use Prerequisites for Test Dependencies - -```yaml -scenario: - name: "User Profile Edit" - - prerequisites: - - "User testuser@example.com exists" - - "User is logged in" - - "Database is seeded with test data" - - steps: - # Test assumes prerequisites are met - - action: navigate - url: "/profile" -``` - -### 5. 
Keep Tests Independent - -Each test should set up its own state and clean up: - -```yaml -scenario: - name: "Create Document" - - steps: - # Create test user (don't assume exists) - - action: api_call - endpoint: "/api/users" - method: POST - data: { email: "test@example.com" } - - # Run test - - action: navigate - url: "/documents/new" - # ... test steps ... - - cleanup: - # Remove test user - - action: api_call - endpoint: "/api/users/test@example.com" - method: DELETE -``` - -### 6. Use Tags for Organization - -```yaml -scenario: - name: "Critical Payment Flow" - tags: [smoke, critical, payment, e2e] - # Run with: gadugi-test run --tags critical -``` - -### 7. Add Timeouts Strategically - -```yaml -steps: - # Quick operations - short timeout - - action: click - selector: "button" - timeout: 2s - - # Network operations - longer timeout - - action: wait_for_element - selector: ".data-loaded" - timeout: 10s - - # Complex operations - generous timeout - - action: verify_element - selector: ".report-generated" - timeout: 60s -``` - -## Testing Strategies [LEVEL 2] - -### Smoke Tests - -Minimal tests that verify critical functionality works: - -```yaml -scenario: - name: "Smoke Test - Application Starts" - tags: [smoke] - - steps: - - action: launch - target: "./app" - - action: verify_output - contains: "Ready" - timeout: 5s -``` - -Run before every commit: `gadugi-test run --tags smoke` - -### Happy Path Tests - -Test the ideal user journey: - -```yaml -scenario: - name: "Happy Path - User Registration" - - steps: - - action: navigate - url: "/register" - - action: type - selector: "#email" - value: "newuser@example.com" - - action: type - selector: "#password" - value: "SecurePass123!" 
- - action: click - selector: "button[type=submit]" - - action: wait_for_url - contains: "/welcome" -``` - -### Error Path Tests - -Verify error handling: - -```yaml -scenario: - name: "Error Path - Invalid Login" - - steps: - - action: navigate - url: "/login" - - action: type - selector: "#email" - value: "invalid@example.com" - - action: type - selector: "#password" - value: "wrongpassword" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".error-message" - contains: "Invalid credentials" -``` - -### Regression Tests - -Prevent bugs from reappearing: - -```yaml -scenario: - name: "Regression - Issue #123 Password Reset" - tags: [regression, bug-123] - description: "Verifies password reset email is sent (was broken in v1.2)" - - steps: - - action: navigate - url: "/forgot-password" - - action: type - selector: "#email" - value: "user@example.com" - - action: click - selector: "button[type=submit]" - - action: verify_element - selector: ".success-message" - contains: "Reset email sent" -``` - -## Philosophy Alignment [LEVEL 2] - -This skill follows amplihack's core principles: - -### Ruthless Simplicity - -- **YAML over code**: Declarative tests are simpler than programmatic tests -- **No implementation details**: Tests describe WHAT, not HOW -- **Minimal boilerplate**: Each test is focused and concise - -### Modular Design (Bricks & Studs) - -- **Self-contained scenarios**: Each YAML file is independent -- **Clear contracts**: Steps have well-defined inputs/outputs -- **Composable actions**: Reuse actions across different test types - -### Zero-BS Implementation - -- **No stubs**: Every example in this skill is a complete, runnable test -- **Working defaults**: Tests run with minimal configuration -- **Clear errors**: Framework provides actionable error messages - -### Outside-In Thinking - -- **User perspective**: Tests verify behavior users care about -- **Implementation agnostic**: Refactoring doesn't break tests -- 
**Behavior-driven**: Focus on outcomes, not internals - -## Common Pitfalls and Solutions [LEVEL 2] - -### Pitfall 1: Over-Specifying - -**Problem**: Test breaks when UI changes slightly - -```yaml -# Bad - Too specific -- action: verify_element - selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" - contains: "Welcome" -``` - -**Solution**: Use flexible selectors - -```yaml -# Good - Focused on behavior -- action: verify_element - selector: ".welcome-message" - contains: "Welcome" -``` - -### Pitfall 2: Missing Waits - -**Problem**: Test fails intermittently due to timing - -```yaml -# Bad - No wait for async operation -- action: click - selector: ".load-data-button" -- action: verify_element - selector: ".data-table" # May not exist yet! -``` - -**Solution**: Always wait for dynamic content - -```yaml -# Good - Wait for element to appear -- action: click - selector: ".load-data-button" -- action: wait_for_element - selector: ".data-table" - timeout: 10s -- action: verify_element - selector: ".data-table" -``` - -### Pitfall 3: Testing Implementation Details - -**Problem**: Test coupled to internal state - -```yaml -# Bad - Tests internal cache state -- action: verify_output - contains: "Cache hit ratio: 85%" -``` - -**Solution**: Test user-visible behavior - -```yaml -# Good - Tests response time -- action: verify_response_time - less_than: 100ms - description: "Fast response indicates caching works" -``` - -### Pitfall 4: Flaky Assertions - -**Problem**: Assertions depend on exact timing or formatting - -```yaml -# Bad - Exact timestamp match will fail -- action: verify_output - contains: "Created at: 2025-11-16 09:30:45" -``` - -**Solution**: Use flexible patterns - -```yaml -# Good - Match pattern, not exact value -- action: verify_output - matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" -``` - -### Pitfall 5: Not Cleaning Up - -**Problem**: Tests leave artifacts that affect future runs - -```yaml -# Bad - No 
cleanup -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" -``` - -**Solution**: Always use cleanup section - -```yaml -# Good - Cleanup ensures clean slate -steps: - - action: create_file - path: "./test-data.json" - - action: launch - target: "./app" - -cleanup: - - action: delete_file - path: "./test-data.json" -``` - -## Example Library [LEVEL 1] - -This skill includes 15 complete working examples organized by application type and complexity level: - -### CLI Examples - -1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations -2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery -3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI - -### TUI Examples - -4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation -5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation -6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing - -### Web Examples - -7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification -8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow -9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing - -### Electron Examples - -10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test -11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration -12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions -13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing - -### Custom Agent Examples - -14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic -15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting - -See `examples/` directory for full example code with inline documentation. - -## Framework Freshness Check [LEVEL 3] - -This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: - -```bash -# Run the freshness check script -python scripts/check-freshness.py - -# Output if outdated: -# WARNING: Embedded framework version is 0.1.0 -# Latest GitHub version is 0.2.5 -# -# New features in 0.2.5: -# - Native Playwright support for web testing -# - Video recording for all test types -# - Parallel test execution -# -# Update with: npm update -g @gadugi/agentic-test -``` - -The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. - -**When to Update This Skill**: - -- New framework version adds significant features -- Breaking changes in YAML schema -- New application types supported -- Agent capabilities expand - -## Integration with Other Skills [LEVEL 2] - -### Works Well With - -**test-gap-analyzer**: - -- Use test-gap-analyzer to find untested functions -- Write outside-in tests for critical user-facing paths -- Use unit tests (from test-gap-analyzer) for internal functions - -**philosophy-guardian**: - -- Ensure test YAML follows ruthless simplicity -- Verify tests focus on behavior, not implementation - -**pr-review-assistant**: - -- Include outside-in tests in PR reviews -- Verify tests cover changed functionality -- Check test readability and clarity - -**module-spec-generator**: - -- Generate module specs that include outside-in test scenarios -- Use specs as templates for test YAML - -### Example Combined Workflow - -```bash -# 1. Analyze coverage gaps -claude "Use test-gap-analyzer on ./src" - -# 2. Write outside-in tests for critical paths -claude "Use outside-in-testing to create web tests for authentication" - -# 3. Verify philosophy compliance -claude "Use philosophy-guardian to review new test files" - -# 4. 
Include in PR -git add tests/agentic/ -git commit -m "Add outside-in tests for auth flow" -``` - -## Troubleshooting [LEVEL 2] - -### Test Times Out - -**Symptom**: Test exceeds timeout and fails - -**Causes**: - -- Application takes longer to start than expected -- Network requests are slow -- Element never appears (incorrect selector) - -**Solutions**: - -```yaml -# Increase timeout -- action: wait_for_element - selector: ".slow-loading-element" - timeout: 30s # Increase from default - -# Add intermediate verification -- action: launch - target: "./app" -- action: wait_for_output - contains: "Initializing..." - timeout: 5s -- action: wait_for_output - contains: "Ready" - timeout: 20s -``` - -### Element Not Found - -**Symptom**: `verify_element` or `click` fails with "element not found" - -**Causes**: - -- Incorrect CSS selector -- Element not yet rendered (timing issue) -- Element in iframe or shadow DOM - -**Solutions**: - -```yaml -# Add wait before interaction -- action: wait_for_element - selector: ".target-element" - timeout: 10s -- action: click - selector: ".target-element" - -# Use more specific selector -- action: click - selector: "button[data-testid='submit-button']" - -# Handle iframe -- action: switch_to_iframe - selector: "iframe#payment-frame" -- action: click - selector: ".pay-now-button" -``` - -### Test Passes Locally, Fails in CI - -**Symptom**: Test works on dev machine but fails in CI environment - -**Causes**: - -- Different screen size (web/Electron) -- Missing dependencies -- Timing differences (slower CI machines) -- Environment variable differences - -**Solutions**: - -```yaml -# Set explicit viewport size (web/Electron) -scenario: - environment: - viewport: - width: 1920 - height: 1080 - -# Add longer timeouts in CI -- action: wait_for_element - selector: ".element" - timeout: 30s # Generous for CI - -# Verify prerequisites -prerequisites: - - "Chrome browser installed" - - "Environment variable API_KEY is set" -``` - -### Output 
Doesn't Match Expected - -**Symptom**: `verify_output` fails even though output looks correct - -**Causes**: - -- Extra whitespace or newlines -- ANSI color codes in output -- Case sensitivity - -**Solutions**: - -```yaml -# Use flexible matching -- action: verify_output - matches: "Result:\\s+Success" # Allow flexible whitespace - -# Strip ANSI codes -- action: verify_output - contains: "Success" - strip_ansi: true - -# Case-insensitive match -- action: verify_output - contains: "success" - case_sensitive: false -``` - -## Reference: Action Catalog [LEVEL 3] - -### CLI Actions - -| Action | Parameters | Description | -| ------------------ | -------------------------------- | -------------------------------------- | -| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | -| `send_input` | `value`, `delay` | Send text to stdin | -| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | -| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | -| `verify_output` | `contains`, `matches`, `stream` | Check output content | -| `verify_exit_code` | `expected` | Validate exit code | -| `capture_output` | `save_as`, `stream` | Save output to file | - -### TUI Actions - -| Action | Parameters | Description | -| -------------------- | --------------------------------- | ------------------------ | -| `launch` | `target`, `args`, `terminal_size` | Start TUI application | -| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | -| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | -| `verify_screen` | `contains`, `matches`, `region` | Check screen content | -| `capture_screenshot` | `save_as` | Save terminal screenshot | -| `navigate_menu` | `path` | Navigate menu structure | -| `fill_form` | `fields` | Fill TUI form fields | - -### Web Actions - -| Action | Parameters | Description | -| ------------------ | ----------------------------------------- | ---------------------- | -| 
`navigate` | `url`, `wait_for_load` | Go to URL | -| `click` | `selector`, `text`, `nth` | Click element | -| `type` | `selector`, `value`, `delay` | Type into input | -| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | -| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | -| `verify_url` | `equals`, `contains`, `matches` | Validate URL | -| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | -| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | -| `select_option` | `selector`, `value` | Select dropdown option | -| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | - -### Electron Actions - -| Action | Parameters | Description | -| --------------- | -------------------------------------- | -------------------------- | -| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | -| `window_action` | `window`, `action` | Interact with windows | -| `menu_click` | `path` | Click menu items | -| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | -| `ipc_send` | `channel`, `data` | Send IPC message | -| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | -| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | -| All web actions | | Electron includes Chromium | - -### Common Parameters - -| Parameter | Type | Description | -| --------------------- | ---------- | ------------------------------------ | -| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | -| `description` | String | Human-readable step explanation | -| `continue_on_failure` | Boolean | Don't fail scenario if step fails | -| `id` | String | Step identifier for conditionals | -| `condition` | Expression | Execute step only if condition true | - -## Quick Reference: YAML Template [LEVEL 1] - -```yaml -scenario: - # Required fields - name: "Test Name" - description: "What this test verifies" - type: cli | tui | web | electron - 
- # Optional metadata - tags: [smoke, critical] - timeout: 60s - - # What must be true before running - prerequisites: - - "Condition 1" - - "Condition 2" - - # Environment setup - environment: - variables: - VAR_NAME: "value" - - # Variables for templating - variables: - username: "testuser" - - # Test steps (executed in order) - steps: - - action: launch - target: "./app" - - - action: send_input - value: "command\n" - - - action: verify_output - contains: "Success" - timeout: 5s - - # Cleanup (always runs) - cleanup: - - action: stop_application -``` - -## Skill Usage Examples [LEVEL 1] - -### Invoke This Skill - -``` -Claude, use the outside-in-testing skill to create a CLI test for my calculator app. - -Claude, use outside-in-testing to generate web tests for the login flow. - -Claude, create TUI tests using the outside-in-testing skill for file navigation. -``` - -### What You'll Get - -1. **Complete YAML test scenario** matching your requirements -2. **Inline documentation** explaining each section -3. **Best practices applied** (timeouts, waits, verification) -4. **Appropriate complexity level** (1, 2, or 3 based on needs) -5. 
**Instructions for running** the test - -### Example Request and Response - -**User**: "Create a web test for user login with valid credentials" - -**Claude** (using this skill): - -```yaml -scenario: - name: "User Login - Valid Credentials" - description: "Verifies user can successfully log in with valid email and password" - type: web - tags: [smoke, auth, critical] - - prerequisites: - - "User test@example.com exists with password 'TestPass123!'" - - "Application is running on http://localhost:3000" - - steps: - # Navigate to login page - - action: navigate - url: "http://localhost:3000/login" - description: "Open login page" - - # Wait for page to fully load - - action: wait_for_element - selector: "form.login-form" - timeout: 5s - description: "Wait for login form to render" - - # Fill in email field - - action: type - selector: "input[name='email']" - value: "test@example.com" - description: "Enter email address" - - # Fill in password field - - action: type - selector: "input[name='password']" - value: "TestPass123!" - description: "Enter password" - - # Submit form - - action: click - selector: "button[type='submit']" - description: "Click login button" - - # Wait for redirect to dashboard - - action: wait_for_url - contains: "/dashboard" - timeout: 10s - description: "Should redirect to dashboard after successful login" - - # Verify user is logged in - - action: verify_element - selector: ".user-profile" - contains: "test@example.com" - description: "User email should appear in profile widget" - - # Capture evidence - - action: screenshot - save_as: "successful-login.png" - description: "Save screenshot of logged-in state" -# To run this test: -# 1. Save as test-login-valid.yaml -# 2. Run: gadugi-test run test-login-valid.yaml -# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ -``` - -## Related Resources [LEVEL 1] - -### Official Documentation - -- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test -- **Framework Docs**: See repo README and docs/ folder -- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 - -## Level 4: Shadow Environment Integration [LEVEL 4] - -Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. - -### Why Use Shadow Environments for Testing - -1. **Clean State**: Fresh container, no host pollution -2. **Local Changes**: Test uncommitted code exactly as-is -3. **Multi-Repo**: Coordinate changes across multiple repos -4. **CI Parity**: What shadow sees ≈ what CI will see - -### Shadow Testing Workflow - -For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: - -#### Pattern 1: CLI Tests in Shadow (Amplifier) - -```python -# Create shadow with your local library changes -shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) - -# Run outside-in test scenarios inside shadow -shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") - -# Extract evidence -shadow.extract(shadow_id, "/evidence", "./test-evidence") - -# Cleanup -shadow.destroy(shadow_id) -``` - -#### Pattern 2: CLI Tests in Shadow (Standalone) - -```bash -# Create shadow with local changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test - -# Run your test scenarios -amplifier-shadow exec test "gadugi-test run test-scenario.yaml" - -# Extract results -amplifier-shadow extract test /evidence ./test-evidence - -# Cleanup -amplifier-shadow destroy test -``` - -#### Pattern 3: Multi-Repo Integration Test - -```yaml -# test-multi-repo.yaml -scenario: - name: "Multi-Repo Integration Test" - type: cli - - prerequisites: - - "Shadow environment with core-lib and cli-tool" - - steps: - - action: launch - target: "cli-tool" - - - action: send_input - value: "process --lib core-lib\n" - - - action: verify_output - contains: "Success: Using core-lib" -``` - -```bash -# Setup shadow with both repos -amplifier-shadow create \ - --local ~/repos/core-lib:org/core-lib \ - --local ~/repos/cli-tool:org/cli-tool \ - --name multi-test - -# Run test that exercises both -amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" -``` - -#### Pattern 4: Web App Testing in Shadow - -```yaml -# test-web-app.yaml -scenario: - name: "Web App with Local Library" - type: web - - steps: - - action: navigate - url: "http://localhost:3000" - - - action: click - selector: "button.process" - - - action: verify_element - selector: ".result" - contains: "Processed with v2.0" # Your local version -``` - -```bash -# Shadow with library changes -amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test - -# Start web app inside shadow (uses your local lib) -amplifier-shadow exec web-test " - cd /workspace && - git clone https://github.com/org/web-app && - cd web-app && - npm install && # Pulls your local my-lib via git URL rewriting - npm start & -" - -# Wait for app to start, then run tests -amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" -``` - -### Verification Best Practices - -When running tests in shadow, always verify your local sources are being used: - -```bash -# After shadow.create, check snapshot commits -shadow.status(shadow_id) -# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} - -# When your test installs dependencies, verify commit matches -# Look in test output for: my-lib @ git+...@abc1234 -``` - -### Complete Example: Library Change Validation - -```yaml -# test-library-change.yaml - Outside-in test -scenario: - name: "Validate Library Breaking Change" - type: cli - description: "Test that dependent app still works with new library API" - - steps: - - action: launch - target: "/workspace/org/dependent-app/cli.py" - - - action: send_input - value: "process data.json\n" - - - action: verify_output - contains: "Processed successfully" - description: "New library API should still work" - - - action: verify_exit_code - expected: 0 -``` - -```bash -# Complete workflow -# 1. Create shadow with your breaking change -amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test - -# 2. Install dependent app (pulls your local lib) -amplifier-shadow exec breaking-test " - cd /workspace && - git clone https://github.com/org/dependent-app && - cd dependent-app && - pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) - echo 'Ready to test' -" - -# 3. Run outside-in test -amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" - -# If test passes, your breaking change is compatible! 
-# If test fails, you've caught the issue before pushing -``` - -### When to Use Shadow Integration - -Use shadow + outside-in tests when: - -- ✅ Testing library changes with dependent projects -- ✅ Validating multi-repo coordinated changes -- ✅ Need clean-state validation before pushing -- ✅ Want to catch integration issues early -- ✅ Testing that setup/install procedures work - -Don't use shadow for: - -- ❌ Simple unit tests (too much overhead) -- ❌ Tests of already-committed code (shadow adds no value) -- ❌ Performance testing (container overhead skews results) - -### Learn More - -For complete shadow environment documentation, including: - -- Shell scripts for DIY setup -- Docker Compose examples -- Multi-language support (Python, Node, Rust, Go) -- Troubleshooting and verification techniques - -**Load the shadow-testing skill**: - -``` -Claude, use the shadow-testing skill to set up a shadow environment -``` - -Or for Amplifier users, the shadow tool is built-in: - -```python -shadow.create(local_sources=["~/repos/lib:org/lib"]) -``` - ---- - -### Related Skills - -- **shadow-testing**: Complete shadow environment setup and usage -- **test-gap-analyzer**: Find untested code paths -- **philosophy-guardian**: Review test philosophy compliance -- **pr-review-assistant**: Include tests in PR reviews -- **module-spec-generator**: Generate specs with test scenarios - -### Further Reading - -- Outside-in vs inside-out testing approaches -- Behavior-driven development (BDD) principles -- AI-powered testing best practices -- Test automation patterns -- Shadow environment testing methodology - -## Changelog [LEVEL 3] - -### Version 1.1.0 (2026-01-29) - -- **NEW**: Level 4 - Shadow Environment Integration -- Added complete shadow testing workflow patterns -- Integration examples for Amplifier native and standalone CLI -- Multi-repo integration test patterns -- Web app testing in shadow environments -- Complete workflow example for library change validation -- References 
to shadow-testing skill for deep-dive documentation - -### Version 1.0.0 (2025-11-16) - -- Initial skill release -- Support for CLI, TUI, Web, and Electron applications -- 15 complete working examples -- Progressive disclosure levels (1, 2, 3) -- Embedded gadugi-agentic-test framework documentation (v0.1.0) -- Freshness check script for version monitoring -- Full integration with amplihack philosophy -- Comprehensive troubleshooting guide -- Action reference catalog - ---- - -**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. - -Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. +See `../qa-team/README.md` and `../qa-team/SKILL.md` for the primary documentation. diff --git a/docs/claude/skills/outside-in-testing/examples b/docs/claude/skills/outside-in-testing/examples new file mode 120000 index 000000000..68c765545 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/examples @@ -0,0 +1 @@ +../qa-team/examples \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/scripts b/docs/claude/skills/outside-in-testing/scripts new file mode 120000 index 000000000..ff9bde766 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/scripts @@ -0,0 +1 @@ +../qa-team/scripts \ No newline at end of file diff --git a/docs/claude/skills/outside-in-testing/tests b/docs/claude/skills/outside-in-testing/tests new file mode 120000 index 000000000..371fb2568 --- /dev/null +++ b/docs/claude/skills/outside-in-testing/tests @@ -0,0 +1 @@ +../qa-team/tests \ No newline at end of file diff --git a/docs/claude/skills/qa-team/README.md b/docs/claude/skills/qa-team/README.md new file mode 100644 index 000000000..51e32504a --- /dev/null +++ 
b/docs/claude/skills/qa-team/README.md @@ -0,0 +1,794 @@ +# QA Team Skill + +## Overview + +QA Team is the renamed primary skill for outside-in validation. It helps you create behavior-driven tests that verify applications from an external user's perspective without requiring knowledge of internal implementation, and it now also covers side-by-side parity loops for legacy-vs-new or A-vs-B comparisons. + +**Key Benefits**: + +- Tests survive refactoring (implementation changes don't break tests) +- Readable by non-developers (declarative YAML format) +- Platform-agnostic (same structure for CLI, TUI, Web, Electron) +- AI-powered execution (agents handle complex interactions) +- Evidence-based validation (screenshots, logs, output captures) + +## What is Outside-In Testing? + +**Traditional Testing** (Inside-Out): + +```python +# Knows internal implementation +def test_user_service(): + service = UserService() + user = service.create_user("test@example.com") + assert user.id is not None + assert user.email == "test@example.com" + assert user.created_at <= datetime.now() # Internal state +``` + +**Outside-In Testing**: + +```yaml +# Only knows external behavior +scenario: + name: "User Registration" + type: web + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "test@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_url + contains: "/welcome" +``` + +The outside-in test verifies the same functionality but: + +- Doesn't depend on internal classes (`UserService`) +- Doesn't check internal state (`created_at`, `id`) +- Tests from user's perspective (what they see and do) +- Remains valid even if implementation completely changes + +## When to Use This Skill + +### Perfect Scenarios + +1. **Smoke Testing** - Quickly verify critical paths work +2. **Acceptance Testing** - Validate features meet requirements +3. **Regression Testing** - Ensure changes don't break existing behavior +4. 
**Cross-Platform Testing** - Test CLI, TUI, Web, Electron with same approach +5. **Refactoring Safety** - Tests protect behavior during rewrites +6. **Documentation as Tests** - YAML doubles as executable specifications + +### Complementary to Unit Tests + +Outside-in tests work best alongside unit tests: + +- **Unit Tests** (60%): Internal logic, edge cases, error handling +- **Integration Tests** (30%): Component interactions, API contracts +- **Outside-In Tests** (10%): Critical user workflows, end-to-end paths + +Use `test-gap-analyzer` skill for unit test guidance, and this skill for behavioral validation. + +## Parity, Shadow, and A/B Comparison + +Use QA Team when you need more than a single implementation test: + +- compare legacy vs replacement behavior side by side +- run paired observable tmux sessions with `--observable` +- execute the same parity suite remotely with `--ssh-target azlin` +- log rollout divergences with `--shadow-mode --shadow-log ...` + +Example local parity command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary +``` + +Example shadow-mode command: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start + +### 1. 
Install Framework + +**Option A: From GitHub (Recommended - Latest)** + +```bash +# Install globally +npm install -g github:rysweet/gadugi-agentic-test + +# Or use with npx +npx github:rysweet/gadugi-agentic-test gadugi-test run test.yaml + +# Or clone and build +git clone https://github.com/rysweet/gadugi-agentic-test +cd gadugi-agentic-test +npm install +npm run build +node dist/cli.js run scenarios/your-test.yaml +``` + +**Option B: From npm (when published)** + +```bash +npm install -g gadugi-agentic-test +gadugi-test run test.yaml +``` + +**Note**: Framework is Node.js/TypeScript based, not Python. If you get dependency errors, see troubleshooting below. + +### 2. Create Your First Test + +Save as `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World Test" + description: "Verify application prints greeting" + type: cli + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +### 3. Run the Test + +**If installed globally**: + +```bash +gadugi-test run test-hello.yaml +``` + +**If using from source**: + +```bash +cd /path/to/gadugi-agentic-test +node dist/cli.js run /path/to/test-hello.yaml +``` + +**Run all tests in directory**: + +```bash +node dist/cli.js run -d ./my-test-scenarios +``` + +### 4. 
Review Results + +The framework generates evidence in `./evidence/`: + +- Execution logs +- Output captures +- Screenshots (for TUI/Web/Electron) +- Timing data +- HTML report + +## Supported Application Types + +### CLI (Command-Line Interface) + +Test command-line tools, scripts, and utilities: + +```yaml +scenario: + name: "Git Status Test" + type: cli + steps: + - action: launch + target: "git" + args: ["status"] + - action: verify_output + contains: "On branch" +``` + +**Common Use Cases**: + +- Package managers (npm, pip, cargo) +- Build tools (make, gradle, webpack) +- DevOps tools (docker, kubectl, terraform) +- Custom CLI applications + +### TUI (Terminal User Interface) + +Test interactive terminal applications: + +```yaml +scenario: + name: "TUI Navigation" + type: tui + steps: + - action: launch + target: "./file-manager" + - action: send_keypress + value: "down" + times: 3 + - action: verify_screen + contains: "> documents/" +``` + +**Common Use Cases**: + +- System monitors (htop, top) +- Text editors (vim, nano) +- File managers (ranger, midnight commander) +- Custom TUI dashboards + +### Web Applications + +Test browser-based applications: + +```yaml +scenario: + name: "Web Dashboard Test" + type: web + steps: + - action: navigate + url: "http://localhost:3000" + - action: verify_element + selector: "h1" + contains: "Dashboard" +``` + +**Common Use Cases**: + +- SPAs (React, Vue, Angular apps) +- Admin panels +- E-commerce sites +- SaaS applications + +### Electron Applications + +Test desktop apps built with Electron: + +```yaml +scenario: + name: "Desktop App Test" + type: electron + steps: + - action: launch + target: "./dist/my-app" + - action: verify_window + title: "My Application" +``` + +**Common Use Cases**: + +- Code editors (VS Code-like apps) +- Chat applications (Slack, Discord clones) +- Productivity tools +- Custom desktop applications + +## Progressive Learning Path + +The skill teaches testing in three levels: + +### Level 1: 
Fundamentals (Start Here) + +- Basic test structure (YAML anatomy) +- Single-action tests +- Simple verification +- Smoke tests + +**Examples**: + +- `examples/cli/calculator-basic.yaml` +- `examples/tui/file-manager-navigation.yaml` +- `examples/web/dashboard-smoke-test.yaml` +- `examples/electron/single-window-basic.yaml` + +### Level 2: Intermediate + +- Multi-step workflows +- Conditional logic +- Error handling +- Variables and templating + +**Examples**: + +- `examples/cli/cli-error-handling.yaml` +- `examples/tui/tui-form-validation.yaml` +- `examples/web/web-authentication-flow.yaml` +- `examples/electron/multi-window-coordination.yaml` + +### Level 3: Advanced + +- Custom comprehension agents +- Visual regression testing +- Performance validation +- IPC testing (Electron) + +**Examples**: + +- `examples/tui/tui-performance-monitoring.yaml` +- `examples/electron/electron-ipc-testing.yaml` +- `examples/custom-agents/custom-comprehension-agent.yaml` +- `examples/custom-agents/custom-reporter-integration.yaml` + +## Example Library + +This skill includes **15 complete working examples**: + +### CLI (3 examples) + +- Basic calculator operations [Level 1] +- Error handling and recovery [Level 2] +- Interactive session management [Level 2] + +### TUI (3 examples) + +- File manager navigation [Level 1] +- Form validation [Level 2] +- Performance monitoring [Level 3] + +### Web (3 examples) + +- Dashboard smoke test [Level 1] +- Authentication flow [Level 2] +- Visual regression [Level 2] + +### Electron (4 examples) + +- Single window basics [Level 1] +- Multi-window coordination [Level 2] +- Menu interactions [Level 2] +- IPC testing [Level 3] + +### Custom Agents (2 examples) + +- Domain-specific comprehension [Level 3] +- Custom reporting [Level 3] + +All examples include: + +- Complete working YAML +- Inline documentation +- Expected output +- Prerequisites +- Level indicators + +## Using This Skill in Claude + +### Invoke the Skill + +``` +Claude, use the 
qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for user login. + +Claude, create Electron tests using qa-team for my desktop app. +``` + +### What You'll Receive + +1. **Complete YAML test scenario** matching your requirements +2. **Inline comments** explaining each section +3. **Best practices** applied (timeouts, waits, verification) +4. **Appropriate complexity** (Level 1, 2, or 3 based on needs) +5. **Instructions** for running the test + +### Example Interaction + +**You**: "Create a web test that verifies user can log in with valid credentials and sees their dashboard" + +**Claude** (using this skill): Generates a complete Level 2 YAML scenario with: + +- Navigation to login page +- Form filling (email, password) +- Submit button click +- URL verification (redirected to dashboard) +- Element verification (user profile visible) +- Screenshot capture +- Proper timeouts and waits + +## Integration with Amplihack Philosophy + +This skill embodies amplihack's core principles: + +### Ruthless Simplicity + +- Declarative YAML over complex code +- Minimal boilerplate +- Focus on behavior, not implementation + +### Modular Design (Bricks & Studs) + +- Self-contained test scenarios +- Clear action contracts +- Composable test steps + +### Zero-BS Implementation + +- No stubs or placeholders +- Every example is runnable +- Clear, actionable error messages + +### Outside-In Thinking + +- User perspective first +- Implementation-agnostic tests +- Behavior-driven validation + +## Best Practices + +### 1. Start Simple + +Begin with basic smoke tests, then add complexity: + +```yaml +# Level 1: Smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. 
Use Descriptive Names + +```yaml +# Good +scenario: + name: "User Login - Valid Credentials" + description: "Verifies successful login with email and password" + +# Bad +scenario: + name: "Test 1" +``` + +### 3. Verify Critical Paths Only + +Don't test every detail. Focus on user-facing behavior: + +```yaml +# Good - User-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" + +# Bad - Implementation detail +- action: verify_element + selector: ".cache-status" + contains: "initialized" +``` + +### 4. Always Wait for Dynamic Content + +```yaml +# Good - Wait before verification +- action: click + selector: ".load-data" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" + +# Bad - May fail due to timing +- action: click + selector: ".load-data" +- action: verify_element + selector: ".data-table" # Might not exist yet! +``` + +### 5. Clean Up After Tests + +```yaml +steps: + # Test steps... + +cleanup: + - action: delete_file + path: "./test-data.json" + - action: stop_application +``` + +## Troubleshooting + +### Installation Issues + +**Problem**: `@types/node-pty` not found error + +**Solution**: This was fixed in gadugi-agentic-test. 
If you see this: + +```bash +# Update to latest version +npm install -g github:rysweet/gadugi-agentic-test + +# Or if you cloned, pull latest: +git pull origin main +npm install +npm run build +``` + +**Problem**: `tsc: command not found` when building + +**Solution**: TypeScript not installed + +```bash +npm install # Installs all dependencies including TypeScript +npm run build # Now will work +``` + +### Test Times Out + +**Problem**: Test exceeds timeout and fails + +**Solution**: Increase timeout for slow operations + +```yaml +- action: wait_for_element + selector: ".slow-loading-data" + timeout: 30s # Generous timeout +``` + +### Scenario Format Issues + +**Problem**: "Scenario must have a name" error + +**Solution**: gadugi expects top-level `name:`, not nested under `scenario:`: + +```yaml +# WRONG (won't load) +scenario: + name: "My Test" + steps: [...] + +# RIGHT +name: "My Test" +description: "What this tests" +version: "1.0.0" +config: + timeout: 120000 +steps: [...] +``` + +### Element Not Found + +**Problem**: Cannot find element to interact with + +**Solutions**: + +1. Use `wait_for_element` before interaction +2. Verify selector is correct +3. Check if element is in iframe + +```yaml +- action: wait_for_element + selector: ".target" + timeout: 10s +- action: click + selector: ".target" +``` + +### Flaky Tests in CI + +**Problem**: Tests pass locally but fail in CI + +**Solutions**: + +1. Add longer timeouts for CI environments +2. Set explicit viewport sizes +3. Wait for application readiness + +```yaml +scenario: + environment: + viewport: + width: 1920 + height: 1080 + + steps: + - action: wait_for_element + selector: ".app-ready" + timeout: 30s # Generous for CI +``` + +## Framework Version Check + +This skill embeds gadugi-agentic-test version **0.1.0**. 
+ +To check for newer versions: + +```bash +python scripts/check-freshness.py +``` + +The script compares the embedded version against the latest GitHub release and notifies you of new features. + +## Related Skills + +- **test-gap-analyzer**: Find untested code paths (unit test focus) +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +## Resources + +### Documentation + +- **Complete Skill Guide**: `SKILL.md` - Comprehensive 3000+ line documentation +- **Examples**: `examples/` - 15 complete working examples +- **Framework Repo**: https://github.com/rysweet/gadugi-agentic-test + +### Getting Help + +- Review examples in `examples/` directory +- Check `SKILL.md` for detailed explanations +- See troubleshooting section in `SKILL.md` +- Open issues at: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues + +## Quick Reference + +### Basic Test Template + +```yaml +scenario: + name: "Test Name" + description: "What this verifies" + type: cli | tui | web | electron + + prerequisites: + - "Condition 1" + + steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Expected" + + cleanup: + - action: stop_application +``` + +### Common Actions + +**CLI**: + +- `launch` - Start application +- `send_input` - Send text +- `verify_output` - Check output +- `verify_exit_code` - Validate exit code + +**TUI**: + +- `send_keypress` - Send keys +- `verify_screen` - Check screen +- `capture_screenshot` - Save screenshot + +**Web**: + +- `navigate` - Go to URL +- `click` - Click element +- `type` - Type text +- `verify_element` - Check element + +**Electron**: + +- `window_action` - Control windows +- `menu_click` - Click menus +- `dialog_action` - Handle dialogs +- All web actions + +## Success Stories + +Outside-in testing shines when: + +1. 
**Refactoring**: Change implementation without updating tests +2. **Collaboration**: Non-developers can read and understand tests +3. **Documentation**: Tests serve as executable specifications +4. **Regression Prevention**: Catch breaking changes in critical flows +5. **Cross-Platform**: Same test structure across CLI/TUI/Web/Electron + +Start with Level 1 examples and progressively add complexity as needed. The AI agents handle the hard parts - you just describe what should happen. + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on behavior, and your tests will remain stable across refactorings while providing meaningful validation. + +## Real-World Example: Testing amplihack Guide Agent + +Based on actual testing of amplihack's guide agent, here's a complete working example: + +### Scenario: Naive Student Learning Flow + +```yaml +name: "Guide Agent - Beginner First Question" +description: "Test how guide responds to complete beginner" +version: "1.0.0" + +config: + timeout: 180000 # 3 minutes for AI response + retries: 1 + parallel: false + +agents: + - name: "student-cli" + type: "system" + config: + shell: "bash" + cwd: "/tmp/test-student" + timeout: 180000 + capture_output: true + +steps: + - name: "Student asks: What is amplihack?" + agent: "student-cli" + action: "execute_command" + params: + command: 'mkdir -p /tmp/test-student && uvx --from git+https://github.com/rysweet/amplihack amplihack claude -- -p "Task(subagent_type=''guide'', prompt=''I am new. 
What is amplihack?'')" 2>&1 | head -100' + expect: + exit_code: 0 + stdout_contains: + - "amplihack" + - "AI" + timeout: 180000 + + - name: "Verify guide gives immediate action" + agent: "student-cli" + action: "execute_command" + params: + command: "grep -i 'TRY IT\\|try this\\|run this' /tmp/test-student/.claude/agents/amplihack/core/guide.md" + expect: + exit_code: 0 + timeout: 5000 + +metadata: + tags: ["guide-agent", "beginner", "real-world"] + priority: "high" +``` + +### What This Tests + +1. **Installation via uvx** - Tests users can run without installing +2. **Guide agent invocation** - Verifies Task(subagent_type='guide') works +3. **Beginner-friendly response** - Checks for immediate actionable command +4. **Interactive elements** - Looks for TRY IT prompts + +### Running This Test + +```bash +cd gadugi-agentic-test +node dist/cli.js run scenarios/amplihack-guide-test.yaml --verbose +``` + +### What We Learned + +**From testing amplihack guide agent**: + +- Long-running AI commands need 180s+ timeouts +- Testing in clean `/tmp` directory avoids state pollution +- Combining `uvx --from git+...` with gadugi tests unreleased branches +- Checking file content (guide.md) verifies features beyond just output +- Real-world tests exposed gaps (guide showing bash commands in REPL context) diff --git a/docs/claude/skills/qa-team/SKILL.md b/docs/claude/skills/qa-team/SKILL.md new file mode 100644 index 000000000..f75aa884d --- /dev/null +++ b/docs/claude/skills/qa-team/SKILL.md @@ -0,0 +1,2100 @@ +--- +name: qa-team +description: | + QA team for outside-in validation, side-by-side parity loops, and A/B behavioral comparison. + Use when you need behavior-driven tests, legacy-vs-new comparison, or rollout shadow validation. + Creates executable scenarios and parity workflows that agents can observe, compare, and iterate on. + Supports local, observable tmux, remote SSH, and shadow-mode divergence logging patterns. 
+version: 1.1.0 +embedded_framework_version: 0.1.0 +github_repo: https://github.com/rysweet/gadugi-agentic-test +issue: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 +--- + +# QA Team Skill + +## Purpose [LEVEL 1] + +This skill helps you create **agentic outside-in tests** that verify application behavior from an external user's perspective without any knowledge of internal implementation. Using the gadugi-agentic-test framework, you write declarative YAML scenarios that AI agents execute, observe, and validate. + +**Key Principle**: Tests describe WHAT should happen, not HOW it's implemented. Agents figure out the execution details. + +## When to Use This Skill [LEVEL 1] + +### Perfect For + +- **Smoke Tests**: Quick validation that critical user flows work +- **Behavior-Driven Testing**: Verify features from user perspective +- **Cross-Platform Testing**: Same test logic for CLI, TUI, Web, Electron +- **Refactoring Safety**: Tests remain valid when implementation changes +- **AI-Powered Testing**: Let agents handle complex interactions +- **Documentation as Tests**: YAML scenarios double as executable specs + +### Use This Skill When + +- Starting a new project and defining expected behaviors +- Refactoring code and need tests that won't break with internal changes +- Testing user-facing applications (CLI tools, TUIs, web apps, desktop apps) +- Writing acceptance criteria that can be automatically verified +- Need tests that non-developers can read and understand +- Want to catch regressions in critical user workflows +- Testing complex multi-step interactions + +### Don't Use This Skill When + +- Need unit tests for internal functions (use test-gap-analyzer instead) +- Testing performance or load characteristics +- Need precise timing or concurrency control +- Testing non-interactive batch processes +- Implementation details matter more than behavior + +## Core Concepts [LEVEL 1] + +### Outside-In Testing Philosophy + +**Traditional 
Inside-Out Testing**: + +```python +# Tightly coupled to implementation +def test_calculator_add(): + calc = Calculator() + result = calc.add(2, 3) + assert result == 5 + assert calc.history == [(2, 3, 5)] # Knows internal state +``` + +**Agentic Outside-In Testing**: + +```yaml +# Implementation-agnostic behavior verification +scenario: + name: "Calculator Addition" + steps: + - action: launch + target: "./calculator" + - action: send_input + value: "add 2 3" + - action: verify_output + contains: "Result: 5" +``` + +**Benefits**: + +- Tests survive refactoring (internal changes don't break tests) +- Readable by non-developers (YAML is declarative) +- Platform-agnostic (same structure for CLI/TUI/Web/Electron) +- AI agents handle complexity (navigation, timing, screenshots) + +### The Gadugi Agentic Test Framework [LEVEL 2] + +Gadugi-agentic-test is a TypeScript (Node.js) framework that: + +1. **Parses YAML test scenarios** with declarative steps +2. **Dispatches to specialized agents** (CLI, TUI, Web, Electron agents) +3. **Executes actions** (launch, input, click, wait, verify) +4. **Collects evidence** (screenshots, logs, output captures) +5. **Validates outcomes** against expected results +6. **Generates reports** with evidence trails + +**Architecture**: + +``` +YAML Scenario → Scenario Loader → Agent Dispatcher → Execution Engine + ↓ + [CLI Agent, TUI Agent, Web Agent, Electron Agent] + ↓ + Observers → Comprehension Agent + ↓ + Evidence Report +``` + +### Progressive Disclosure Levels [LEVEL 1] + +This skill teaches testing in four levels: + +- **Level 1: Fundamentals** - Basic single-action tests, simple verification +- **Level 2: Intermediate** - Multi-step flows, conditional logic, error handling +- **Level 3: Advanced** - Custom agents, visual regression, performance validation +- **Level 4: Parity & Shadowing** - Side-by-side A/B comparison, remote observable runs, rollout divergence logging + +Each example is marked with its level. 
Start at Level 1 and progress as needed. + +## Side-by-Side Parity and A/B Validation [LEVEL 2] + +QA Team is the renamed primary skill for what used to be `outside-in-testing`. Use it for standard outside-in scenarios **and** for parity loops where you must compare a legacy implementation to a replacement, or compare approach A to approach B, as an external user would observe them. + +### Use QA Team for parity work when + +- migrating Python to Rust, old CLI to new CLI, or v1 to v2 behavior +- validating a rewrite before switching defaults +- comparing branch A vs branch B using the same user scenarios +- running observable side-by-side sessions in paired virtual TTYs +- logging rollout divergences in shadow mode without failing the run + +### Recommended parity loop + +1. Define shared user-facing scenarios first. +2. Run both implementations in isolated sandboxes. +3. Compare stdout, stderr, exit code, JSON outputs, and filesystem side effects. +4. Re-run in `--observable` mode when you need paired tmux panes for debugging. +5. Use `--ssh-target <host>` when parity must happen on a remote environment such as `azlin`. +6. Use `--shadow-mode --shadow-log <path>` during rollout to log divergences without blocking execution. + +### Command pattern to reuse + +If the repo already has a parity harness, extend it instead of inventing a second one. 
A good baseline is: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --observable +``` + +For remote parity: + +```bash +python tests/parity/validate_cli_parity.py \ + --ssh-target azlin \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /remote/path/to/legacy-repo \ + --rust-binary /remote/path/to/new-binary +``` + +For rollout shadow logging: + +```bash +python tests/parity/validate_cli_parity.py \ + --scenario tests/parity/scenarios/feature.yaml \ + --python-repo /path/to/legacy-repo \ + --rust-binary /path/to/new-binary \ + --shadow-mode \ + --shadow-log /tmp/feature-shadow.jsonl +``` + +## Quick Start [LEVEL 1] + +### Installation + +**Prerequisites (for native module compilation):** + +```bash +# macOS +xcode-select --install + +# Ubuntu/Debian +sudo apt-get install -y build-essential python3 + +# Windows: Install Visual Studio Build Tools with "Desktop development with C++" +``` + +**Install the framework:** + +```bash +# Install globally for CLI access +npm install -g @gadugi/agentic-test + +# Or install locally in your project +npm install @gadugi/agentic-test + +# Verify installation +gadugi-test --version +``` + +### Your First Test (CLI Example) + +Create `test-hello.yaml`: + +```yaml +scenario: + name: "Hello World CLI Test" + description: "Verify CLI prints greeting" + type: cli + + prerequisites: + - "./hello-world executable exists" + + steps: + - action: launch + target: "./hello-world" + + - action: verify_output + contains: "Hello, World!" + + - action: verify_exit_code + expected: 0 +``` + +Run the test: + +```bash +gadugi-test run test-hello.yaml +``` + +Output: + +``` +✓ Scenario: Hello World CLI Test + ✓ Step 1: Launched ./hello-world + ✓ Step 2: Output contains "Hello, World!" 
+ ✓ Step 3: Exit code is 0 + +PASSED (3/3 steps successful) +Evidence saved to: ./evidence/test-hello-20250116-093045/ +``` + +### Understanding the YAML Structure [LEVEL 1] + +Every test scenario has this structure: + +```yaml +scenario: + name: "Descriptive test name" + description: "What this test verifies" + type: cli | tui | web | electron + + # Optional metadata + tags: [smoke, critical, auth] + timeout: 30s + + # What must be true before test runs + prerequisites: + - "Condition 1" + - "Condition 2" + + # The test steps (executed sequentially) + steps: + - action: action_name + parameter1: value1 + parameter2: value2 + + - action: verify_something + expected: value + + # Optional cleanup + cleanup: + - action: stop_application +``` + +## Application Types and Agents [LEVEL 2] + +### CLI Applications [LEVEL 1] + +**Use Case**: Command-line tools, scripts, build tools, package managers + +**Supported Actions**: + +- `launch` - Start the CLI program +- `send_input` - Send text or commands via stdin +- `send_signal` - Send OS signals (SIGINT, SIGTERM) +- `wait_for_output` - Wait for specific text in stdout/stderr +- `verify_output` - Check stdout/stderr contains/matches expected text +- `verify_exit_code` - Validate process exit code +- `capture_output` - Save output for later verification + +**Example** (see `examples/cli/calculator-basic.yaml`): + +```yaml +scenario: + name: "CLI Calculator Basic Operations" + type: cli + + steps: + - action: launch + target: "./calculator" + args: ["--mode", "interactive"] + + - action: send_input + value: "add 5 3\n" + + - action: verify_output + contains: "Result: 8" + timeout: 2s + + - action: send_input + value: "multiply 4 7\n" + + - action: verify_output + contains: "Result: 28" + + - action: send_input + value: "exit\n" + + - action: verify_exit_code + expected: 0 +``` + +### TUI Applications [LEVEL 1] + +**Use Case**: Terminal user interfaces (htop, vim, tmux, custom dashboard TUIs) + +**Supported Actions**: + +- 
`launch` - Start TUI application +- `send_keypress` - Send keyboard input (arrow keys, enter, ctrl+c, etc.) +- `wait_for_screen` - Wait for specific text to appear on screen +- `verify_screen` - Check screen contents match expectations +- `capture_screenshot` - Save terminal screenshot (ANSI art) +- `navigate_menu` - Navigate menu structures +- `fill_form` - Fill TUI form fields + +**Example** (see `examples/tui/file-manager-navigation.yaml`): + +```yaml +scenario: + name: "TUI File Manager Navigation" + type: tui + + steps: + - action: launch + target: "./file-manager" + + - action: wait_for_screen + contains: "File Manager v1.0" + timeout: 3s + + - action: send_keypress + value: "down" + times: 3 + + - action: verify_screen + contains: "> documents/" + description: "Third item should be selected" + + - action: send_keypress + value: "enter" + + - action: wait_for_screen + contains: "documents/" + timeout: 2s + + - action: capture_screenshot + save_as: "documents-view.txt" +``` + +### Web Applications [LEVEL 1] + +**Use Case**: Web apps, dashboards, SPAs, admin panels + +**Supported Actions**: + +- `navigate` - Go to URL +- `click` - Click element by selector or text +- `type` - Type into input fields +- `wait_for_element` - Wait for element to appear +- `verify_element` - Check element exists/contains text +- `verify_url` - Validate current URL +- `screenshot` - Capture browser screenshot +- `scroll` - Scroll page or element + +**Example** (see `examples/web/dashboard-smoke-test.yaml`): + +```yaml +scenario: + name: "Dashboard Smoke Test" + type: web + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + + - action: wait_for_element + selector: "h1.dashboard-title" + timeout: 5s + + - action: verify_element + selector: "h1.dashboard-title" + contains: "Analytics Dashboard" + + - action: verify_element + selector: ".widget-stats" + count: 4 + description: "Should have 4 stat widgets" + + - action: click + selector: "button.refresh-data" + + - 
action: wait_for_element + selector: ".loading-spinner" + disappears: true + timeout: 10s + + - action: screenshot + save_as: "dashboard-loaded.png" +``` + +### Electron Applications [LEVEL 2] + +**Use Case**: Desktop apps built with Electron (VS Code, Slack, Discord clones) + +**Supported Actions**: + +- `launch` - Start Electron app +- `window_action` - Interact with windows (focus, minimize, close) +- `menu_click` - Click application menu items +- `dialog_action` - Handle native dialogs (open file, save, confirm) +- `ipc_send` - Send IPC message to main process +- `verify_window` - Check window state/properties +- All web actions (since Electron uses Chromium) + +**Example** (see `examples/electron/single-window-basic.yaml`): + +```yaml +scenario: + name: "Electron Single Window Test" + type: electron + + steps: + - action: launch + target: "./dist/my-app" + wait_for_window: true + timeout: 10s + + - action: verify_window + title: "My Application" + visible: true + + - action: menu_click + path: ["File", "New Document"] + + - action: wait_for_element + selector: ".document-editor" + + - action: type + selector: ".document-editor" + value: "Hello from test" + + - action: menu_click + path: ["File", "Save"] + + - action: dialog_action + type: save_file + filename: "test-document.txt" + + - action: verify_window + title_contains: "test-document.txt" +``` + +## Test Scenario Anatomy [LEVEL 2] + +### Metadata Section + +```yaml +scenario: + name: "Clear descriptive name" + description: "Detailed explanation of what this test verifies" + type: cli | tui | web | electron + + # Optional fields + tags: [smoke, regression, auth, payment] + priority: high | medium | low + timeout: 60s # Overall scenario timeout + retry_on_failure: 2 # Retry count + + # Environment requirements + environment: + variables: + API_URL: "http://localhost:8080" + DEBUG: "true" + files: + - "./config.json must exist" +``` + +### Prerequisites + +Prerequisites are conditions that must be true 
before the test runs. The framework validates these before execution. + +```yaml +prerequisites: + - "./application binary exists" + - "Port 8080 is available" + - "Database is running" + - "User account test@example.com exists" + - "File ./test-data.json exists" +``` + +If prerequisites fail, the test is skipped (not failed). + +### Steps + +Steps execute sequentially. Each step has: + +- **action**: Required - the action to perform +- **Parameters**: Action-specific parameters +- **description**: Optional - human-readable explanation +- **timeout**: Optional - step-specific timeout +- **continue_on_failure**: Optional - don't fail scenario if step fails + +```yaml +steps: + # Simple action + - action: launch + target: "./app" + + # Action with multiple parameters + - action: verify_output + contains: "Success" + timeout: 5s + description: "App should print success message" + + # Continue even if this fails + - action: click + selector: ".optional-button" + continue_on_failure: true +``` + +### Verification Actions [LEVEL 1] + +Verification actions check expected outcomes. They fail the test if expectations aren't met. + +**Common Verifications**: + +```yaml +# CLI: Check output contains text +- action: verify_output + contains: "Expected text" + +# CLI: Check output matches regex +- action: verify_output + matches: "Result: \\d+" + +# CLI: Check exit code +- action: verify_exit_code + expected: 0 + +# Web/TUI: Check element exists +- action: verify_element + selector: ".success-message" + +# Web/TUI: Check element contains text +- action: verify_element + selector: "h1" + contains: "Welcome" + +# Web: Check URL +- action: verify_url + equals: "http://localhost:3000/dashboard" + +# Web: Check element count +- action: verify_element + selector: ".list-item" + count: 5 + +# Electron: Check window state +- action: verify_window + title: "My App" + visible: true + focused: true +``` + +### Cleanup Section + +Cleanup runs after all steps complete (success or failure). 
Use for teardown actions. + +```yaml +cleanup: + - action: stop_application + force: true + + - action: delete_file + path: "./temp-test-data.json" + + - action: reset_database + connection: "test_db" +``` + +## Advanced Patterns [LEVEL 2] + +### Conditional Logic + +Execute steps based on conditions: + +```yaml +steps: + - action: launch + target: "./app" + + - action: verify_output + contains: "Login required" + id: login_check + + # Only run if login_check passed + - action: send_input + value: "login admin password123\n" + condition: login_check.passed +``` + +### Variables and Templating [LEVEL 2] + +Define variables and use them throughout the scenario: + +```yaml +scenario: + name: "Test with Variables" + type: cli + + variables: + username: "testuser" + api_url: "http://localhost:8080" + + steps: + - action: launch + target: "./app" + args: ["--api", "${api_url}"] + + - action: send_input + value: "login ${username}\n" + + - action: verify_output + contains: "Welcome, ${username}!" 
+``` + +### Loops and Repetition [LEVEL 2] + +Repeat actions multiple times: + +```yaml +steps: + - action: launch + target: "./app" + + # Repeat action N times + - action: send_keypress + value: "down" + times: 5 + + # Loop over list + - action: send_input + value: "${item}\n" + for_each: + - "apple" + - "banana" + - "cherry" +``` + +### Error Handling [LEVEL 2] + +Handle expected errors gracefully: + +```yaml +steps: + - action: send_input + value: "invalid command\n" + + # Verify error message appears + - action: verify_output + contains: "Error: Unknown command" + expected_failure: true + + # App should still be running + - action: verify_running + expected: true +``` + +### Multi-Step Workflows [LEVEL 2] + +Complex scenarios with multiple phases: + +```yaml +scenario: + name: "E-commerce Purchase Flow" + type: web + + steps: + # Phase 1: Authentication + - action: navigate + url: "http://localhost:3000/login" + + - action: type + selector: "#username" + value: "test@example.com" + + - action: type + selector: "#password" + value: "password123" + + - action: click + selector: "button[type=submit]" + + - action: wait_for_url + contains: "/dashboard" + + # Phase 2: Product Selection + - action: navigate + url: "http://localhost:3000/products" + + - action: click + text: "Add to Cart" + nth: 1 + + - action: verify_element + selector: ".cart-badge" + contains: "1" + + # Phase 3: Checkout + - action: click + selector: ".cart-icon" + + - action: click + text: "Proceed to Checkout" + + - action: fill_form + fields: + "#shipping-address": "123 Test St" + "#city": "Testville" + "#zip": "12345" + + - action: click + selector: "#place-order" + + - action: wait_for_element + selector: ".order-confirmation" + timeout: 10s + + - action: verify_element + selector: ".order-number" + exists: true +``` + +## Level 3: Advanced Topics [LEVEL 3] + +### Custom Comprehension Agents + +The framework uses AI agents to interpret application output and determine if tests pass. 
You can customize these agents for domain-specific logic. + +**Default Comprehension Agent**: + +- Observes raw output (text, HTML, screenshots) +- Applies general reasoning to verify expectations +- Returns pass/fail with explanation + +**Custom Comprehension Agent** (see `examples/custom-agents/custom-comprehension-agent.yaml`): + +```yaml +scenario: + name: "Financial Dashboard Test with Custom Agent" + type: web + + # Define custom comprehension logic + comprehension_agent: + model: "gpt-4" + system_prompt: | + You are a financial data validator. When verifying dashboard content: + 1. All monetary values must use proper formatting ($1,234.56) + 2. Percentages must include % symbol + 3. Dates must be in MM/DD/YYYY format + 4. Negative values must be red + 5. Chart data must be logically consistent + + Be strict about formatting and data consistency. + + examples: + - input: "Total Revenue: 45000" + output: "FAIL - Missing currency symbol and comma separator" + - input: "Total Revenue: $45,000.00" + output: "PASS - Correctly formatted" + + steps: + - action: navigate + url: "http://localhost:3000/financial-dashboard" + + - action: verify_element + selector: ".revenue-widget" + use_custom_comprehension: true + description: "Revenue should be properly formatted" +``` + +### Visual Regression Testing [LEVEL 3] + +Compare screenshots against baseline images: + +```yaml +scenario: + name: "Visual Regression - Homepage" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: wait_for_element + selector: ".page-loaded" + + - action: screenshot + save_as: "homepage.png" + + - action: visual_compare + screenshot: "homepage.png" + baseline: "./baselines/homepage-baseline.png" + threshold: 0.05 # 5% difference allowed + highlight_differences: true +``` + +### Performance Validation [LEVEL 3] + +Measure and validate performance metrics: + +```yaml +scenario: + name: "Performance - Dashboard Load Time" + type: web + + performance: + metrics: + 
- page_load_time + - first_contentful_paint + - time_to_interactive + + steps: + - action: navigate + url: "http://localhost:3000/dashboard" + measure_timing: true + + - action: verify_performance + metric: page_load_time + less_than: 3000 # 3 seconds + + - action: verify_performance + metric: first_contentful_paint + less_than: 1500 # 1.5 seconds +``` + +### Multi-Window Coordination (Electron) [LEVEL 3] + +Test applications with multiple windows: + +```yaml +scenario: + name: "Multi-Window Chat Application" + type: electron + + steps: + - action: launch + target: "./chat-app" + + - action: menu_click + path: ["Window", "New Chat"] + + - action: verify_window + count: 2 + + - action: window_action + window: 1 + action: focus + + - action: type + selector: ".message-input" + value: "Hello from window 1" + + - action: click + selector: ".send-button" + + - action: window_action + window: 2 + action: focus + + - action: wait_for_element + selector: ".message" + contains: "Hello from window 1" + timeout: 5s +``` + +### IPC Testing (Electron) [LEVEL 3] + +Test Inter-Process Communication between renderer and main: + +```yaml +scenario: + name: "Electron IPC Communication" + type: electron + + steps: + - action: launch + target: "./my-app" + + - action: ipc_send + channel: "get-system-info" + + - action: ipc_expect + channel: "system-info-reply" + timeout: 3s + + - action: verify_ipc_payload + contains: + platform: "darwin" + arch: "x64" +``` + +### Custom Reporters [LEVEL 3] + +Generate custom test reports: + +```yaml +scenario: + name: "Test with Custom Reporting" + type: cli + + reporting: + format: custom + template: "./report-template.html" + include: + - screenshots + - logs + - timing_data + - video_recording + + email: + enabled: true + recipients: ["team@example.com"] + on_failure_only: true + + steps: + # ... test steps ... 
+``` + +## Framework Integration [LEVEL 2] + +### Running Tests + +**Single test**: + +```bash +gadugi-test run test-scenario.yaml +``` + +**Multiple tests**: + +```bash +gadugi-test run tests/*.yaml +``` + +**With options**: + +```bash +gadugi-test run test.yaml \ + --verbose \ + --evidence-dir ./test-evidence \ + --retry 2 \ + --timeout 60s +``` + +### CI/CD Integration + +**GitHub Actions** (`.github/workflows/agentic-tests.yml`): + +```yaml +name: Agentic Tests + +on: [push, pull_request] + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + + - name: Install gadugi-agentic-test + run: npm install -g @gadugi/agentic-test + + - name: Run tests + run: gadugi-test run tests/agentic/*.yaml + + - name: Upload evidence + if: always() + uses: actions/upload-artifact@v3 + with: + name: test-evidence + path: ./evidence/ +``` + +### Evidence Collection + +The framework automatically collects evidence for debugging: + +``` +evidence/ + scenario-name-20250116-093045/ + ├── scenario.yaml # Original test scenario + ├── execution-log.json # Detailed execution log + ├── screenshots/ # All captured screenshots + │ ├── step-1.png + │ ├── step-3.png + │ └── step-5.png + ├── output-captures/ # CLI/TUI output + │ ├── stdout.txt + │ └── stderr.txt + ├── timing.json # Performance metrics + └── report.html # Human-readable report +``` + +## Best Practices [LEVEL 2] + +### 1. 
Start Simple, Add Complexity + +Begin with basic smoke tests, then add detail: + +```yaml +# Level 1: Basic smoke test +steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + +# Level 2: Add interaction +steps: + - action: launch + target: "./app" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" + +# Level 3: Add error handling and edge cases +steps: + - action: launch + target: "./app" + - action: send_input + value: "invalid\n" + - action: verify_output + contains: "Error" + - action: send_input + value: "command\n" + - action: verify_output + contains: "Success" +``` + +### 2. Use Descriptive Names and Descriptions + +```yaml +# Bad +scenario: + name: "Test 1" + steps: + - action: click + selector: "button" + +# Good +scenario: + name: "User Login Flow - Valid Credentials" + description: "Verifies user can log in with valid email and password" + steps: + - action: click + selector: "button[type=submit]" + description: "Submit login form" +``` + +### 3. Verify Critical Paths Only + +Don't test every tiny detail. Focus on user-facing behavior: + +```yaml +# Bad - Tests implementation details +- action: verify_element + selector: ".internal-cache-status" + contains: "initialized" + +# Good - Tests user-visible behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome back" +``` + +### 4. Use Prerequisites for Test Dependencies + +```yaml +scenario: + name: "User Profile Edit" + + prerequisites: + - "User testuser@example.com exists" + - "User is logged in" + - "Database is seeded with test data" + + steps: + # Test assumes prerequisites are met + - action: navigate + url: "/profile" +``` + +### 5. 
Keep Tests Independent + +Each test should set up its own state and clean up: + +```yaml +scenario: + name: "Create Document" + + steps: + # Create test user (don't assume exists) + - action: api_call + endpoint: "/api/users" + method: POST + data: { email: "test@example.com" } + + # Run test + - action: navigate + url: "/documents/new" + # ... test steps ... + + cleanup: + # Remove test user + - action: api_call + endpoint: "/api/users/test@example.com" + method: DELETE +``` + +### 6. Use Tags for Organization + +```yaml +scenario: + name: "Critical Payment Flow" + tags: [smoke, critical, payment, e2e] + # Run with: gadugi-test run --tags critical +``` + +### 7. Add Timeouts Strategically + +```yaml +steps: + # Quick operations - short timeout + - action: click + selector: "button" + timeout: 2s + + # Network operations - longer timeout + - action: wait_for_element + selector: ".data-loaded" + timeout: 10s + + # Complex operations - generous timeout + - action: verify_element + selector: ".report-generated" + timeout: 60s +``` + +## Testing Strategies [LEVEL 2] + +### Smoke Tests + +Minimal tests that verify critical functionality works: + +```yaml +scenario: + name: "Smoke Test - Application Starts" + tags: [smoke] + + steps: + - action: launch + target: "./app" + - action: verify_output + contains: "Ready" + timeout: 5s +``` + +Run before every commit: `gadugi-test run --tags smoke` + +### Happy Path Tests + +Test the ideal user journey: + +```yaml +scenario: + name: "Happy Path - User Registration" + + steps: + - action: navigate + url: "/register" + - action: type + selector: "#email" + value: "newuser@example.com" + - action: type + selector: "#password" + value: "SecurePass123!" 
+ - action: click + selector: "button[type=submit]" + - action: wait_for_url + contains: "/welcome" +``` + +### Error Path Tests + +Verify error handling: + +```yaml +scenario: + name: "Error Path - Invalid Login" + + steps: + - action: navigate + url: "/login" + - action: type + selector: "#email" + value: "invalid@example.com" + - action: type + selector: "#password" + value: "wrongpassword" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".error-message" + contains: "Invalid credentials" +``` + +### Regression Tests + +Prevent bugs from reappearing: + +```yaml +scenario: + name: "Regression - Issue #123 Password Reset" + tags: [regression, bug-123] + description: "Verifies password reset email is sent (was broken in v1.2)" + + steps: + - action: navigate + url: "/forgot-password" + - action: type + selector: "#email" + value: "user@example.com" + - action: click + selector: "button[type=submit]" + - action: verify_element + selector: ".success-message" + contains: "Reset email sent" +``` + +## Philosophy Alignment [LEVEL 2] + +This skill follows amplihack's core principles: + +### Ruthless Simplicity + +- **YAML over code**: Declarative tests are simpler than programmatic tests +- **No implementation details**: Tests describe WHAT, not HOW +- **Minimal boilerplate**: Each test is focused and concise + +### Modular Design (Bricks & Studs) + +- **Self-contained scenarios**: Each YAML file is independent +- **Clear contracts**: Steps have well-defined inputs/outputs +- **Composable actions**: Reuse actions across different test types + +### Zero-BS Implementation + +- **No stubs**: Every example in this skill is a complete, runnable test +- **Working defaults**: Tests run with minimal configuration +- **Clear errors**: Framework provides actionable error messages + +### Outside-In Thinking + +- **User perspective**: Tests verify behavior users care about +- **Implementation agnostic**: Refactoring doesn't break tests +- 
**Behavior-driven**: Focus on outcomes, not internals + +## Common Pitfalls and Solutions [LEVEL 2] + +### Pitfall 1: Over-Specifying + +**Problem**: Test breaks when UI changes slightly + +```yaml +# Bad - Too specific +- action: verify_element + selector: "div.container > div.row > div.col-md-6 > span.text-primary.font-bold" + contains: "Welcome" +``` + +**Solution**: Use flexible selectors + +```yaml +# Good - Focused on behavior +- action: verify_element + selector: ".welcome-message" + contains: "Welcome" +``` + +### Pitfall 2: Missing Waits + +**Problem**: Test fails intermittently due to timing + +```yaml +# Bad - No wait for async operation +- action: click + selector: ".load-data-button" +- action: verify_element + selector: ".data-table" # May not exist yet! +``` + +**Solution**: Always wait for dynamic content + +```yaml +# Good - Wait for element to appear +- action: click + selector: ".load-data-button" +- action: wait_for_element + selector: ".data-table" + timeout: 10s +- action: verify_element + selector: ".data-table" +``` + +### Pitfall 3: Testing Implementation Details + +**Problem**: Test coupled to internal state + +```yaml +# Bad - Tests internal cache state +- action: verify_output + contains: "Cache hit ratio: 85%" +``` + +**Solution**: Test user-visible behavior + +```yaml +# Good - Tests response time +- action: verify_response_time + less_than: 100ms + description: "Fast response indicates caching works" +``` + +### Pitfall 4: Flaky Assertions + +**Problem**: Assertions depend on exact timing or formatting + +```yaml +# Bad - Exact timestamp match will fail +- action: verify_output + contains: "Created at: 2025-11-16 09:30:45" +``` + +**Solution**: Use flexible patterns + +```yaml +# Good - Match pattern, not exact value +- action: verify_output + matches: "Created at: \\d{4}-\\d{2}-\\d{2} \\d{2}:\\d{2}:\\d{2}" +``` + +### Pitfall 5: Not Cleaning Up + +**Problem**: Tests leave artifacts that affect future runs + +```yaml +# Bad - No 
cleanup +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" +``` + +**Solution**: Always use cleanup section + +```yaml +# Good - Cleanup ensures clean slate +steps: + - action: create_file + path: "./test-data.json" + - action: launch + target: "./app" + +cleanup: + - action: delete_file + path: "./test-data.json" +``` + +## Example Library [LEVEL 1] + +This skill includes 15 complete working examples organized by application type and complexity level: + +### CLI Examples + +1. **calculator-basic.yaml** [LEVEL 1] - Simple CLI arithmetic operations +2. **cli-error-handling.yaml** [LEVEL 2] - Error messages and recovery +3. **cli-interactive-session.yaml** [LEVEL 2] - Multi-turn interactive CLI + +### TUI Examples + +4. **file-manager-navigation.yaml** [LEVEL 1] - Basic TUI keyboard navigation +5. **tui-form-validation.yaml** [LEVEL 2] - Complex form filling and validation +6. **tui-performance-monitoring.yaml** [LEVEL 3] - TUI performance dashboard testing + +### Web Examples + +7. **dashboard-smoke-test.yaml** [LEVEL 1] - Simple web dashboard verification +8. **web-authentication-flow.yaml** [LEVEL 2] - Multi-step login workflow +9. **web-visual-regression.yaml** [LEVEL 2] - Screenshot-based visual testing + +### Electron Examples + +10. **single-window-basic.yaml** [LEVEL 1] - Basic Electron window test +11. **multi-window-coordination.yaml** [LEVEL 2] - Multiple window orchestration +12. **electron-menu-testing.yaml** [LEVEL 2] - Application menu interactions +13. **electron-ipc-testing.yaml** [LEVEL 3] - Main/renderer IPC testing + +### Custom Agent Examples + +14. **custom-comprehension-agent.yaml** [LEVEL 3] - Domain-specific validation logic +15. **custom-reporter-integration.yaml** [LEVEL 3] - Custom test reporting + +See `examples/` directory for full example code with inline documentation. + +## Framework Freshness Check [LEVEL 3] + +This skill embeds knowledge of gadugi-agentic-test version 0.1.0. 
To check if a newer version exists: + +```bash +# Run the freshness check script +python scripts/check-freshness.py + +# Output if outdated: +# WARNING: Embedded framework version is 0.1.0 +# Latest GitHub version is 0.2.5 +# +# New features in 0.2.5: +# - Native Playwright support for web testing +# - Video recording for all test types +# - Parallel test execution +# +# Update with: npm update -g @gadugi/agentic-test +``` + +The script checks the GitHub repository for releases and compares against the embedded version. This ensures you're aware of new features and improvements. + +**When to Update This Skill**: + +- New framework version adds significant features +- Breaking changes in YAML schema +- New application types supported +- Agent capabilities expand + +## Integration with Other Skills [LEVEL 2] + +### Works Well With + +**test-gap-analyzer**: + +- Use test-gap-analyzer to find untested functions +- Write outside-in tests for critical user-facing paths +- Use unit tests (from test-gap-analyzer) for internal functions + +**philosophy-guardian**: + +- Ensure test YAML follows ruthless simplicity +- Verify tests focus on behavior, not implementation + +**pr-review-assistant**: + +- Include outside-in tests in PR reviews +- Verify tests cover changed functionality +- Check test readability and clarity + +**module-spec-generator**: + +- Generate module specs that include outside-in test scenarios +- Use specs as templates for test YAML + +### Example Combined Workflow + +```bash +# 1. Analyze coverage gaps +claude "Use test-gap-analyzer on ./src" + +# 2. Write outside-in tests for critical paths +claude "Use qa-team to create web tests for authentication" + +# 3. Verify philosophy compliance +claude "Use philosophy-guardian to review new test files" + +# 4. 
Include in PR +git add tests/agentic/ +git commit -m "Add outside-in tests for auth flow" +``` + +## Troubleshooting [LEVEL 2] + +### Test Times Out + +**Symptom**: Test exceeds timeout and fails + +**Causes**: + +- Application takes longer to start than expected +- Network requests are slow +- Element never appears (incorrect selector) + +**Solutions**: + +```yaml +# Increase timeout +- action: wait_for_element + selector: ".slow-loading-element" + timeout: 30s # Increase from default + +# Add intermediate verification +- action: launch + target: "./app" +- action: wait_for_output + contains: "Initializing..." + timeout: 5s +- action: wait_for_output + contains: "Ready" + timeout: 20s +``` + +### Element Not Found + +**Symptom**: `verify_element` or `click` fails with "element not found" + +**Causes**: + +- Incorrect CSS selector +- Element not yet rendered (timing issue) +- Element in iframe or shadow DOM + +**Solutions**: + +```yaml +# Add wait before interaction +- action: wait_for_element + selector: ".target-element" + timeout: 10s +- action: click + selector: ".target-element" + +# Use more specific selector +- action: click + selector: "button[data-testid='submit-button']" + +# Handle iframe +- action: switch_to_iframe + selector: "iframe#payment-frame" +- action: click + selector: ".pay-now-button" +``` + +### Test Passes Locally, Fails in CI + +**Symptom**: Test works on dev machine but fails in CI environment + +**Causes**: + +- Different screen size (web/Electron) +- Missing dependencies +- Timing differences (slower CI machines) +- Environment variable differences + +**Solutions**: + +```yaml +# Set explicit viewport size (web/Electron) +scenario: + environment: + viewport: + width: 1920 + height: 1080 + +# Add longer timeouts in CI +- action: wait_for_element + selector: ".element" + timeout: 30s # Generous for CI + +# Verify prerequisites +prerequisites: + - "Chrome browser installed" + - "Environment variable API_KEY is set" +``` + +### Output 
Doesn't Match Expected + +**Symptom**: `verify_output` fails even though output looks correct + +**Causes**: + +- Extra whitespace or newlines +- ANSI color codes in output +- Case sensitivity + +**Solutions**: + +```yaml +# Use flexible matching +- action: verify_output + matches: "Result:\\s+Success" # Allow flexible whitespace + +# Strip ANSI codes +- action: verify_output + contains: "Success" + strip_ansi: true + +# Case-insensitive match +- action: verify_output + contains: "success" + case_sensitive: false +``` + +## Reference: Action Catalog [LEVEL 3] + +### CLI Actions + +| Action | Parameters | Description | +| ------------------ | -------------------------------- | -------------------------------------- | +| `launch` | `target`, `args`, `cwd`, `env` | Start CLI application | +| `send_input` | `value`, `delay` | Send text to stdin | +| `send_signal` | `signal` | Send OS signal (SIGINT, SIGTERM, etc.) | +| `wait_for_output` | `contains`, `matches`, `timeout` | Wait for text in stdout/stderr | +| `verify_output` | `contains`, `matches`, `stream` | Check output content | +| `verify_exit_code` | `expected` | Validate exit code | +| `capture_output` | `save_as`, `stream` | Save output to file | + +### TUI Actions + +| Action | Parameters | Description | +| -------------------- | --------------------------------- | ------------------------ | +| `launch` | `target`, `args`, `terminal_size` | Start TUI application | +| `send_keypress` | `value`, `times`, `modifiers` | Send keyboard input | +| `wait_for_screen` | `contains`, `timeout` | Wait for text on screen | +| `verify_screen` | `contains`, `matches`, `region` | Check screen content | +| `capture_screenshot` | `save_as` | Save terminal screenshot | +| `navigate_menu` | `path` | Navigate menu structure | +| `fill_form` | `fields` | Fill TUI form fields | + +### Web Actions + +| Action | Parameters | Description | +| ------------------ | ----------------------------------------- | ---------------------- | +| 
`navigate` | `url`, `wait_for_load` | Go to URL | +| `click` | `selector`, `text`, `nth` | Click element | +| `type` | `selector`, `value`, `delay` | Type into input | +| `wait_for_element` | `selector`, `timeout`, `disappears` | Wait for element | +| `verify_element` | `selector`, `contains`, `count`, `exists` | Check element state | +| `verify_url` | `equals`, `contains`, `matches` | Validate URL | +| `screenshot` | `save_as`, `selector`, `full_page` | Capture screenshot | +| `scroll` | `selector`, `direction`, `amount` | Scroll page/element | +| `select_option` | `selector`, `value` | Select dropdown option | +| `checkbox` | `selector`, `checked` | Check/uncheck checkbox | + +### Electron Actions + +| Action | Parameters | Description | +| --------------- | -------------------------------------- | -------------------------- | +| `launch` | `target`, `args`, `wait_for_window` | Start Electron app | +| `window_action` | `window`, `action` | Interact with windows | +| `menu_click` | `path` | Click menu items | +| `dialog_action` | `type`, `action`, `filename` | Handle dialogs | +| `ipc_send` | `channel`, `data` | Send IPC message | +| `ipc_expect` | `channel`, `timeout` | Wait for IPC message | +| `verify_window` | `title`, `visible`, `focused`, `count` | Check window state | +| All web actions | | Electron includes Chromium | + +### Common Parameters + +| Parameter | Type | Description | +| --------------------- | ---------- | ------------------------------------ | +| `timeout` | Duration | Maximum wait time (e.g., "5s", "2m") | +| `description` | String | Human-readable step explanation | +| `continue_on_failure` | Boolean | Don't fail scenario if step fails | +| `id` | String | Step identifier for conditionals | +| `condition` | Expression | Execute step only if condition true | + +## Quick Reference: YAML Template [LEVEL 1] + +```yaml +scenario: + # Required fields + name: "Test Name" + description: "What this test verifies" + type: cli | tui | web | electron + 
+ # Optional metadata + tags: [smoke, critical] + timeout: 60s + + # What must be true before running + prerequisites: + - "Condition 1" + - "Condition 2" + + # Environment setup + environment: + variables: + VAR_NAME: "value" + + # Variables for templating + variables: + username: "testuser" + + # Test steps (executed in order) + steps: + - action: launch + target: "./app" + + - action: send_input + value: "command\n" + + - action: verify_output + contains: "Success" + timeout: 5s + + # Cleanup (always runs) + cleanup: + - action: stop_application +``` + +## Skill Usage Examples [LEVEL 1] + +### Invoke This Skill + +``` +Claude, use the qa-team skill to create a CLI test for my calculator app. + +Claude, use qa-team to generate web tests for the login flow. + +Claude, create TUI tests using the qa-team skill for file navigation. +``` + +### What You'll Get + +1. **Complete YAML test scenario** matching your requirements +2. **Inline documentation** explaining each section +3. **Best practices applied** (timeouts, waits, verification) +4. **Appropriate complexity level** (1, 2, or 3 based on needs) +5. 
**Instructions for running** the test + +### Example Request and Response + +**User**: "Create a web test for user login with valid credentials" + +**Claude** (using this skill): + +```yaml +scenario: + name: "User Login - Valid Credentials" + description: "Verifies user can successfully log in with valid email and password" + type: web + tags: [smoke, auth, critical] + + prerequisites: + - "User test@example.com exists with password 'TestPass123!'" + - "Application is running on http://localhost:3000" + + steps: + # Navigate to login page + - action: navigate + url: "http://localhost:3000/login" + description: "Open login page" + + # Wait for page to fully load + - action: wait_for_element + selector: "form.login-form" + timeout: 5s + description: "Wait for login form to render" + + # Fill in email field + - action: type + selector: "input[name='email']" + value: "test@example.com" + description: "Enter email address" + + # Fill in password field + - action: type + selector: "input[name='password']" + value: "TestPass123!" + description: "Enter password" + + # Submit form + - action: click + selector: "button[type='submit']" + description: "Click login button" + + # Wait for redirect to dashboard + - action: wait_for_url + contains: "/dashboard" + timeout: 10s + description: "Should redirect to dashboard after successful login" + + # Verify user is logged in + - action: verify_element + selector: ".user-profile" + contains: "test@example.com" + description: "User email should appear in profile widget" + + # Capture evidence + - action: screenshot + save_as: "successful-login.png" + description: "Save screenshot of logged-in state" +# To run this test: +# 1. Save as test-login-valid.yaml +# 2. Run: gadugi-test run test-login-valid.yaml +# 3. 
Check evidence in ./evidence/user-login-valid-credentials-TIMESTAMP/ +``` + +## Related Resources [LEVEL 1] + +### Official Documentation + +- **GitHub Repository**: https://github.com/rysweet/gadugi-agentic-test +- **Framework Docs**: See repo README and docs/ folder +- **Issue Tracker**: https://github.com/rysweet/MicrosoftHackathon2025-AgenticCoding/issues/1356 + +## Level 4: Shadow Environment Integration [LEVEL 4] + +Run your outside-in tests in **isolated shadow environments** to validate changes before pushing. This combines the behavioral testing power of gadugi-agentic-test with the clean-state isolation of shadow environments. + +### Why Use Shadow Environments for Testing + +1. **Clean State**: Fresh container, no host pollution +2. **Local Changes**: Test uncommitted code exactly as-is +3. **Multi-Repo**: Coordinate changes across multiple repos +4. **CI Parity**: What shadow sees ≈ what CI will see + +### Shadow Testing Workflow + +For complete shadow environment documentation, see the **shadow-testing** skill. 
Here's how to integrate it with outside-in tests: + +#### Pattern 1: CLI Tests in Shadow (Amplifier) + +```python +# Create shadow with your local library changes +shadow.create(local_sources=["~/repos/my-lib:org/my-lib"]) + +# Run outside-in test scenarios inside shadow +shadow.exec(shadow_id, "gadugi-test run test-scenario.yaml") + +# Extract evidence +shadow.extract(shadow_id, "/evidence", "./test-evidence") + +# Cleanup +shadow.destroy(shadow_id) +``` + +#### Pattern 2: CLI Tests in Shadow (Standalone) + +```bash +# Create shadow with local changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name test + +# Run your test scenarios +amplifier-shadow exec test "gadugi-test run test-scenario.yaml" + +# Extract results +amplifier-shadow extract test /evidence ./test-evidence + +# Cleanup +amplifier-shadow destroy test +``` + +#### Pattern 3: Multi-Repo Integration Test + +```yaml +# test-multi-repo.yaml +scenario: + name: "Multi-Repo Integration Test" + type: cli + + prerequisites: + - "Shadow environment with core-lib and cli-tool" + + steps: + - action: launch + target: "cli-tool" + + - action: send_input + value: "process --lib core-lib\n" + + - action: verify_output + contains: "Success: Using core-lib" +``` + +```bash +# Setup shadow with both repos +amplifier-shadow create \ + --local ~/repos/core-lib:org/core-lib \ + --local ~/repos/cli-tool:org/cli-tool \ + --name multi-test + +# Run test that exercises both +amplifier-shadow exec multi-test "gadugi-test run test-multi-repo.yaml" +``` + +#### Pattern 4: Web App Testing in Shadow + +```yaml +# test-web-app.yaml +scenario: + name: "Web App with Local Library" + type: web + + steps: + - action: navigate + url: "http://localhost:3000" + + - action: click + selector: "button.process" + + - action: verify_element + selector: ".result" + contains: "Processed with v2.0" # Your local version +``` + +```bash +# Shadow with library changes +amplifier-shadow create --local ~/repos/my-lib:org/my-lib 
--name web-test + +# Start web app inside shadow (uses your local lib) +amplifier-shadow exec web-test " + cd /workspace && + git clone https://github.com/org/web-app && + cd web-app && + npm install && # Pulls your local my-lib via git URL rewriting + npm start & +" + +# Wait for app to start, then run tests +amplifier-shadow exec web-test "sleep 5 && gadugi-test run test-web-app.yaml" +``` + +### Verification Best Practices + +When running tests in shadow, always verify your local sources are being used: + +```bash +# After shadow.create, check snapshot commits +shadow.status(shadow_id) +# Shows: snapshot_commits: {"org/my-lib": "abc1234..."} + +# When your test installs dependencies, verify commit matches +# Look in test output for: my-lib @ git+...@abc1234 +``` + +### Complete Example: Library Change Validation + +```yaml +# test-library-change.yaml - Outside-in test +scenario: + name: "Validate Library Breaking Change" + type: cli + description: "Test that dependent app still works with new library API" + + steps: + - action: launch + target: "/workspace/org/dependent-app/cli.py" + + - action: send_input + value: "process data.json\n" + + - action: verify_output + contains: "Processed successfully" + description: "New library API should still work" + + - action: verify_exit_code + expected: 0 +``` + +```bash +# Complete workflow +# 1. Create shadow with your breaking change +amplifier-shadow create --local ~/repos/my-lib:org/my-lib --name breaking-test + +# 2. Install dependent app (pulls your local lib) +amplifier-shadow exec breaking-test " + cd /workspace && + git clone https://github.com/org/dependent-app && + cd dependent-app && + pip install -e . && # This installs git+https://github.com/org/my-lib (your local version) + echo 'Ready to test' +" + +# 3. Run outside-in test +amplifier-shadow exec breaking-test "gadugi-test run test-library-change.yaml" + +# If test passes, your breaking change is compatible! 
+# If test fails, you've caught the issue before pushing +``` + +### When to Use Shadow Integration + +Use shadow + outside-in tests when: + +- ✅ Testing library changes with dependent projects +- ✅ Validating multi-repo coordinated changes +- ✅ Need clean-state validation before pushing +- ✅ Want to catch integration issues early +- ✅ Testing that setup/install procedures work + +Don't use shadow for: + +- ❌ Simple unit tests (too much overhead) +- ❌ Tests of already-committed code (shadow adds no value) +- ❌ Performance testing (container overhead skews results) + +### Learn More + +For complete shadow environment documentation, including: + +- Shell scripts for DIY setup +- Docker Compose examples +- Multi-language support (Python, Node, Rust, Go) +- Troubleshooting and verification techniques + +**Load the shadow-testing skill**: + +``` +Claude, use the shadow-testing skill to set up a shadow environment +``` + +Or for Amplifier users, the shadow tool is built-in: + +```python +shadow.create(local_sources=["~/repos/lib:org/lib"]) +``` + +--- + +### Related Skills + +- **shadow-testing**: Complete shadow environment setup and usage +- **test-gap-analyzer**: Find untested code paths +- **philosophy-guardian**: Review test philosophy compliance +- **pr-review-assistant**: Include tests in PR reviews +- **module-spec-generator**: Generate specs with test scenarios + +### Further Reading + +- Outside-in vs inside-out testing approaches +- Behavior-driven development (BDD) principles +- AI-powered testing best practices +- Test automation patterns +- Shadow environment testing methodology + +## Changelog [LEVEL 3] + +### Version 1.1.0 (2026-01-29) + +- **NEW**: Level 4 - Shadow Environment Integration +- Added complete shadow testing workflow patterns +- Integration examples for Amplifier native and standalone CLI +- Multi-repo integration test patterns +- Web app testing in shadow environments +- Complete workflow example for library change validation +- References 
to shadow-testing skill for deep-dive documentation + +### Version 1.0.0 (2025-11-16) + +- Initial skill release +- Support for CLI, TUI, Web, and Electron applications +- 15 complete working examples +- Progressive disclosure levels (1, 2, 3) +- Embedded gadugi-agentic-test framework documentation (v0.1.0) +- Freshness check script for version monitoring +- Full integration with amplihack philosophy +- Comprehensive troubleshooting guide +- Action reference catalog + +--- + +**Remember**: Outside-in tests verify WHAT your application does, not HOW it does it. Focus on user-visible behavior, and your tests will remain stable across refactorings while providing meaningful validation of critical workflows. + +Start at Level 1 with simple smoke tests, and progressively add complexity only when needed. The framework's AI agents handle the hard parts - you just describe what should happen. diff --git a/docs/claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml b/docs/claude/skills/qa-team/examples/cli/calculator-basic.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/calculator-basic.yaml rename to docs/claude/skills/qa-team/examples/cli/calculator-basic.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml b/docs/claude/skills/qa-team/examples/cli/cli-error-handling.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/cli-error-handling.yaml rename to docs/claude/skills/qa-team/examples/cli/cli-error-handling.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml b/docs/claude/skills/qa-team/examples/cli/cli-interactive-session.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/cli/cli-interactive-session.yaml rename to docs/claude/skills/qa-team/examples/cli/cli-interactive-session.yaml diff --git 
a/docs/claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml b/docs/claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/custom-agents/custom-comprehension-agent.yaml rename to docs/claude/skills/qa-team/examples/custom-agents/custom-comprehension-agent.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml b/docs/claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/custom-agents/custom-reporter-integration.yaml rename to docs/claude/skills/qa-team/examples/custom-agents/custom-reporter-integration.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml b/docs/claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/electron-ipc-testing.yaml rename to docs/claude/skills/qa-team/examples/electron/electron-ipc-testing.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml b/docs/claude/skills/qa-team/examples/electron/electron-menu-testing.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/electron-menu-testing.yaml rename to docs/claude/skills/qa-team/examples/electron/electron-menu-testing.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml b/docs/claude/skills/qa-team/examples/electron/multi-window-coordination.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/multi-window-coordination.yaml rename to docs/claude/skills/qa-team/examples/electron/multi-window-coordination.yaml diff --git 
a/docs/claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml b/docs/claude/skills/qa-team/examples/electron/single-window-basic.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/electron/single-window-basic.yaml rename to docs/claude/skills/qa-team/examples/electron/single-window-basic.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml b/docs/claude/skills/qa-team/examples/tui/file-manager-navigation.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/file-manager-navigation.yaml rename to docs/claude/skills/qa-team/examples/tui/file-manager-navigation.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml b/docs/claude/skills/qa-team/examples/tui/tui-form-validation.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/tui-form-validation.yaml rename to docs/claude/skills/qa-team/examples/tui/tui-form-validation.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml b/docs/claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/tui/tui-performance-monitoring.yaml rename to docs/claude/skills/qa-team/examples/tui/tui-performance-monitoring.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml b/docs/claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/web/dashboard-smoke-test.yaml rename to docs/claude/skills/qa-team/examples/web/dashboard-smoke-test.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml b/docs/claude/skills/qa-team/examples/web/web-authentication-flow.yaml similarity index 100% rename from 
docs/claude/skills/outside-in-testing/examples/web/web-authentication-flow.yaml rename to docs/claude/skills/qa-team/examples/web/web-authentication-flow.yaml diff --git a/docs/claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml b/docs/claude/skills/qa-team/examples/web/web-visual-regression.yaml similarity index 100% rename from docs/claude/skills/outside-in-testing/examples/web/web-visual-regression.yaml rename to docs/claude/skills/qa-team/examples/web/web-visual-regression.yaml diff --git a/docs/claude/skills/outside-in-testing/scripts/check-freshness.py b/docs/claude/skills/qa-team/scripts/check-freshness.py similarity index 100% rename from docs/claude/skills/outside-in-testing/scripts/check-freshness.py rename to docs/claude/skills/qa-team/scripts/check-freshness.py diff --git a/docs/claude/skills/outside-in-testing/tests/test_skill_examples.py b/docs/claude/skills/qa-team/tests/test_skill_examples.py similarity index 98% rename from docs/claude/skills/outside-in-testing/tests/test_skill_examples.py rename to docs/claude/skills/qa-team/tests/test_skill_examples.py index 5b57472e6..7ffeb669b 100644 --- a/docs/claude/skills/outside-in-testing/tests/test_skill_examples.py +++ b/docs/claude/skills/qa-team/tests/test_skill_examples.py @@ -1,5 +1,5 @@ """ -Tests for outside-in-testing skill example YAML files. +Tests for qa-team skill example YAML files. 
Validates that all example YAML files are: - Valid YAML syntax @@ -276,6 +276,7 @@ def test_skill_has_yaml_frontmatter(self): # Check required frontmatter fields assert "name" in metadata, "Frontmatter missing 'name'" + assert metadata["name"] == "qa-team", "Frontmatter name should be 'qa-team'" assert "description" in metadata, "Frontmatter missing 'description'" assert "version" in metadata, "Frontmatter missing 'version'" assert "embedded_framework_version" in metadata, ( diff --git a/src/amplihack/known_skills.py b/src/amplihack/known_skills.py index 76f2331c6..9971d7bf4 100644 --- a/src/amplihack/known_skills.py +++ b/src/amplihack/known_skills.py @@ -69,6 +69,7 @@ "n-version-workflow", "novelist-analyst", "outside-in-testing", + "qa-team", "pdf", "philosopher-analyst", "philosophy-compliance-workflow", diff --git a/tests/skills/test_qa_team_skill.py b/tests/skills/test_qa_team_skill.py new file mode 100644 index 000000000..4aa1ce721 --- /dev/null +++ b/tests/skills/test_qa_team_skill.py @@ -0,0 +1,87 @@ +"""Regression tests for the qa-team skill rename and alias layout.""" + +from pathlib import Path + +import yaml + + +REPO_ROOT = Path(__file__).resolve().parents[2] +SKILL_MIRRORS = [ + REPO_ROOT / ".claude" / "skills", + REPO_ROOT / "amplifier-bundle" / "skills", + REPO_ROOT / "docs" / "claude" / "skills", +] + + +def load_frontmatter(path: Path) -> dict: + """Load YAML frontmatter from a markdown file.""" + content = path.read_text(encoding="utf-8") + parts = content.split("---", 2) + assert len(parts) >= 3, f"{path} is missing closed YAML frontmatter" + metadata = yaml.safe_load(parts[1]) + assert isinstance(metadata, dict), f"{path} frontmatter should parse to a mapping" + return metadata + + +def test_known_skills_registry_includes_qa_team(): + """The known-skills registry should expose qa-team as a first-class skill.""" + known_skills = (REPO_ROOT / "src" / "amplihack" / "known_skills.py").read_text( + encoding="utf-8" + ) + assert '"qa-team"' in 
known_skills + assert '"outside-in-testing"' in known_skills + + +def test_bundle_registers_both_primary_and_alias(): + """The bundle index should advertise both the new primary skill and alias.""" + bundle = (REPO_ROOT / "amplifier-bundle" / "bundle.md").read_text(encoding="utf-8") + assert "outside-in-testing: { path: skills/outside-in-testing/SKILL.md }" in bundle + assert "qa-team: { path: skills/qa-team/SKILL.md }" in bundle + + +def test_qa_team_primary_skill_is_present_in_all_mirrors(): + """All shipped skill mirrors should contain qa-team with the new frontmatter name.""" + for skills_dir in SKILL_MIRRORS: + skill_file = skills_dir / "qa-team" / "SKILL.md" + metadata = load_frontmatter(skill_file) + content = skill_file.read_text(encoding="utf-8") + + assert metadata["name"] == "qa-team" + assert "--observable" in content + assert "--ssh-target" in content + assert "--shadow-mode" in content + assert "outside-in-testing" in content + + +def test_outside_in_testing_alias_points_to_qa_team_in_all_mirrors(): + """The legacy skill name should remain available as an alias that redirects to qa-team.""" + for skills_dir in SKILL_MIRRORS: + alias_dir = skills_dir / "outside-in-testing" + alias_skill = alias_dir / "SKILL.md" + metadata = load_frontmatter(alias_skill) + content = alias_skill.read_text(encoding="utf-8") + + assert metadata["name"] == "outside-in-testing" + assert "alias for `qa-team`" in content + + for name in ["README.md", "examples", "scripts", "tests"]: + alias_path = alias_dir / name + assert alias_path.is_symlink(), f"{alias_path} should be a symlink" + + +def test_workflow_and_profile_prefer_qa_team_name(): + """Core workflow surfaces should now recommend qa-team for new invocations.""" + default_workflow = ( + REPO_ROOT / "amplifier-bundle" / "recipes" / "default-workflow.yaml" + ).read_text(encoding="utf-8") + coding_profile = (REPO_ROOT / ".claude" / "profiles" / "coding.yaml").read_text( + encoding="utf-8" + ) + generator_skill = ( + 
REPO_ROOT / ".claude" / "skills" / "e2e-outside-in-test-generator" / "SKILL.md" + ).read_text(encoding="utf-8") + + assert 'Skill(skill="qa-team")' in default_workflow + assert "`outside-in-testing` remains an alias" in default_workflow + assert '- "qa-team"' in coding_profile + assert "qa-team (primary methodology validation" in generator_skill From 3c62ee4501b8bf9acfd55ec98d97b81f8063002f Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 15:08:03 +0000 Subject: [PATCH 2/4] Sync smart-test qa-team references Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- amplifier-bundle/skills/smart-test/README.md | 2 +- amplifier-bundle/skills/smart-test/SKILL.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/amplifier-bundle/skills/smart-test/README.md b/amplifier-bundle/skills/smart-test/README.md index 868b90ac9..80e95d3a8 100644 --- a/amplifier-bundle/skills/smart-test/README.md +++ b/amplifier-bundle/skills/smart-test/README.md @@ -94,7 +94,7 @@ User: Rebuild test mapping cache ## Related Skills - `test-gap-analyzer`: Find untested code -- `outside-in-testing`: Create E2E tests +- `qa-team`: Create E2E and parity tests (`outside-in-testing` alias supported) - `pre-commit-diagnostic`: Fix hook failures --- diff --git a/amplifier-bundle/skills/smart-test/SKILL.md b/amplifier-bundle/skills/smart-test/SKILL.md index 5aa0126c6..17d54a568 100644 --- a/amplifier-bundle/skills/smart-test/SKILL.md +++ b/amplifier-bundle/skills/smart-test/SKILL.md @@ -28,7 +28,7 @@ invokes: - type: skill name: test-gap-analyzer - type: skill - name: outside-in-testing + name: qa-team - type: skill name: pre-commit-diagnostic - type: subagent @@ -360,7 +360,7 @@ Works with existing pytest markers from pyproject.toml: ## Complementary Skills - **test-gap-analyzer**: Identifies missing tests -- **outside-in-testing**: Creates E2E test scenarios +- **qa-team**: Creates E2E and parity test scenarios (`outside-in-testing` alias supported) - 
**tester agent**: Writes new tests for gaps - **pre-commit-diagnostic**: Fixes pre-commit failures From 2d30edd4b7fcc255d798fd01008274fa738ceb58 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Tue, 10 Mar 2026 15:16:08 +0000 Subject: [PATCH 3/4] [skip ci] chore: Auto-bump patch version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0de4dec46..701f70797 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ backend-path = ["."] [project] name = "amplihack" -version = "0.6.20" +version = "0.6.21" description = "Amplifier bundle for agentic coding with comprehensive skills, recipes, and workflows" requires-python = ">=3.11" dependencies = [ From d6a43b34fb5bfacd62de0355e032e03a77101884 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 10 Mar 2026 15:17:05 +0000 Subject: [PATCH 4/4] Sync bundle skill drift blockers Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- amplifier-bundle/skills/common/verification/verify_skill.py | 1 - amplifier-bundle/skills/context-management/automation.py | 1 - .../skills/pm-architect/scripts/generate_daily_status.py | 1 - .../skills/pm-architect/scripts/generate_roadmap_review.py | 1 - amplifier-bundle/skills/pm-architect/scripts/triage_pr.py | 1 - 5 files changed, 5 deletions(-) diff --git a/amplifier-bundle/skills/common/verification/verify_skill.py b/amplifier-bundle/skills/common/verification/verify_skill.py index a7d2db19e..50d5d100e 100755 --- a/amplifier-bundle/skills/common/verification/verify_skill.py +++ b/amplifier-bundle/skills/common/verification/verify_skill.py @@ -33,7 +33,6 @@ def check_python_package(package: str) -> tuple[bool, str]: version = getattr(mod, "__version__", "unknown") return True, f"Installed (v{version})" except ImportError: - print(f"WARNING: {package} not available", file=sys.stderr) return False, "Not installed" diff --git a/amplifier-bundle/skills/context-management/automation.py 
b/amplifier-bundle/skills/context-management/automation.py index 4ca6b5606..8ecc6f5c1 100644 --- a/amplifier-bundle/skills/context-management/automation.py +++ b/amplifier-bundle/skills/context-management/automation.py @@ -20,7 +20,6 @@ TokenMonitor, ) except ImportError: - print("WARNING: context_management not available", file=sys.stderr) # Fallback for when running from hooks from .context_extractor import ContextExtractor from .context_rehydrator import ContextRehydrator diff --git a/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py b/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py index 60f2b9ae4..ea7be0de4 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py +++ b/amplifier-bundle/skills/pm-architect/scripts/generate_daily_status.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False diff --git a/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py b/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py index b1bb36e72..32323e414 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py +++ b/amplifier-bundle/skills/pm-architect/scripts/generate_roadmap_review.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False diff --git a/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py b/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py index 2cd73d62c..8d25ce07f 100755 --- a/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py +++ b/amplifier-bundle/skills/pm-architect/scripts/triage_pr.py @@ -21,7 +21,6 @@ CLAUDE_SDK_AVAILABLE = True except ImportError: - print("WARNING: claude_agent_sdk not available", file=sys.stderr) CLAUDE_SDK_AVAILABLE = False