diff --git a/skills/agent-browser/SKILL.md b/skills/agent-browser/SKILL.md index ab3ea3c6..00747388 100644 --- a/skills/agent-browser/SKILL.md +++ b/skills/agent-browser/SKILL.md @@ -69,6 +69,7 @@ agent-browser scroll down 500 # Scroll page (default: down 300px) agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto) agent-browser drag @e1 @e2 # Drag and drop agent-browser upload @e1 file.pdf # Upload files +agent-browser download @e1 ./file.pdf # Download file (click + wait combined) ``` ### Get information @@ -96,8 +97,8 @@ agent-browser is checked @e1 # Check if checked ### Screenshots & PDF ```bash -agent-browser screenshot # Save to a temporary directory -agent-browser screenshot path.png # Save to a specific path +agent-browser screenshot # Screenshot to stdout +agent-browser screenshot path.png # Save to file agent-browser screenshot --full # Full page agent-browser pdf output.pdf # Save as PDF ``` @@ -123,6 +124,8 @@ agent-browser wait --text "Success" # Wait for text (or -t) agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u) agent-browser wait --load networkidle # Wait for network idle (or -l) agent-browser wait --fn "window.ready" # Wait for JS condition (or -f) +agent-browser wait --download # Wait for download to complete (or -d) +agent-browser wait --download ./file.pdf # Wait and save to specific path ``` ### Mouse control @@ -167,7 +170,12 @@ agent-browser set media light reduced-motion # Light mode + reduced motion ```bash agent-browser cookies # Get all cookies -agent-browser cookies set name value # Set cookie +agent-browser cookies set name value # Set cookie (basic) +agent-browser cookies set name value --url https://app.example.com # Set for specific URL +agent-browser cookies set name value --domain example.com --path /api # With domain/path +agent-browser cookies set name value --httpOnly --secure # Security flags +agent-browser cookies set name value --sameSite Strict # SameSite policy 
(Strict|Lax|None) +agent-browser cookies set name value --expires 1735689600 # Unix timestamp expiry agent-browser cookies clear # Clear cookies agent-browser storage local # Get all localStorage agent-browser storage local key # Get specific key @@ -221,15 +229,21 @@ agent-browser eval "document.title" # Run JavaScript ```bash agent-browser --session ... # Isolated browser session +agent-browser --profile ... # Persistent browser profile (cookies, localStorage) agent-browser --json ... # JSON output for parsing agent-browser --headed ... # Show browser window (not headless) agent-browser --full ... # Full page screenshot (-f) agent-browser --cdp ... # Connect via Chrome DevTools Protocol agent-browser -p ... # Cloud browser provider (--provider) agent-browser --proxy ... # Use proxy server +agent-browser --proxy-bypass <hosts> # Bypass proxy for hosts (comma-separated) +agent-browser --args ... # Browser launch args (comma-separated) +agent-browser --user-agent ... # Custom User-Agent string agent-browser --headers ... # HTTP headers scoped to URL's origin agent-browser --executable-path <path> # Custom browser executable agent-browser --extension ... # Load browser extension (repeatable) +agent-browser --state ... # Load saved browser state (cookies, storage) +agent-browser --debug ... # Debug output for troubleshooting agent-browser --help # Show help (-h) agent-browser --version # Show version (-V) agent-browser <command> --help # Show detailed help for a command @@ -247,9 +261,15 @@ agent-browser --proxy socks5://proxy.com:1080 open example.com ```bash AGENT_BROWSER_SESSION="mysession" # Default session name +AGENT_BROWSER_PROFILE="~/.myapp" # Persistent browser profile path AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths -AGENT_BROWSER_PROVIDER="your-cloud-browser-provider" # Cloud browser provider (select browseruse or browserbase) +AGENT_BROWSER_PROVIDER="kernel" # Cloud browser provider (kernel, browserbase, browseruse) +AGENT_BROWSER_STATE="./auth.json" # Load storage state from JSON file +AGENT_BROWSER_ARGS="--no-sandbox,--disable-gpu" # Browser launch args +AGENT_BROWSER_USER_AGENT="CustomAgent/1.0" # Custom User-Agent string +AGENT_BROWSER_PROXY="http://proxy:8080" # Proxy server URL +AGENT_BROWSER_PROXY_BYPASS="localhost,*.local" # Bypass proxy for these hosts AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js) ``` @@ -271,7 +291,7 @@ agent-browser snapshot -i # Check result ## Example: Authentication with saved state ```bash -# Login once +# Login once and save state agent-browser open https://app.example.com/login agent-browser snapshot -i agent-browser fill @e1 "username" @@ -279,10 +299,12 @@ agent-browser fill @e2 "password" agent-browser click @e3 agent-browser wait --url "**/dashboard" agent-browser state save auth.json +agent-browser close -# Later sessions: load saved state -agent-browser state load auth.json -agent-browser open
https://app.example.com/dashboard +# Later sessions: load state at launch +agent-browser --state auth.json open https://app.example.com/dashboard +# Or via environment variable: +# AGENT_BROWSER_STATE="auth.json" agent-browser open https://app.example.com/dashboard ``` ## Sessions (parallel browsers) @@ -326,10 +348,15 @@ For detailed patterns and best practices, see: | Reference | Description | |-----------|-------------| | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting | +| [references/semantic-locators.md](references/semantic-locators.md) | Role, text, label locators for stable automation | | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping | | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse | +| [references/network-mocking.md](references/network-mocking.md) | API mocking, request blocking, error simulation | | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation | | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies | +| [references/persistent-profiles.md](references/persistent-profiles.md) | Browser profile persistence, login state reuse | +| [references/cloud-providers.md](references/cloud-providers.md) | Kernel, Browserbase, and Browser Use cloud integration | +| [references/debugging.md](references/debugging.md) | Troubleshooting, traces, common issues | ## Ready-to-use templates @@ -340,12 +367,18 @@ Executable workflow scripts for common patterns: | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation | | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state | | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with
screenshots | +| [templates/download-workflow.sh](templates/download-workflow.sh) | File downloads, exports, PDFs | +| [templates/network-mocking.sh](templates/network-mocking.sh) | API mocking for testing UI states | +| [templates/multi-tab-workflow.sh](templates/multi-tab-workflow.sh) | Multi-tab comparison and parallel ops | Usage: ```bash ./templates/form-automation.sh https://example.com/form ./templates/authenticated-session.sh https://app.example.com/login ./templates/capture-workflow.sh https://example.com ./output +./templates/download-workflow.sh https://example.com/exports ./downloads +./templates/network-mocking.sh https://app.example.com ./screenshots +./templates/multi-tab-workflow.sh https://a.com https://b.com ./output ``` ## HTTPS Certificate Errors diff --git a/skills/agent-browser/references/cloud-providers.md b/skills/agent-browser/references/cloud-providers.md new file mode 100644 index 00000000..363074e6 --- /dev/null +++ b/skills/agent-browser/references/cloud-providers.md @@ -0,0 +1,222 @@ +# Cloud Browser Providers + +Connect to cloud browser infrastructure for scalable automation without managing local browsers. + +## Supported Providers + +| Provider | Description | +|----------|-------------| +| `kernel` | [Kernel](https://www.kernel.sh) - Cloud browsers with stealth mode and profiles | +| `browserbase` | [Browserbase](https://browserbase.com) - Headless browser infrastructure | +| `browseruse` | [Browser Use](https://browser-use.com) - AI-native browser automation | + +## Basic Usage + +```bash +# Via command line flag +agent-browser -p browserbase open https://example.com +agent-browser --provider browserbase open https://example.com + +# Via environment variable +export AGENT_BROWSER_PROVIDER="browserbase" +agent-browser open https://example.com +``` + +## Kernel Setup + +[Kernel](https://www.kernel.sh) provides cloud browser infrastructure for AI agents with stealth mode and persistent profiles. + +### 1. 
Get API Key + +Sign up at [dashboard.onkernel.com](https://dashboard.onkernel.com) and get your API key. + +### 2. Configure Environment + +```bash +export KERNEL_API_KEY="your-api-key" +``` + +### 3. Use with agent-browser + +```bash +agent-browser -p kernel open https://example.com +agent-browser -p kernel snapshot -i +agent-browser -p kernel click @e1 +agent-browser -p kernel close +``` + +### Kernel-Specific Options + +| Variable | Description | Default | +|----------|-------------|---------| +| `KERNEL_API_KEY` | Required API key | (none) | +| `KERNEL_HEADLESS` | Headless mode (`true`/`false`) | `false` | +| `KERNEL_STEALTH` | Stealth mode to avoid bot detection | `true` | +| `KERNEL_TIMEOUT_SECONDS` | Session timeout in seconds | `300` | +| `KERNEL_PROFILE_NAME` | Profile name for persistent cookies/logins | (none) | + +### Profile Persistence with Kernel + +Kernel uniquely supports persistent profiles in the cloud: + +```bash +# First session - login and save to profile +export KERNEL_PROFILE_NAME="my-app-profile" +agent-browser -p kernel open https://app.example.com/login +agent-browser -p kernel fill @e1 "username" +agent-browser -p kernel fill @e2 "password" +agent-browser -p kernel click @e3 +agent-browser -p kernel close # Cookies saved to profile + +# Later sessions - profile auto-loads +export KERNEL_PROFILE_NAME="my-app-profile" +agent-browser -p kernel open https://app.example.com/dashboard # Already logged in! +``` + +--- + +## Browserbase Setup + +### 1. Get API Key + +Sign up at [browserbase.com](https://browserbase.com) and get your API key. + +### 2. Configure Environment + +```bash +export BROWSERBASE_API_KEY="your-api-key" +export BROWSERBASE_PROJECT_ID="your-project-id" # Optional +``` + +### 3. 
Use with agent-browser + +```bash +agent-browser -p browserbase open https://example.com +agent-browser -p browserbase snapshot -i +agent-browser -p browserbase click @e1 +agent-browser -p browserbase screenshot ./result.png +agent-browser -p browserbase close +``` + +## Browser Use Setup + +### 1. Get API Key + +Sign up at [browser-use.com](https://browser-use.com) and get your API key. + +### 2. Configure Environment + +```bash +export BROWSER_USE_API_KEY="your-api-key" +``` + +### 3. Use with agent-browser + +```bash +agent-browser -p browseruse open https://example.com +agent-browser -p browseruse snapshot -i +agent-browser -p browseruse click @e1 +agent-browser -p browseruse close +``` + +## Remote CDP WebSocket + +For custom cloud browser setups, connect via WebSocket URL: + +```bash +# Connect to remote browser via WebSocket +agent-browser --cdp "wss://browser.example.com/ws" snapshot -i +``` + +## Provider vs CDP + +| Feature | `-p provider` | `--cdp` | +|---------|---------------|---------| +| Setup | API key only | URL/port required | +| Scaling | Provider handles | Self-managed | +| Extensions | Not supported | Supported (local) | +| Best for | Cloud infrastructure | Debugging, custom setups | + +## Limitations + +When using cloud providers: + +- **No extensions** - Browser extensions require local browser +- **No --headed** - Browsers run headless in the cloud (except Kernel: headful by default) +- **No --profile** - Persistent profiles are local-only (except Kernel: use `KERNEL_PROFILE_NAME`) + +```bash +# These will error with most providers +agent-browser -p browserbase --extension ./ext # Error +agent-browser -p browserbase --headed # Error + +# Kernel supports profiles via its own env var +KERNEL_PROFILE_NAME="myprofile" agent-browser -p kernel open https://example.com # Works! 
+``` + +## Common Patterns + +### CI/CD Integration + +```yaml +# GitHub Actions example +jobs: + test: + runs-on: ubuntu-latest + env: + AGENT_BROWSER_PROVIDER: browserbase + BROWSERBASE_API_KEY: ${{ secrets.BROWSERBASE_API_KEY }} + steps: + - run: agent-browser open https://example.com + - run: agent-browser snapshot -i + - run: agent-browser screenshot ./result.png +``` + +### Parallel Execution + +```bash +#!/bin/bash +# Run multiple browser sessions in cloud +for i in {1..10}; do + agent-browser -p browserbase --session "worker-$i" open "https://example.com/page$i" & +done +wait + +# Collect results +for i in {1..10}; do + agent-browser -p browserbase --session "worker-$i" screenshot "./result-$i.png" + agent-browser -p browserbase --session "worker-$i" close +done +``` + +### Fallback to Local + +```bash +#!/bin/bash +# Try cloud first, fall back to local +if [ -n "$BROWSERBASE_API_KEY" ]; then + agent-browser -p browserbase open https://example.com +else + agent-browser open https://example.com +fi +``` + +## Debugging Cloud Sessions + +```bash +# Get session info +agent-browser -p browserbase session + +# View console logs +agent-browser -p browserbase console + +# View errors +agent-browser -p browserbase errors +``` + +## Cost Optimization + +1. **Close sessions promptly** - Cloud sessions may bill by time +2. **Use snapshots efficiently** - Each command is a round-trip +3. **Batch operations** - Combine related actions +4. **Use local for development** - Only use cloud in CI/production diff --git a/skills/agent-browser/references/debugging.md b/skills/agent-browser/references/debugging.md new file mode 100644 index 00000000..5f2ffea0 --- /dev/null +++ b/skills/agent-browser/references/debugging.md @@ -0,0 +1,389 @@ +# Debugging Guide + +Tools and techniques for troubleshooting browser automation issues. 
+ +## Quick Diagnostics + +```bash +# Show browser window (see what's happening) +agent-browser --headed open https://example.com + +# View console logs +agent-browser console + +# View page errors +agent-browser errors + +# Highlight element to verify selection +agent-browser highlight @e1 +``` + +## Debugging Commands + +### Console Logs + +```bash +# View all console messages +agent-browser console + +# Clear console +agent-browser console --clear +``` + +Console output includes: +- `console.log()` messages +- `console.warn()` warnings +- `console.error()` errors +- Unhandled promise rejections + +### Page Errors + +```bash +# View JavaScript errors +agent-browser errors + +# Clear errors +agent-browser errors --clear +``` + +### Element Highlighting + +```bash +# Visually highlight an element +agent-browser highlight @e1 + +# Useful for verifying you have the right element +agent-browser snapshot -i +agent-browser highlight @e5 # Is this the button I think it is? +``` + +### Trace Recording + +```bash +# Start recording all browser activity +agent-browser trace start + +# Perform actions +agent-browser open https://example.com +agent-browser click @e1 +agent-browser fill @e2 "test" + +# Stop and save trace +agent-browser trace stop ./trace.zip +``` + +Trace files can be viewed at [trace.playwright.dev](https://trace.playwright.dev). 
+ +### Video Recording + +```bash +# Record video of browser actions +agent-browser record start ./debug-session.webm +agent-browser open https://example.com +agent-browser snapshot -i +agent-browser click @e1 +agent-browser record stop +``` + +## Headed Mode + +Run with visible browser window: + +```bash +# See exactly what the browser sees +agent-browser --headed open https://example.com +agent-browser --headed snapshot -i +agent-browser --headed click @e1 +``` + +Useful for: +- Seeing page state during automation +- Handling CAPTCHAs manually +- Debugging complex interactions +- Understanding timing issues + +## CDP Connection + +Connect to an existing browser for debugging: + +```bash +# Launch Chrome with debugging port +/Applications/Google\ Chrome.app/Contents/MacOS/Google\ Chrome --remote-debugging-port=9222 + +# Connect agent-browser +agent-browser --cdp 9222 snapshot -i +agent-browser --cdp 9222 click @e1 +``` + +Or use the connect command: + +```bash +agent-browser connect 9222 +agent-browser snapshot -i +``` + +## Common Issues + +### Element Not Found + +**Symptoms:** +``` +Error: Element @e5 not found +``` + +**Causes & Solutions:** + +1. **Refs changed after navigation** + ```bash + # Re-snapshot to get fresh refs + agent-browser snapshot -i + ``` + +2. **Element not yet loaded** + ```bash + # Wait for element + agent-browser wait @e5 + agent-browser click @e5 + + # Or wait for network + agent-browser wait --load networkidle + agent-browser snapshot -i + ``` + +3. **Element in iframe** + ```bash + # Switch to iframe first + agent-browser frame "#iframe-id" + agent-browser snapshot -i + agent-browser click @e1 + agent-browser frame main # Switch back + ``` + +4. **Element hidden/not interactive** + ```bash + # Scroll element into view + agent-browser scrollintoview @e5 + agent-browser click @e5 + ``` + +### Click Not Working + +**Symptoms:** Click succeeds but nothing happens + +**Solutions:** + +1. 
**Element obscured by overlay** + ```bash + # Check for modals/overlays + agent-browser snapshot -i + # Look for overlay elements, close them first + agent-browser click @overlay-close + ``` + +2. **Need to wait for JS handlers** + ```bash + agent-browser wait 500 # Brief delay for JS + agent-browser click @e1 + ``` + +3. **Wrong element** + ```bash + # Verify with highlight + agent-browser highlight @e1 + # Or use headed mode + agent-browser --headed click @e1 + ``` + +### Form Not Submitting + +**Symptoms:** Fill works but submit doesn't + +**Solutions:** + +1. **Form validation failing** + ```bash + # Check for validation errors + agent-browser snapshot -i + # Look for error messages + ``` + +2. **Need Enter key instead of click** + ```bash + agent-browser fill @e1 "value" + agent-browser press Enter + ``` + +3. **Multiple submit buttons** + ```bash + # Use more specific selector + agent-browser find role button click --name "Submit Form" + ``` + +### Page Not Loading + +**Symptoms:** Timeout or blank page + +**Solutions:** + +1. **Check URL** + ```bash + agent-browser get url + ``` + +2. **Check for redirects** + ```bash + agent-browser wait --url "**/expected-path" + ``` + +3. **Check network** + ```bash + agent-browser network requests + ``` + +4. 
**Use longer timeout** + ```bash + agent-browser wait --load networkidle --timeout 30000 + ``` + +### Stale References + +**Symptoms:** Refs worked before but now fail + +**Solution:** Always re-snapshot after: +- Page navigation +- Form submission +- Modal open/close +- Dynamic content load +- Tab switch + +```bash +agent-browser click @submit-button +agent-browser wait --load networkidle +agent-browser snapshot -i # ALWAYS re-snapshot +agent-browser click @e1 # Now use new refs +``` + +## Debugging Workflow + +### Step 1: Reproduce in Headed Mode + +```bash +agent-browser --headed open https://example.com +agent-browser --headed snapshot -i +# Follow your automation steps, watch what happens +``` + +### Step 2: Check Console and Errors + +```bash +agent-browser console +agent-browser errors +``` + +### Step 3: Record a Trace + +```bash +agent-browser trace start +# Run failing automation +agent-browser trace stop ./debug-trace.zip +# Open at trace.playwright.dev +``` + +### Step 4: Isolate the Problem + +```bash +# Test each step individually +agent-browser open https://example.com +agent-browser wait --load networkidle +agent-browser snapshot -i +# Stop here - does snapshot look right? + +agent-browser click @e1 +# Did click work? 
Check state +agent-browser get url +agent-browser snapshot -i +``` + +## Environment Debugging + +### Check Browser Version + +```bash +agent-browser --version +``` + +### Check Browser Path + +```bash +echo $AGENT_BROWSER_EXECUTABLE_PATH +agent-browser open about:version +agent-browser get text body +``` + +### Check Session State + +```bash +agent-browser session +agent-browser session list +``` + +### Debug Logs + +```bash +# Enable debug output +agent-browser --debug open https://example.com +``` + +## Performance Debugging + +### Slow Page Load + +```bash +# Check what's loading +agent-browser network requests + +# Block heavy resources +agent-browser network route "**/*.mp4" --abort +agent-browser network route "**/analytics**" --abort +``` + +### Memory Issues + +```bash +# Close unused sessions +agent-browser session list +agent-browser --session old-session close + +# Clear storage +agent-browser cookies clear +agent-browser storage local clear +``` + +## CI/CD Debugging + +### Capture Evidence + +```bash +#!/bin/bash +set -e + +# Always capture screenshot on failure +cleanup() { + agent-browser screenshot ./failure-screenshot.png || true + agent-browser console > ./console-logs.txt || true + agent-browser errors > ./error-logs.txt || true +} +trap cleanup EXIT + +# Run automation +agent-browser open https://example.com +# ... rest of automation +``` + +### Artifacts to Save + +- Screenshots at key steps +- Console logs +- Error logs +- Trace files for complex failures +- Video recordings for visual verification diff --git a/skills/agent-browser/references/network-mocking.md b/skills/agent-browser/references/network-mocking.md new file mode 100644 index 00000000..ccdab15e --- /dev/null +++ b/skills/agent-browser/references/network-mocking.md @@ -0,0 +1,250 @@ +# Network Mocking & Interception + +Intercept, mock, and block network requests for API testing, error simulation, and offline development. 
+ +## Quick Start + +```bash +# Mock an API response +agent-browser network route "https://api.example.com/users" --body '{"users": []}' + +# Block analytics/tracking +agent-browser network route "**/analytics**" --abort + +# View intercepted requests +agent-browser network requests +``` + +## Route Commands + +```bash +# Intercept and log requests (no modification) +agent-browser network route <url> + +# Block requests entirely +agent-browser network route <url> --abort + +# Return custom response body +agent-browser network route <url> --body '<json>' + +# Remove a route +agent-browser network unroute <url> + +# Remove all routes +agent-browser network unroute +``` + +## URL Patterns + +```bash +# Exact URL +agent-browser network route "https://api.example.com/users" + +# Wildcard matching +agent-browser network route "**/api/**" # Any path containing /api/ +agent-browser network route "**/*.png" # All PNG images +agent-browser network route "**/analytics**" # Anything with "analytics" + +# Domain matching +agent-browser network route "https://*.example.com/**" +``` + +## Common Patterns + +### Mock API Responses + +```bash +# Mock user list +agent-browser network route "https://api.example.com/users" \ + --body '{"users": [{"id": 1, "name": "Test User"}]}' + +# Mock empty state +agent-browser network route "https://api.example.com/notifications" \ + --body '{"notifications": [], "count": 0}' + +# Mock error response (body only - status codes not configurable) +agent-browser network route "https://api.example.com/profile" \ + --body '{"error": "Not found"}' +``` + +### Block Unwanted Requests + +```bash +# Block analytics and tracking +agent-browser network route "**/google-analytics.com/**" --abort +agent-browser network route "**/facebook.com/tr**" --abort +agent-browser network route "**/hotjar.com/**" --abort + +# Block ads +agent-browser network route "**/ads.**" --abort +agent-browser network route "**/doubleclick.net/**" --abort + +# Block media for faster loading +agent-browser
network route "**/*.mp4" --abort +agent-browser network route "**/*.webm" --abort +``` + +### Simulate Network Conditions + +```bash +#!/bin/bash +# Test offline behavior + +# Block all external APIs +agent-browser network route "**/api.example.com/**" --abort + +# Navigate and test +agent-browser open https://app.example.com +agent-browser snapshot -i + +# Check for offline indicators +agent-browser get text ".error-message" +``` + +### Test Error Handling + +```bash +#!/bin/bash +# Simulate various API errors + +# Simulate server error response +agent-browser network route "https://api.example.com/submit" \ + --body '{"error": "Internal server error"}' + +agent-browser open https://app.example.com/form +agent-browser snapshot -i +agent-browser fill @e1 "test data" +agent-browser click @e2 # Submit button + +# Verify error handling UI +agent-browser snapshot -i +agent-browser screenshot ./error-handling.png +``` + +### Mock Authentication + +```bash +# Mock successful auth response +agent-browser network route "https://api.example.com/auth/login" \ + --body '{"token": "mock-jwt-token", "user": {"id": 1, "email": "test@example.com"}}' + +# Mock auth failure +agent-browser network route "https://api.example.com/auth/login" \ + --body '{"error": "Invalid credentials"}' +``` + +## Viewing Requests + +```bash +# View all tracked requests +agent-browser network requests + +# Filter by pattern +agent-browser network requests --filter api +agent-browser network requests --filter ".json" + +# Clear request log +agent-browser network requests --clear +``` + +### Request Log Format + +``` +[200] GET https://api.example.com/users (45ms) +[POST] https://api.example.com/login (120ms) -> 200 +[BLOCKED] https://analytics.example.com/track +``` + +## Advanced Patterns + +### API Version Testing + +```bash +#!/bin/bash +# Test app against different API versions + +# Mock v1 response format +agent-browser network route "https://api.example.com/v1/data" \ + --body '{"items": [...]}' 
+ +agent-browser open https://app.example.com +agent-browser screenshot ./v1-response.png + +# Clear and mock v2 response format +agent-browser network unroute "https://api.example.com/v1/data" +agent-browser network route "https://api.example.com/v2/data" \ + --body '{"data": {"items": [...], "meta": {...}}}' + +agent-browser reload +agent-browser screenshot ./v2-response.png +``` + +### Rate Limit Simulation + +```bash +# Return rate limit error response +agent-browser network route "https://api.example.com/search" \ + --body '{"error": "Rate limit exceeded", "retry_after": 60}' +``` + +### Offline Simulation + +```bash +# Use set offline to simulate disconnection +agent-browser set offline on +# ... test offline behavior ... +agent-browser set offline off +``` + +## Best Practices + +1. **Clean up routes after tests** + ```bash + agent-browser network unroute # Remove all routes + ``` + +2. **Use specific patterns** - Avoid overly broad patterns that might block essential resources + +3. **Test with real APIs first** - Mock after understanding the actual API behavior + +4. **Log requests during development** + ```bash + agent-browser network route "**/api/**" # Just log, don't modify + agent-browser network requests + ``` + +5. **Combine with state save** - Save state after mocking for reproducible tests + ```bash + agent-browser network route "..." --body "..." + agent-browser open https://app.example.com + agent-browser state save ./mocked-state.json + ``` + +## Troubleshooting + +### Routes Not Working + +```bash +# Check which requests were intercepted +agent-browser network requests + +# Ensure pattern matches - test with exact URL first +agent-browser network route "https://exact.url.com/path" +``` + +### Requests Still Going Through + +```bash +# Some requests may bypass routing (WebSocket, etc.)
+# Use --abort for hard blocking +agent-browser network route "**/unwanted/**" --abort +``` + +### CORS Issues with Mocked Responses + +```bash +# Mock response may need CORS headers for browser to accept +agent-browser network route "https://api.example.com/data" \ + --body '{"data": []}' \ + --headers '{"Access-Control-Allow-Origin": "*"}' +``` diff --git a/skills/agent-browser/references/persistent-profiles.md b/skills/agent-browser/references/persistent-profiles.md new file mode 100644 index 00000000..a64f95dc --- /dev/null +++ b/skills/agent-browser/references/persistent-profiles.md @@ -0,0 +1,134 @@ +# Persistent Browser Profiles + +Store cookies, localStorage, and login sessions across browser restarts using the `--profile` flag. + +## Basic Usage + +```bash +# First session: login and build up state +agent-browser --profile ~/.myapp open https://app.example.com/login +agent-browser --profile ~/.myapp snapshot -i +agent-browser --profile ~/.myapp fill @e1 "username" +agent-browser --profile ~/.myapp fill @e2 "password" +agent-browser --profile ~/.myapp click @e3 +agent-browser --profile ~/.myapp close + +# Later session: already logged in +agent-browser --profile ~/.myapp open https://app.example.com/dashboard +# No login needed - cookies and session persist +``` + +## Environment Variable + +Set a default profile path: + +```bash +export AGENT_BROWSER_PROFILE="~/.myapp-browser" +agent-browser open https://app.example.com # Uses profile automatically +``` + +## Profile vs Session + +| Feature | `--session` | `--profile` | +|---------|-------------|-------------| +| Isolation | In-memory, lost on close | Persisted to disk | +| Cookies | Session only | Persist across restarts | +| localStorage | Session only | Persist across restarts | +| Use case | Parallel testing | Long-lived auth state | + +## Common Patterns + +### Login Once, Reuse Forever + +```bash +#!/bin/bash +PROFILE="$HOME/.app-profile" + +# Check if we need to login +agent-browser --profile 
"$PROFILE" open https://app.example.com +URL=$(agent-browser --profile "$PROFILE" get url) + +if [[ "$URL" == *"/login"* ]]; then + echo "Not logged in, performing login..." + agent-browser --profile "$PROFILE" snapshot -i + agent-browser --profile "$PROFILE" fill @e1 "$USERNAME" + agent-browser --profile "$PROFILE" fill @e2 "$PASSWORD" + agent-browser --profile "$PROFILE" click @e3 + agent-browser --profile "$PROFILE" wait --url "**/dashboard" +fi + +# Now authenticated - continue with automation +agent-browser --profile "$PROFILE" snapshot -i +``` + +### Multiple Accounts + +```bash +# Different profile per account +agent-browser --profile ~/.app-user1 open https://app.example.com +agent-browser --profile ~/.app-user2 open https://app.example.com +agent-browser --profile ~/.app-admin open https://app.example.com/admin +``` + +### Development vs Production + +```bash +# Separate profiles for different environments +agent-browser --profile ~/.app-dev open https://dev.example.com +agent-browser --profile ~/.app-staging open https://staging.example.com +agent-browser --profile ~/.app-prod open https://app.example.com +``` + +## What Gets Persisted + +- Cookies (including HttpOnly cookies) +- localStorage +- sessionStorage (restored on next launch) +- IndexedDB +- Service Worker registrations +- Cache Storage + +## Profile Location + +The profile is stored at the specified path: + +```bash +~/.myapp/ +├── Default/ +│ ├── Cookies +│ ├── Local Storage/ +│ ├── Session Storage/ +│ └── ... +└── ... 
+``` + +## Clearing Profile State + +```bash +# Clear specific data +agent-browser --profile ~/.myapp cookies clear +agent-browser --profile ~/.myapp storage local clear + +# Or delete the entire profile +rm -rf ~/.myapp +``` + +## Combining with Sessions + +You can use both `--profile` and `--session` together: + +```bash +# Named session within a profile +agent-browser --profile ~/.myapp --session test1 open https://example.com +agent-browser --profile ~/.myapp --session test2 open https://example.com +``` + +This creates isolated sessions that still share the same persistent profile data. + +## Best Practices + +1. **Use descriptive profile paths** - `~/.myapp-browser` instead of `~/profile1` +2. **One profile per application/account** - Don't mix different apps in one profile +3. **Gitignore profiles** - Add profile paths to `.gitignore` to avoid committing auth state +4. **Rotate profiles periodically** - Delete and recreate if sessions become stale +5. **Use environment variables in CI** - Set `AGENT_BROWSER_PROFILE` for consistent automation diff --git a/skills/agent-browser/references/proxy-support.md b/skills/agent-browser/references/proxy-support.md index 05fcec26..df39a433 100644 --- a/skills/agent-browser/references/proxy-support.md +++ b/skills/agent-browser/references/proxy-support.md @@ -2,12 +2,43 @@ Configure proxy servers for browser automation, useful for geo-testing, rate limiting avoidance, and corporate environments. 
+## Quick Start + +```bash +# Via command line flag +agent-browser --proxy http://proxy.example.com:8080 open https://example.com + +# With authentication +agent-browser --proxy http://user:pass@proxy.example.com:8080 open https://example.com + +# SOCKS5 proxy +agent-browser --proxy socks5://proxy.example.com:1080 open https://example.com +``` + +## Command Line Flags + +```bash +--proxy <url> # Proxy server URL +--proxy-bypass <hosts> # Bypass proxy for these hosts (comma-separated) +``` + +## Environment Variables + +```bash +AGENT_BROWSER_PROXY="http://proxy:8080" # Proxy server URL +AGENT_BROWSER_PROXY_BYPASS="localhost,*.local" # Bypass hosts +``` + ## Basic Proxy Configuration Set proxy via environment variable before starting: ```bash # HTTP proxy +export AGENT_BROWSER_PROXY="http://proxy.example.com:8080" +agent-browser open https://example.com + +# Or use standard env vars export HTTP_PROXY="http://proxy.example.com:8080" agent-browser open https://example.com @@ -166,6 +197,45 @@ agent-browser open https://example.com --ignore-https-errors export NO_PROXY="*.cdn.com,*.static.com" # Direct CDN access ``` +## Additional Browser Configuration + +These flags are often used alongside proxy configuration for stealth automation. 
+ +### Custom User-Agent + +```bash +# Override default User-Agent +agent-browser --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0" open https://example.com + +# Via environment variable +export AGENT_BROWSER_USER_AGENT="CustomBot/1.0" +agent-browser open https://example.com +``` + +### Browser Launch Arguments + +```bash +# Pass custom Chromium flags (comma or newline separated) +agent-browser --args "--no-sandbox,--disable-blink-features=AutomationControlled" open https://example.com + +# Via environment variable +export AGENT_BROWSER_ARGS="--no-sandbox,--disable-gpu" +agent-browser open https://example.com +``` + +### Combined Stealth Configuration + +```bash +#!/bin/bash +# Full stealth setup with proxy +agent-browser \ + --proxy "http://proxy.example.com:8080" \ + --proxy-bypass "localhost,*.local" \ + --user-agent "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" \ + --args "--disable-blink-features=AutomationControlled" \ + open https://example.com +``` + ## Best Practices 1. **Use environment variables** - Don't hardcode proxy credentials @@ -173,3 +243,4 @@ export NO_PROXY="*.cdn.com,*.static.com" # Direct CDN access 3. **Test proxy before automation** - Verify connectivity with simple requests 4. **Handle proxy failures gracefully** - Implement retry logic for unstable proxies 5. **Rotate proxies for large scraping jobs** - Distribute load and avoid bans +6. **Match User-Agent to proxy location** - Use appropriate User-Agent for the proxy's region diff --git a/skills/agent-browser/references/semantic-locators.md b/skills/agent-browser/references/semantic-locators.md new file mode 100644 index 00000000..477c1269 --- /dev/null +++ b/skills/agent-browser/references/semantic-locators.md @@ -0,0 +1,319 @@ +# Semantic Locators + +Use semantic locators as an alternative to refs for stable, readable element selection. 
+ +## Quick Start + +```bash +# By role and name +agent-browser find role button click --name "Submit" + +# By visible text +agent-browser find text "Sign In" click + +# By label +agent-browser find label "Email" fill "user@example.com" +``` + +## Refs vs Semantic Locators + +| Feature | Refs (`@e1`) | Semantic Locators | +|---------|--------------|-------------------| +| Speed | Fastest | Slightly slower | +| Stability | Change on DOM update | Stable across changes | +| Readability | Requires snapshot context | Self-documenting | +| Best for | Interactive exploration | Scripted automation | + +### When to Use Refs + +```bash +# Interactive session - explore and act quickly +agent-browser snapshot -i +# See: @e5 [button] "Submit" +agent-browser click @e5 +``` + +### When to Use Semantic Locators + +```bash +# Scripted automation - stable across page changes +agent-browser find role button click --name "Submit" +``` + +## Locator Types + +### Role Locator + +Find by ARIA role (accessibility role): + +```bash +agent-browser find role button click --name "Submit" +agent-browser find role textbox fill "hello" --name "Search" +agent-browser find role link click --name "Learn more" +agent-browser find role checkbox check --name "Remember me" +agent-browser find role combobox click --name "Country" +``` + +Common roles: `button`, `textbox`, `link`, `checkbox`, `radio`, `combobox`, `listbox`, `menu`, `menuitem`, `tab`, `dialog`, `alert` + +### Text Locator + +Find by visible text content: + +```bash +agent-browser find text "Sign In" click +agent-browser find text "Welcome back" get text +agent-browser find text "Add to Cart" click + +# Exact match only (no partial matching) +agent-browser find text "Sign In" click --exact +``` + +### Label Locator + +Find form fields by their label: + +```bash +agent-browser find label "Email" fill "user@example.com" +agent-browser find label "Password" fill "secret123" +agent-browser find label "Remember me" check +``` + +### Placeholder 
Locator + +Find inputs by placeholder text: + +```bash +agent-browser find placeholder "Search..." type "query" +agent-browser find placeholder "Enter your email" fill "test@example.com" +``` + +### Alt Locator + +Find images by alt text: + +```bash +agent-browser find alt "Company Logo" click +agent-browser find alt "User avatar" get attr src +``` + +### Title Locator + +Find elements by title attribute: + +```bash +agent-browser find title "Close dialog" click +agent-browser find title "More options" hover +``` + +### Test ID Locator + +Find by data-testid attribute (common in React/Vue apps): + +```bash +agent-browser find testid "submit-button" click +agent-browser find testid "user-email-input" fill "test@example.com" +agent-browser find testid "error-message" get text +``` + +### Position Locators + +Find by position when multiple matches exist: + +```bash +# First matching element +agent-browser find first ".card" click + +# Last matching element +agent-browser find last ".card" click + +# Nth element (0-indexed) +agent-browser find nth 2 ".card" click +``` + +## Actions with Locators + +All standard actions work with semantic locators: + +```bash +# Click actions +agent-browser find text "Submit" click +agent-browser find text "Submit" dblclick + +# Input actions +agent-browser find label "Name" fill "John Doe" +agent-browser find label "Name" type "additional text" +agent-browser find label "Name" clear + +# Form controls +agent-browser find label "Country" select "United States" +agent-browser find label "Agree" check +agent-browser find label "Newsletter" uncheck + +# Information +agent-browser find text "Total" get text +agent-browser find role img get attr src + +# Visibility +agent-browser find text "Error" is visible +agent-browser find label "Submit" is enabled +``` + +## Exact Matching + +By default, text matching is partial. 
Use `--exact` for exact matches: + +```bash +# Partial match - finds "Sign In", "Sign In Now", "Please Sign In" +agent-browser find text "Sign In" click + +# Exact match - only finds exactly "Sign In" +agent-browser find text "Sign In" click --exact +``` + +## Chaining Locators + +Combine locators for precise selection: + +```bash +# Find button within a specific form +agent-browser find role form --name "Login" find role button click --name "Submit" + +# Find link in specific section +agent-browser find role navigation find text "Home" click +``` + +## Common Patterns + +### Form Filling + +```bash +#!/bin/bash +# Fill a registration form using semantic locators + +agent-browser open https://example.com/register + +agent-browser find label "First Name" fill "John" +agent-browser find label "Last Name" fill "Doe" +agent-browser find label "Email" fill "john@example.com" +agent-browser find label "Password" fill "SecurePass123!" +agent-browser find label "Confirm Password" fill "SecurePass123!" 
+agent-browser find label "I agree to the terms" check +agent-browser find role button click --name "Create Account" +``` + +### Navigation + +```bash +#!/bin/bash +# Navigate using semantic locators + +agent-browser open https://example.com + +# Use nav links +agent-browser find role link click --name "Products" +agent-browser find role link click --name "Pricing" +agent-browser find role link click --name "Contact" + +# Use menu +agent-browser find role button click --name "Menu" +agent-browser find role menuitem click --name "Settings" +``` + +### Accessibility Testing + +```bash +#!/bin/bash +# Verify accessibility attributes + +# Check all buttons have accessible names +agent-browser find role button get text --all + +# Verify form labels +agent-browser find role textbox get attr aria-label --all + +# Check images have alt text +agent-browser find role img get attr alt --all +``` + +### Testing Multiple Items + +```bash +#!/bin/bash +# Interact with lists + +# Click first item +agent-browser find first ".product-card" click + +# Click last item +agent-browser find last ".product-card" click + +# Click specific item by index +agent-browser find nth 2 ".product-card" click +``` + +## Best Practices + +1. **Prefer role + name for interactive elements** + ```bash + # Good - stable and semantic + agent-browser find role button click --name "Submit" + + # Avoid - fragile CSS selector + agent-browser click "#form-submit-btn" + ``` + +2. **Use testid for complex UIs** + ```bash + # When role/text isn't sufficient + agent-browser find testid "checkout-submit" click + ``` + +3. **Use exact matching for precision** + ```bash + # Avoid matching "Sign In Now" when you want "Sign In" + agent-browser find text "Sign In" click --exact + ``` + +4. **Combine with waits for dynamic content** + ```bash + agent-browser wait --text "Loading complete" + agent-browser find role button click --name "Continue" + ``` + +5. 
**Fall back to refs for complex scenarios** + ```bash + # When semantic locators don't work, snapshot and use refs + agent-browser snapshot -i + agent-browser click @e5 + ``` + +## Troubleshooting + +### Element Not Found + +```bash +# Use snapshot to see what's available +agent-browser snapshot -i + +# Try less specific locator +agent-browser find text "Sign" click # Instead of "Sign In" +``` + +### Multiple Matches + +```bash +# Use position locators +agent-browser find first ".btn" click + +# Or add more specificity +agent-browser find role button click --name "Submit Form" +``` + +### Dynamic Content + +```bash +# Wait for element to appear +agent-browser wait --text "Submit" +agent-browser find text "Submit" click +``` diff --git a/skills/agent-browser/references/snapshot-refs.md b/skills/agent-browser/references/snapshot-refs.md index 0b17a4d4..a5d7caa7 100644 --- a/skills/agent-browser/references/snapshot-refs.md +++ b/skills/agent-browser/references/snapshot-refs.md @@ -120,11 +120,11 @@ agent-browser click @e7 # Select item ### 4. 
Snapshot Specific Regions -For complex pages, snapshot specific areas: +For complex pages, snapshot specific areas using CSS selectors: ```bash # Snapshot just the form -agent-browser snapshot @e9 +agent-browser snapshot -s "#contact-form" ``` ## Ref Notation Details @@ -166,8 +166,8 @@ agent-browser snapshot -i ### Element Not Visible in Snapshot ```bash -# Scroll to reveal element -agent-browser scroll --bottom +# Scroll down to reveal element +agent-browser scroll down 500 agent-browser snapshot -i # Or wait for dynamic content @@ -178,8 +178,8 @@ agent-browser snapshot -i ### Too Many Elements ```bash -# Snapshot specific container -agent-browser snapshot @e5 +# Snapshot specific container using CSS selector +agent-browser snapshot -s ".main-content" # Or use get text for content-only extraction agent-browser get text @e5 diff --git a/skills/agent-browser/templates/download-workflow.sh b/skills/agent-browser/templates/download-workflow.sh new file mode 100755 index 00000000..5ae2656f --- /dev/null +++ b/skills/agent-browser/templates/download-workflow.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Template: Download Workflow +# Downloads files triggered by clicking elements (PDFs, exports, reports) + +set -euo pipefail + +URL="${1:?Usage: $0 <url> [output-dir]}" +OUTPUT_DIR="${2:-./downloads}" + +mkdir -p "$OUTPUT_DIR" + +echo "Starting download workflow from: $URL" + +# Navigate to page with downloadable content +agent-browser open "$URL" +agent-browser wait --load networkidle + +# Get interactive snapshot to identify download triggers +echo "Analyzing page for download links..." 
+agent-browser snapshot -i + +# Example: Download by clicking element +# Uncomment and modify refs based on snapshot output + +# Method 1: Wait for download after click +# agent-browser click @e1 +# agent-browser wait --download "$OUTPUT_DIR/export.xlsx" + +# Method 2: Download with timeout +# agent-browser click @e1 +# agent-browser wait --download "$OUTPUT_DIR/large-file.zip" --timeout 60000 + +# Method 3: Multiple downloads +# for i in 1 2 3; do +# agent-browser click "@e$i" +# agent-browser wait --download "$OUTPUT_DIR/file-$i.pdf" +# done + +# Example: Export workflow (common pattern) +# 1. Click export button +# agent-browser find role button click --name "Export" +# +# 2. Select format from dropdown/modal +# agent-browser wait 500 +# agent-browser snapshot -i +# agent-browser find text "PDF" click +# +# 3. Confirm and download +# agent-browser find role button click --name "Download" +# agent-browser wait --download "$OUTPUT_DIR/export.pdf" + +# Verify downloads +echo "Downloads saved to: $OUTPUT_DIR" +ls -la "$OUTPUT_DIR" + +# Take screenshot of final state +agent-browser screenshot "$OUTPUT_DIR/download-complete.png" + +# Cleanup +agent-browser close + +echo "Download workflow complete" diff --git a/skills/agent-browser/templates/multi-tab-workflow.sh b/skills/agent-browser/templates/multi-tab-workflow.sh new file mode 100755 index 00000000..af550e05 --- /dev/null +++ b/skills/agent-browser/templates/multi-tab-workflow.sh @@ -0,0 +1,143 @@ +#!/bin/bash +# Template: Multi-Tab Workflow +# Work with multiple tabs for comparison testing, parallel operations, and complex flows + +set -euo pipefail + +URL1="${1:?Usage: $0 <url1> [url2] [output-dir]}" +URL2="${2:-$URL1}" +OUTPUT_DIR="${3:-./multi-tab-output}" + +mkdir -p "$OUTPUT_DIR" + +echo "Starting multi-tab workflow" + +# ============================================ +# Basic Multi-Tab Operations +# ============================================ + +# Open first tab +agent-browser open "$URL1" +agent-browser wait 
--load networkidle +echo "Tab 1 opened: $URL1" + +# List current tabs +agent-browser tab + +# Open second tab +agent-browser tab new "$URL2" +agent-browser wait --load networkidle +echo "Tab 2 opened: $URL2" + +# List tabs again +agent-browser tab + +# ============================================ +# Example 1: Comparison Screenshots +# ============================================ +echo "Taking comparison screenshots..." + +# Screenshot tab 2 (current tab) +agent-browser screenshot "$OUTPUT_DIR/tab2.png" + +# Switch to tab 1 (index 0) +agent-browser tab 0 +agent-browser screenshot "$OUTPUT_DIR/tab1.png" + +# ============================================ +# Example 2: Cross-Tab Data Transfer +# ============================================ +echo "Extracting data across tabs..." + +# Get data from tab 1 +agent-browser tab 0 +agent-browser snapshot -i +# TEXT1=$(agent-browser get text @e1) + +# Switch to tab 2 and use the data +agent-browser tab 1 +agent-browser snapshot -i +# agent-browser fill @e1 "$TEXT1" + +# ============================================ +# Example 3: A/B Comparison Testing +# ============================================ +echo "A/B comparison example..." + +# Tab 1: Version A +agent-browser tab 0 +# agent-browser open "$URL1?variant=a" +agent-browser snapshot -i +agent-browser screenshot "$OUTPUT_DIR/variant-a.png" + +# Tab 2: Version B +agent-browser tab 1 +# agent-browser open "$URL1?variant=b" +agent-browser snapshot -i +agent-browser screenshot "$OUTPUT_DIR/variant-b.png" + +# ============================================ +# Example 4: Parallel Form Filling +# ============================================ +echo "Parallel operations example..." 
+ +# Fill form in tab 1 +agent-browser tab 0 +agent-browser snapshot -i +# agent-browser fill @e1 "Form 1 Data" +# agent-browser click @e2 + +# Fill form in tab 2 +agent-browser tab 1 +agent-browser snapshot -i +# agent-browser fill @e1 "Form 2 Data" +# agent-browser click @e2 + +# ============================================ +# Example 5: Open Multiple Tabs Programmatically +# ============================================ +echo "Opening multiple tabs..." + +URLS=( + "https://example.com/page1" + "https://example.com/page2" + "https://example.com/page3" +) + +# Uncomment to use: +# for url in "${URLS[@]}"; do +# agent-browser tab new "$url" +# agent-browser wait --load networkidle +# done + +# Process each tab +# for i in "${!URLS[@]}"; do +# agent-browser tab "$i" +# agent-browser screenshot "$OUTPUT_DIR/page-$i.png" +# done + +# ============================================ +# Tab Management +# ============================================ +echo "Managing tabs..." + +# List all tabs +agent-browser tab + +# Close specific tab by index +# agent-browser tab close 2 + +# Close current tab +# agent-browser tab close + +# ============================================ +# Cleanup +# ============================================ +echo "Cleaning up..." 
+ +# Close all tabs (closes browser) +agent-browser close + +echo "Multi-tab workflow complete" +echo "Output saved to: $OUTPUT_DIR" +ls -la "$OUTPUT_DIR" diff --git a/skills/agent-browser/templates/network-mocking.sh b/skills/agent-browser/templates/network-mocking.sh new file mode 100755 index 00000000..d9adb613 --- /dev/null +++ b/skills/agent-browser/templates/network-mocking.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# Template: Network Mocking Workflow +# Mock API responses for testing UI states, error handling, and edge cases + +set -euo pipefail + +URL="${1:?Usage: $0 <url> [screenshot-dir]}" +SCREENSHOT_DIR="${2:-./mock-screenshots}" + +mkdir -p "$SCREENSHOT_DIR" + +echo "Starting network mocking workflow for: $URL" + +# ============================================ +# Example 1: Mock Empty State +# ============================================ +echo "Testing empty state..." + +# Mock API to return empty data +agent-browser network route "https://api.example.com/items" \ + --body '{"items": [], "total": 0}' + +agent-browser open "$URL" +agent-browser wait --load networkidle +agent-browser screenshot "$SCREENSHOT_DIR/empty-state.png" + +# Clear route for next test +agent-browser network unroute + +# ============================================ +# Example 2: Mock Error Response +# ============================================ +echo "Testing error handling..." + +# Mock 500 server error +agent-browser network route "https://api.example.com/items" \ + --body '{"error": "Internal server error"}' --status 500 + +agent-browser reload +agent-browser wait --load networkidle +agent-browser snapshot -i +agent-browser screenshot "$SCREENSHOT_DIR/error-state.png" + +# Verify error UI is shown +# agent-browser find text "Something went wrong" is visible + +agent-browser network unroute + +# ============================================ +# Example 3: Mock Loading State (block request) +# ============================================ +echo "Testing loading state..." 
+ +# Block API to keep UI in loading state +agent-browser network route "https://api.example.com/items" --abort + +agent-browser reload +agent-browser wait 1000 # Brief wait to see loading state +agent-browser screenshot "$SCREENSHOT_DIR/loading-state.png" + +agent-browser network unroute + +# ============================================ +# Example 4: Mock Success with Data +# ============================================ +echo "Testing populated state..." + +# Mock successful response with sample data +agent-browser network route "https://api.example.com/items" \ + --body '{ + "items": [ + {"id": 1, "name": "Test Item 1", "status": "active"}, + {"id": 2, "name": "Test Item 2", "status": "pending"}, + {"id": 3, "name": "Test Item 3", "status": "completed"} + ], + "total": 3 + }' + +agent-browser reload +agent-browser wait --load networkidle +agent-browser snapshot -i +agent-browser screenshot "$SCREENSHOT_DIR/populated-state.png" + +agent-browser network unroute + +# ============================================ +# Example 5: Block Analytics/Tracking +# ============================================ +echo "Blocking analytics for clean testing..." + +agent-browser network route "**/google-analytics.com/**" --abort +agent-browser network route "**/facebook.com/tr**" --abort +agent-browser network route "**/hotjar.com/**" --abort +agent-browser network route "**/segment.com/**" --abort + +# Continue with automation (analytics won't interfere) +agent-browser reload +agent-browser wait --load networkidle + +# ============================================ +# Example 6: View Tracked Requests +# ============================================ +echo "Viewing network requests..." 
+agent-browser network requests + +# Cleanup +agent-browser network unroute +agent-browser close + +echo "Network mocking workflow complete" +echo "Screenshots saved to: $SCREENSHOT_DIR" +ls -la "$SCREENSHOT_DIR" diff --git a/test/skills.test.ts b/test/skills.test.ts new file mode 100644 index 00000000..f464e809 --- /dev/null +++ b/test/skills.test.ts @@ -0,0 +1,424 @@ +import { describe, it, expect, beforeAll } from 'vitest'; +import { readFileSync, readdirSync, existsSync } from 'fs'; +import { join } from 'path'; +import { execSync } from 'child_process'; + +const SKILLS_DIR = join(__dirname, '../skills/agent-browser'); +const SKILL_MD = join(SKILLS_DIR, 'SKILL.md'); +const REFERENCES_DIR = join(SKILLS_DIR, 'references'); +const TEMPLATES_DIR = join(SKILLS_DIR, 'templates'); + +describe('Skills Documentation', () => { + describe('SKILL.md', () => { + let content: string; + + beforeAll(() => { + content = readFileSync(SKILL_MD, 'utf-8'); + }); + + it('should exist', () => { + expect(existsSync(SKILL_MD)).toBe(true); + }); + + it('should have valid YAML frontmatter', () => { + const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/); + expect(frontmatterMatch).not.toBeNull(); + + const frontmatter = frontmatterMatch![1]; + expect(frontmatter).toContain('name:'); + expect(frontmatter).toContain('description:'); + expect(frontmatter).toContain('allowed-tools:'); + }); + + it('should have required sections', () => { + const requiredSections = [ + '## Quick start', + '## Core workflow', + '## Commands', + '## Global options', + '## Environment variables', + '## Deep-dive documentation', + '## Ready-to-use templates', + ]; + + for (const section of requiredSections) { + expect(content).toContain(section); + } + }); + + it('should have valid internal reference links', () => { + const linkPattern = /\[.*?\]\((references\/[^)]+\.md)\)/g; + const links = [...content.matchAll(linkPattern)].map((m) => m[1]); + + for (const link of links) { + const fullPath = 
join(SKILLS_DIR, link); + expect(existsSync(fullPath), `Missing reference: ${link}`).toBe(true); + } + }); + + it('should have valid internal template links', () => { + const linkPattern = /\[.*?\]\((templates\/[^)]+\.sh)\)/g; + const links = [...content.matchAll(linkPattern)].map((m) => m[1]); + + for (const link of links) { + const fullPath = join(SKILLS_DIR, link); + expect(existsSync(fullPath), `Missing template: ${link}`).toBe(true); + } + }); + + it('should have properly formatted code blocks', () => { + const codeBlockPattern = /```(\w+)?\n[\s\S]*?```/g; + const codeBlocks = content.match(codeBlockPattern) || []; + + expect(codeBlocks.length).toBeGreaterThan(0); + + // Check that code blocks have language specifiers + const blocksWithLang = codeBlocks.filter((block) => block.startsWith('```bash')); + expect(blocksWithLang.length).toBeGreaterThan(10); // Most should be bash + }); + + it('should document all major command categories', () => { + const commandCategories = [ + '### Navigation', + '### Snapshot', + '### Interactions', + '### Get information', + '### Screenshots', + '### Wait', + '### Cookies & Storage', + '### Network', + '### Tabs & Windows', + ]; + + for (const category of commandCategories) { + expect(content).toContain(category); + } + }); + }); + + describe('Reference Documents', () => { + let referenceFiles: string[]; + + beforeAll(() => { + referenceFiles = readdirSync(REFERENCES_DIR).filter((f: string) => f.endsWith('.md')); + }); + + it('should have reference documents', () => { + expect(referenceFiles.length).toBeGreaterThan(0); + }); + + it('should have expected reference files', () => { + const expectedRefs = [ + 'snapshot-refs.md', + 'session-management.md', + 'authentication.md', + 'video-recording.md', + 'proxy-support.md', + 'persistent-profiles.md', + 'cloud-providers.md', + 'semantic-locators.md', + 'network-mocking.md', + 'debugging.md', + ]; + + for (const ref of expectedRefs) { + expect(referenceFiles, `Missing reference: 
${ref}`).toContain(ref); + } + }); + + it('each reference should have a title heading', () => { + for (const file of referenceFiles) { + const content = readFileSync(join(REFERENCES_DIR, file), 'utf-8'); + expect(content.startsWith('# '), `${file} missing title`).toBe(true); + } + }); + + it('each reference should have code examples', () => { + for (const file of referenceFiles) { + const content = readFileSync(join(REFERENCES_DIR, file), 'utf-8'); + expect(content, `${file} missing code blocks`).toContain('```'); + } + }); + + it('references should not have broken internal links', () => { + for (const file of referenceFiles) { + const content = readFileSync(join(REFERENCES_DIR, file), 'utf-8'); + const linkPattern = /\[.*?\]\((?!http)([^)]+)\)/g; + const links = [...content.matchAll(linkPattern)].map((m) => m[1]); + + for (const link of links) { + // Skip anchor links + if (link.startsWith('#')) continue; + + const fullPath = join(REFERENCES_DIR, link); + expect(existsSync(fullPath), `${file}: broken link ${link}`).toBe(true); + } + } + }); + }); + + describe('Template Scripts', () => { + let templateFiles: string[]; + + beforeAll(() => { + templateFiles = readdirSync(TEMPLATES_DIR).filter((f: string) => f.endsWith('.sh')); + }); + + it('should have template scripts', () => { + expect(templateFiles.length).toBeGreaterThan(0); + }); + + it('should have expected template files', () => { + const expectedTemplates = [ + 'form-automation.sh', + 'authenticated-session.sh', + 'capture-workflow.sh', + 'download-workflow.sh', + 'network-mocking.sh', + 'multi-tab-workflow.sh', + ]; + + for (const template of expectedTemplates) { + expect(templateFiles, `Missing template: ${template}`).toContain(template); + } + }); + + it('each template should have a shebang', () => { + for (const file of templateFiles) { + const content = readFileSync(join(TEMPLATES_DIR, file), 'utf-8'); + expect(content.startsWith('#!/bin/bash'), `${file} missing shebang`).toBe(true); + } + }); + + 
it('each template should have a description comment', () => { + for (const file of templateFiles) { + const content = readFileSync(join(TEMPLATES_DIR, file), 'utf-8'); + expect(content, `${file} missing description`).toContain('# Template:'); + } + }); + + it('each template should use set -euo pipefail', () => { + for (const file of templateFiles) { + const content = readFileSync(join(TEMPLATES_DIR, file), 'utf-8'); + expect(content, `${file} missing strict mode`).toContain('set -euo pipefail'); + } + }); + + it('each template should have valid bash syntax', () => { + for (const file of templateFiles) { + const fullPath = join(TEMPLATES_DIR, file); + try { + execSync(`bash -n "${fullPath}"`, { encoding: 'utf-8' }); + } catch (error) { + throw new Error(`${file} has invalid bash syntax: ${error}`); + } + } + }); + + it('each template should be executable', () => { + for (const file of templateFiles) { + const fullPath = join(TEMPLATES_DIR, file); + try { + const stats = execSync(`ls -l "${fullPath}"`, { encoding: 'utf-8' }); + expect(stats, `${file} not executable`).toMatch(/^-rwx/); + } catch { + // Skip if ls fails + } + } + }); + + it('each template should have usage documentation', () => { + for (const file of templateFiles) { + const content = readFileSync(join(TEMPLATES_DIR, file), 'utf-8'); + // Should have either Usage: comment or parameter extraction + const hasUsage = content.includes('Usage:') || content.includes('${1:'); + expect(hasUsage, `${file} missing usage docs`).toBe(true); + } + }); + }); + + describe('Command Documentation Coverage', () => { + let skillContent: string; + + beforeAll(() => { + skillContent = readFileSync(SKILL_MD, 'utf-8'); + }); + + it('should document core navigation commands', () => { + const navCommands = ['open', 'back', 'forward', 'reload', 'close', 'connect']; + for (const cmd of navCommands) { + expect(skillContent, `Missing nav command: ${cmd}`).toContain(`agent-browser ${cmd}`); + } + }); + + it('should document 
interaction commands', () => { + const interactionCommands = [ + 'click', + 'dblclick', + 'fill', + 'type', + 'press', + 'hover', + 'check', + 'uncheck', + 'select', + 'scroll', + 'drag', + 'upload', + ]; + for (const cmd of interactionCommands) { + expect(skillContent, `Missing interaction: ${cmd}`).toContain(`agent-browser ${cmd}`); + } + }); + + it('should document get commands', () => { + const getCommands = ['text', 'html', 'value', 'attr', 'title', 'url', 'count', 'box', 'styles']; + for (const cmd of getCommands) { + expect(skillContent, `Missing get ${cmd}`).toContain(`get ${cmd}`); + } + }); + + it('should document wait options', () => { + const waitOptions = ['--text', '--url', '--load', '--fn', '--download']; + for (const opt of waitOptions) { + expect(skillContent, `Missing wait option: ${opt}`).toContain(`wait ${opt}`); + } + }); + + it('should document global flags', () => { + const globalFlags = [ + '--session', + '--profile', + '--json', + '--headed', + '--cdp', + '--proxy', + '--proxy-bypass', + '--args', + '--user-agent', + '--headers', + '--extension', + ]; + for (const flag of globalFlags) { + expect(skillContent, `Missing global flag: ${flag}`).toContain(flag); + } + }); + + it('should document environment variables', () => { + const envVars = [ + 'AGENT_BROWSER_SESSION', + 'AGENT_BROWSER_PROFILE', + 'AGENT_BROWSER_PROVIDER', + 'AGENT_BROWSER_PROXY', + 'AGENT_BROWSER_ARGS', + 'AGENT_BROWSER_USER_AGENT', + ]; + for (const envVar of envVars) { + expect(skillContent, `Missing env var: ${envVar}`).toContain(envVar); + } + }); + }); +}); + +describe('Skills Integration', () => { + describe('CLI Command Existence', () => { + it('agent-browser CLI should be available', () => { + try { + const output = execSync('agent-browser --version', { encoding: 'utf-8' }); + expect(output).toMatch(/\d+\.\d+\.\d+/); + } catch { + // Skip if CLI not installed (CI may not have it) + console.log('Skipping: agent-browser CLI not available'); + } + }); + + 
it('agent-browser --help should list core commands', () => { + try { + const output = execSync('agent-browser --help', { encoding: 'utf-8' }); + const coreCommands = ['open', 'click', 'snapshot', 'screenshot', 'close']; + for (const cmd of coreCommands) { + expect(output).toContain(cmd); + } + } catch { + // Skip if CLI not installed + console.log('Skipping: agent-browser CLI not available'); + } + }); + }); + + describe('Documented Commands Match CLI', () => { + let cliHelp: string | null = null; + let skillContent: string; + + beforeAll(() => { + skillContent = readFileSync(SKILL_MD, 'utf-8'); + try { + cliHelp = execSync('agent-browser --help', { encoding: 'utf-8' }); + } catch { + cliHelp = null; + } + }); + + it('documented commands should exist in CLI help', () => { + if (!cliHelp) { + console.log('Skipping: agent-browser CLI not available'); + return; + } + + // Extract command names from SKILL.md (first word after agent-browser) + const docCommands = new Set<string>(); + const cmdPattern = /agent-browser\s+(\w+)/g; + let match: RegExpExecArray | null; + while ((match = cmdPattern.exec(skillContent)) !== null) { + // Skip flags, common words, and frontmatter fields + const cmd = match[1]; + const skipWords = ['browser', 'description', 'name', 'allowed']; + if (!cmd.startsWith('-') && !skipWords.includes(cmd)) { + docCommands.add(cmd); + } + } + + // Commands that are aliases or subcommands of documented commands + const aliases: Record<string, string> = { + goto: 'open', + navigate: 'open', + quit: 'close', + exit: 'close', + key: 'press', + keydown: 'press', + keyup: 'press', + scrollinto: 'scroll', + scrollintoview: 'scroll', + geolocation: 'geo', + auth: 'credentials', + }; + + // Commands implemented but not shown in main help (advanced/subcommands) + const advancedCommands = new Set([ + 'download', // Implemented in Rust CLI but not in main help listing + 'waitfordownload', // Wait subcommand variant + ]); + + // Check each documented command exists in help + const 
missingFromHelp: string[] = []; + for (const cmd of docCommands) { + // Skip advanced commands that are implemented but not in main help + if (advancedCommands.has(cmd)) continue; + + // Some commands are subcommands or aliases, just check they're mentioned + const primaryCmd = aliases[cmd] || cmd; + const exists = cliHelp.toLowerCase().includes(primaryCmd.toLowerCase()); + + if (!exists) { + missingFromHelp.push(cmd); + } + } + + // Should have very few commands not in help (only advanced ones) + expect( + missingFromHelp.length, + `Commands documented but not in CLI help: ${missingFromHelp.join(', ')}` + ).toBeLessThanOrEqual(3); + }); + }); +});