diff --git a/.changeset/patch-add-block-domains-support.md b/.changeset/patch-add-block-domains-support.md new file mode 100644 index 0000000000..2dbf58226f --- /dev/null +++ b/.changeset/patch-add-block-domains-support.md @@ -0,0 +1,7 @@ +--- +"gh-aw": patch +--- + +Add domain blocklist support via `--block-domains` flag. + +This change adds support for specifying blocked domains in workflow frontmatter and passes the `--block-domains` flag to Copilot/Claude/Codex engines during compilation. Includes parser updates, unit and integration tests, and documentation updates. diff --git a/.changeset/patch-add-blocked-domains.md b/.changeset/patch-add-blocked-domains.md new file mode 100644 index 0000000000..00354588f1 --- /dev/null +++ b/.changeset/patch-add-blocked-domains.md @@ -0,0 +1,13 @@ +--- +"gh-aw": patch +--- + +Add domain blocklist support via the `--block-domains` flag and the +`blocked` frontmatter field. This enables specifying domains or ecosystem +identifiers to block in workflows and ensures the flag is only added when +blocked domains are present. + +Supported engines: Copilot, Claude, Codex. + +Ref: githubnext/gh-aw#9063 + diff --git a/.github/aw/schemas/agentic-workflow.json b/.github/aw/schemas/agentic-workflow.json index f5267da56e..7efbf15361 100644 --- a/.github/aw/schemas/agentic-workflow.json +++ b/.github/aw/schemas/agentic-workflow.json @@ -2038,6 +2038,15 @@ }, "$comment": "Empty array is valid and means deny all network access. Omit the field entirely or use network: defaults to use default network permissions." }, + "blocked": { + "type": "array", + "description": "List of blocked domains or ecosystem identifiers (e.g., 'python', 'node', 'tracker.example.com'). 
Blocked domains take precedence over allowed domains.", + "items": { + "type": "string", + "description": "Domain name or ecosystem identifier to block (supports wildcards like '*.example.com' and ecosystem names like 'python', 'node')" + }, + "$comment": "Blocked domains are subtracted from the allowed list. Useful for blocking specific domains or ecosystems within broader allowed categories." + }, "firewall": { "description": "AWF (Agent Workflow Firewall) configuration for network egress control. Only supported for Copilot engine.", "deprecated": true, diff --git a/docs/src/content/docs/reference/frontmatter-full.md b/docs/src/content/docs/reference/frontmatter-full.md index fd56515d19..027bedfe03 100644 --- a/docs/src/content/docs/reference/frontmatter-full.md +++ b/docs/src/content/docs/reference/frontmatter-full.md @@ -823,6 +823,13 @@ network: # Array of Domain name or ecosystem identifier (supports wildcards like # '*.example.com' and ecosystem names like 'python', 'node') + # List of blocked domains or ecosystem identifiers (e.g., 'python', 'node', + # 'tracker.example.com'). Blocked domains take precedence over allowed domains. + # (optional) + blocked: [] + # Array of Domain name or ecosystem identifier to block (supports wildcards like + # '*.example.com' and ecosystem names like 'python', 'node') + # Sandbox configuration for AI engines. Controls agent sandbox (AWF or Sandbox # Runtime) and MCP gateway. 
# (optional) diff --git a/docs/src/content/docs/reference/network.md b/docs/src/content/docs/reference/network.md index fa502636f2..37613e2393 100644 --- a/docs/src/content/docs/reference/network.md +++ b/docs/src/content/docs/reference/network.md @@ -39,9 +39,72 @@ network: # No network access network: {} + +# Block specific domains +network: + allowed: + - defaults # Basic infrastructure + - python # Python/PyPI ecosystem + blocked: + - "tracker.example.com" # Block specific tracking domain + - "analytics.example.com" # Block analytics + +# Block entire ecosystems +network: + allowed: + - defaults + - github + - node + blocked: + - python # Block Python/PyPI even if in defaults +``` + +## Blocking Domains + +Use the `blocked` field to block specific domains or ecosystems while allowing others. Blocked domains take precedence over allowed domains, enabling fine-grained control: + +```yaml wrap +# Block specific tracking/analytics domains +network: + allowed: + - defaults + - github + blocked: + - "tracker.example.com" + - "analytics.example.com" + +# Block entire ecosystem within broader allowed set +network: + allowed: + - defaults # Includes many ecosystems + blocked: + - python # Block Python/PyPI specifically + +# Combine domain and ecosystem blocking +network: + allowed: + - defaults + - github + - node + blocked: + - python # Block Python ecosystem + - "cdn.example.com" # Block specific CDN ``` -## Security Model +:::tip[When to Use Blocked Domains] +- **Privacy**: Block tracking and analytics domains while allowing legitimate services +- **Security**: Block known malicious or compromised domains +- **Compliance**: Enforce organizational network policies +- **Fine-grained control**: Allow broad ecosystem access but block specific problematic domains +::: + +**Key behaviors**: +- Blocked domains are subtracted from the allowed list +- Supports both individual domains and ecosystem identifiers +- Blocked domains include all subdomains (like allowed domains) +- 
Useful for blocking specific domains within broader ecosystem allowlists + +## Configuration Network permissions follow the principle of least privilege with four access levels: diff --git a/pkg/cli/workflows/example-blocked-domains.lock.yml b/pkg/cli/workflows/example-blocked-domains.lock.yml new file mode 100644 index 0000000000..8bbed06659 --- /dev/null +++ b/pkg/cli/workflows/example-blocked-domains.lock.yml @@ -0,0 +1,449 @@ +# +# ___ _ _ +# / _ \ | | (_) +# | |_| | __ _ ___ _ __ | |_ _ ___ +# | _ |/ _` |/ _ \ '_ \| __| |/ __| +# | | | | (_| | __/ | | | |_| | (__ +# \_| |_/\__, |\___|_| |_|\__|_|\___| +# __/ | +# _ _ |___/ +# | | | | / _| | +# | | | | ___ _ __ _ __| |_| | _____ ____ +# | |/\| |/ _ \ '__| |/ /| _| |/ _ \ \ /\ / / ___| +# \ /\ / (_) | | | | ( | | | | (_) \ V V /\__ \ +# \/ \/ \___/|_| |_|\_\|_| |_|\___/ \_/\_/ |___/ +# +# This file was automatically generated by gh-aw. DO NOT EDIT. +# +# To update this file, edit the corresponding .md file and run: +# gh aw compile +# For more information: https://github.com/githubnext/gh-aw/blob/main/.github/aw/github-agentic-workflows.md +# + +name: "Example: Blocked Domains" +"on": + workflow_dispatch: + +permissions: + contents: read + +concurrency: + group: "gh-aw-${{ github.workflow }}" + +run-name: "Example: Blocked Domains" + +jobs: + activation: + runs-on: ubuntu-slim + permissions: + contents: read + outputs: + comment_id: "" + comment_repo: "" + steps: + - name: Checkout actions folder + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: /tmp/gh-aw/actions + - name: Check workflow file timestamps + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_AW_WORKFLOW_FILE: "example-blocked-domains.lock.yml" + with: + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + 
setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/check_workflow_timestamp_api.cjs'); + await main(); + + agent: + needs: activation + runs-on: ubuntu-latest + permissions: + contents: read + concurrency: + group: "gh-aw-copilot-${{ github.workflow }}" + outputs: + model: ${{ steps.generate_aw_info.outputs.model }} + steps: + - name: Checkout actions folder + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + sparse-checkout: | + actions + persist-credentials: false + - name: Setup Scripts + uses: ./actions/setup + with: + destination: /tmp/gh-aw/actions + - name: Checkout repository + uses: actions/checkout@93cb6efe18208431cddfb8368fd83d5badbf9bfd # v5.0.1 + with: + persist-credentials: false + - name: Create gh-aw temp directory + run: bash /tmp/gh-aw/actions/create_gh_aw_tmp_dir.sh + - name: Configure Git credentials + env: + REPO_NAME: ${{ github.repository }} + SERVER_URL: ${{ github.server_url }} + run: | + git config --global user.email "github-actions[bot]@users.noreply.github.com" + git config --global user.name "github-actions[bot]" + # Re-authenticate git with GitHub token + SERVER_URL_STRIPPED="${SERVER_URL#https://}" + git remote set-url origin "https://x-access-token:${{ github.token }}@${SERVER_URL_STRIPPED}/${REPO_NAME}.git" + echo "Git configured with standard GitHub Actions identity" + - name: Checkout PR branch + if: | + github.event.pull_request + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + with: + github-token: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/checkout_pr_branch.cjs'); + 
await main(); + - name: Validate COPILOT_GITHUB_TOKEN secret + run: /tmp/gh-aw/actions/validate_multi_secret.sh COPILOT_GITHUB_TOKEN GitHub Copilot CLI https://githubnext.github.io/gh-aw/reference/engines/#github-copilot-default + env: + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + - name: Install GitHub Copilot CLI + run: | + # Download official Copilot CLI installer script + curl -fsSL https://raw.githubusercontent.com/github/copilot-cli/main/install.sh -o /tmp/copilot-install.sh + + # Execute the installer with the specified version + export VERSION=0.0.374 && sudo bash /tmp/copilot-install.sh + + # Cleanup + rm -f /tmp/copilot-install.sh + + # Verify installation + copilot --version + - name: Install awf binary + run: | + echo "Installing awf via installer script (requested version: v0.8.1)" + curl -sSL https://raw.githubusercontent.com/githubnext/gh-aw-firewall/main/install.sh | sudo AWF_VERSION=v0.8.1 bash + which awf + awf --version + - name: Determine automatic lockdown mode for GitHub MCP server + id: determine-automatic-lockdown + env: + TOKEN_CHECK: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + if: env.TOKEN_CHECK != '' + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const determineAutomaticLockdown = require('/tmp/gh-aw/actions/determine_automatic_lockdown.cjs'); + await determineAutomaticLockdown(github, context, core); + - name: Downloading container images + run: bash /tmp/gh-aw/actions/download_docker_images.sh ghcr.io/github/github-mcp-server:v0.27.0 + - name: Setup MCPs + env: + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + run: | + mkdir -p /tmp/gh-aw/mcp-config + mkdir -p /home/runner/.copilot + cat > /home/runner/.copilot/mcp-config.json << EOF + { + "mcpServers": { + "github": { + "type": "local", + "command": "docker", + "args": [ + "run", + "-i", + "--rm", + "-e", + 
"GITHUB_PERSONAL_ACCESS_TOKEN", + "-e", + "GITHUB_READ_ONLY=1", + "-e", + "GITHUB_LOCKDOWN_MODE=${{ steps.determine-automatic-lockdown.outputs.lockdown == 'true' && '1' || '0' }}", + "-e", + "GITHUB_TOOLSETS=context,repos,issues,pull_requests", + "ghcr.io/github/github-mcp-server:v0.27.0" + ], + "tools": ["*"], + "env": { + "GITHUB_PERSONAL_ACCESS_TOKEN": "\${GITHUB_MCP_SERVER_TOKEN}" + } + } + } + } + EOF + echo "-------START MCP CONFIG-----------" + cat /home/runner/.copilot/mcp-config.json + echo "-------END MCP CONFIG-----------" + echo "-------/home/runner/.copilot-----------" + find /home/runner/.copilot + echo "HOME: $HOME" + echo "GITHUB_COPILOT_CLI_MODE: $GITHUB_COPILOT_CLI_MODE" + - name: Generate agentic run info + id: generate_aw_info + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const fs = require('fs'); + + const awInfo = { + engine_id: "copilot", + engine_name: "GitHub Copilot CLI", + model: process.env.GH_AW_MODEL_AGENT_COPILOT || "", + version: "", + agent_version: "0.0.374", + workflow_name: "Example: Blocked Domains", + experimental: false, + supports_tools_allowlist: true, + supports_http_transport: true, + run_id: context.runId, + run_number: context.runNumber, + run_attempt: process.env.GITHUB_RUN_ATTEMPT, + repository: context.repo.owner + '/' + context.repo.repo, + ref: context.ref, + sha: context.sha, + actor: context.actor, + event_name: context.eventName, + staged: false, + network_mode: "defaults", + allowed_domains: ["defaults","github","node"], + firewall_enabled: true, + awf_version: "v0.8.1", + steps: { + firewall: "squid" + }, + created_at: new Date().toISOString() + }; + + // Write to /tmp/gh-aw directory to avoid inclusion in PR + const tmpPath = '/tmp/gh-aw/aw_info.json'; + fs.writeFileSync(tmpPath, JSON.stringify(awInfo, null, 2)); + console.log('Generated aw_info.json at:', tmpPath); + console.log(JSON.stringify(awInfo, null, 2)); + + // Set model as output for reuse in 
other steps/jobs + core.setOutput('model', awInfo.model); + - name: Generate workflow overview + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const { generateWorkflowOverview } = require('/tmp/gh-aw/actions/generate_workflow_overview.cjs'); + await generateWorkflowOverview(core); + - name: Create prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: | + bash /tmp/gh-aw/actions/create_prompt_first.sh + cat << 'PROMPT_EOF' > "$GH_AW_PROMPT" + # Example: Blocked Domains + + This workflow demonstrates using the `blocked` field in network configuration to block specific domains while allowing others. + + The workflow allows access to: + - Basic infrastructure (`defaults`) + - GitHub domains (`github`) + - Node.js/NPM ecosystem (`node`) + + But explicitly blocks: + - `tracker.example.com` (tracking domain) + - `analytics.example.com` (analytics domain) + + Blocked domains take precedence over allowed domains, providing fine-grained control over network access. 
+ + PROMPT_EOF + - name: Append XPIA security instructions to prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: | + cat "/tmp/gh-aw/prompts/xpia_prompt.md" >> "$GH_AW_PROMPT" + - name: Append temporary folder instructions to prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: | + cat "/tmp/gh-aw/prompts/temp_folder_prompt.md" >> "$GH_AW_PROMPT" + - name: Append GitHub context to prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + run: | + cat << 'PROMPT_EOF' >> "$GH_AW_PROMPT" + + The following GitHub context information is available for this workflow: + {{#if __GH_AW_GITHUB_ACTOR__ }} + - **actor**: __GH_AW_GITHUB_ACTOR__ + {{/if}} + {{#if __GH_AW_GITHUB_REPOSITORY__ }} + - **repository**: __GH_AW_GITHUB_REPOSITORY__ + {{/if}} + {{#if __GH_AW_GITHUB_WORKSPACE__ }} + - **workspace**: __GH_AW_GITHUB_WORKSPACE__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ }} + - **issue-number**: #__GH_AW_GITHUB_EVENT_ISSUE_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ }} + - **discussion-number**: #__GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ }} + - **pull-request-number**: #__GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER__ + {{/if}} + {{#if __GH_AW_GITHUB_EVENT_COMMENT_ID__ }} + - **comment-id**: __GH_AW_GITHUB_EVENT_COMMENT_ID__ + {{/if}} + {{#if __GH_AW_GITHUB_RUN_ID__ }} + - **workflow-run-id**: __GH_AW_GITHUB_RUN_ID__ + {{/if}} + + + PROMPT_EOF + - name: Substitute 
placeholders + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GH_AW_GITHUB_ACTOR: ${{ github.actor }} + GH_AW_GITHUB_EVENT_COMMENT_ID: ${{ github.event.comment.id }} + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: ${{ github.event.discussion.number }} + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: ${{ github.event.issue.number }} + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: ${{ github.event.pull_request.number }} + GH_AW_GITHUB_REPOSITORY: ${{ github.repository }} + GH_AW_GITHUB_RUN_ID: ${{ github.run_id }} + GH_AW_GITHUB_WORKSPACE: ${{ github.workspace }} + with: + script: | + const substitutePlaceholders = require('/tmp/gh-aw/actions/substitute_placeholders.cjs'); + + // Call the substitution function + return await substitutePlaceholders({ + file: process.env.GH_AW_PROMPT, + substitutions: { + GH_AW_GITHUB_ACTOR: process.env.GH_AW_GITHUB_ACTOR, + GH_AW_GITHUB_EVENT_COMMENT_ID: process.env.GH_AW_GITHUB_EVENT_COMMENT_ID, + GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER: process.env.GH_AW_GITHUB_EVENT_DISCUSSION_NUMBER, + GH_AW_GITHUB_EVENT_ISSUE_NUMBER: process.env.GH_AW_GITHUB_EVENT_ISSUE_NUMBER, + GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER: process.env.GH_AW_GITHUB_EVENT_PULL_REQUEST_NUMBER, + GH_AW_GITHUB_REPOSITORY: process.env.GH_AW_GITHUB_REPOSITORY, + GH_AW_GITHUB_RUN_ID: process.env.GH_AW_GITHUB_RUN_ID, + GH_AW_GITHUB_WORKSPACE: process.env.GH_AW_GITHUB_WORKSPACE + } + }); + - name: Interpolate variables and render templates + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + with: + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/interpolate_prompt.cjs'); + await main(); + - name: Print prompt + env: + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + run: bash 
/tmp/gh-aw/actions/print_prompt_summary.sh + - name: Execute GitHub Copilot CLI + id: agentic_execution + # Copilot CLI tool arguments (sorted): + # --allow-tool github + timeout-minutes: 20 + run: | + set -o pipefail + sudo -E awf --env-all --container-workdir "${GITHUB_WORKSPACE}" --mount /tmp:/tmp:rw --mount "${GITHUB_WORKSPACE}:${GITHUB_WORKSPACE}:rw" --mount /usr/bin/date:/usr/bin/date:ro --mount /usr/bin/gh:/usr/bin/gh:ro --mount /usr/bin/yq:/usr/bin/yq:ro --mount /usr/local/bin/copilot:/usr/local/bin/copilot:ro --mount /home/runner/.copilot:/home/runner/.copilot:rw --allow-domains '*.githubusercontent.com,api.business.githubcopilot.com,api.enterprise.githubcopilot.com,api.github.com,api.githubcopilot.com,api.individual.githubcopilot.com,api.npms.io,api.snapcraft.io,archive.ubuntu.com,azure.archive.ubuntu.com,bun.sh,codeload.github.com,crl.geotrust.com,crl.globalsign.com,crl.identrust.com,crl.sectigo.com,crl.thawte.com,crl.usertrust.com,crl.verisign.com,crl3.digicert.com,crl4.digicert.com,crls.ssl.com,deb.nodesource.com,deno.land,get.pnpm.io,github-cloud.githubusercontent.com,github-cloud.s3.amazonaws.com,github.com,github.githubassets.com,host.docker.internal,json-schema.org,json.schemastore.org,keyserver.ubuntu.com,lfs.github.com,nodejs.org,npm.pkg.github.com,npmjs.com,npmjs.org,objects.githubusercontent.com,ocsp.digicert.com,ocsp.geotrust.com,ocsp.globalsign.com,ocsp.identrust.com,ocsp.sectigo.com,ocsp.ssl.com,ocsp.thawte.com,ocsp.usertrust.com,ocsp.verisign.com,packagecloud.io,packages.cloud.google.com,packages.microsoft.com,ppa.launchpad.net,raw.githubusercontent.com,registry.bower.io,registry.npmjs.com,registry.npmjs.org,registry.yarnpkg.com,repo.yarnpkg.com,s.symcb.com,s.symcd.com,security.ubuntu.com,skimdb.npmjs.com,ts-crl.ws.symantec.com,ts-ocsp.ws.symantec.com,www.npmjs.com,www.npmjs.org,yarnpkg.com' --log-level info --proxy-logs-dir /tmp/gh-aw/sandbox/firewall/logs --image-tag 0.8.1 \ + -- /usr/local/bin/copilot --add-dir /tmp/gh-aw/ --log-level 
all --log-dir /tmp/gh-aw/sandbox/agent/logs/ --add-dir "${GITHUB_WORKSPACE}" --disable-builtin-mcps --allow-tool github --prompt "$(cat /tmp/gh-aw/aw-prompts/prompt.txt)"${GH_AW_MODEL_DETECTION_COPILOT:+ --model "$GH_AW_MODEL_DETECTION_COPILOT"} \ + 2>&1 | tee /tmp/gh-aw/agent-stdio.log + env: + COPILOT_AGENT_RUNNER_TYPE: STANDALONE + COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + GH_AW_MCP_CONFIG: /home/runner/.copilot/mcp-config.json + GH_AW_MODEL_DETECTION_COPILOT: ${{ vars.GH_AW_MODEL_DETECTION_COPILOT || '' }} + GH_AW_PROMPT: /tmp/gh-aw/aw-prompts/prompt.txt + GITHUB_HEAD_REF: ${{ github.head_ref }} + GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN || secrets.GH_AW_GITHUB_TOKEN || secrets.GITHUB_TOKEN }} + GITHUB_REF_NAME: ${{ github.ref_name }} + GITHUB_STEP_SUMMARY: ${{ env.GITHUB_STEP_SUMMARY }} + GITHUB_WORKSPACE: ${{ github.workspace }} + XDG_CONFIG_HOME: /home/runner + - name: Redact secrets in logs + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/redact_secrets.cjs'); + await main(); + env: + GH_AW_SECRET_NAMES: 'COPILOT_GITHUB_TOKEN,GH_AW_GITHUB_MCP_SERVER_TOKEN,GH_AW_GITHUB_TOKEN,GITHUB_TOKEN' + SECRET_COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }} + SECRET_GH_AW_GITHUB_MCP_SERVER_TOKEN: ${{ secrets.GH_AW_GITHUB_MCP_SERVER_TOKEN }} + SECRET_GH_AW_GITHUB_TOKEN: ${{ secrets.GH_AW_GITHUB_TOKEN }} + SECRET_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + - name: Upload engine output files + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: agent_outputs + path: | + /tmp/gh-aw/sandbox/agent/logs/ + /tmp/gh-aw/redacted-urls.log + if-no-files-found: ignore + - name: Parse agent logs for step summary + if: always() + uses: 
actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + with: + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/parse_copilot_log.cjs'); + await main(); + - name: Parse firewall logs for step summary + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + with: + script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/parse_firewall_logs.cjs'); + await main(); + - name: Validate agent logs for errors + if: always() + uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0 + env: + GH_AW_AGENT_OUTPUT: /tmp/gh-aw/sandbox/agent/logs/ + GH_AW_ERROR_PATTERNS: "[{\"id\":\"\",\"pattern\":\"::(error)(?:\\\\s+[^:]*)?::(.+)\",\"level_group\":1,\"message_group\":2,\"description\":\"GitHub Actions workflow command - error\"},{\"id\":\"\",\"pattern\":\"::(warning)(?:\\\\s+[^:]*)?::(.+)\",\"level_group\":1,\"message_group\":2,\"description\":\"GitHub Actions workflow command - warning\"},{\"id\":\"\",\"pattern\":\"::(notice)(?:\\\\s+[^:]*)?::(.+)\",\"level_group\":1,\"message_group\":2,\"description\":\"GitHub Actions workflow command - notice\"},{\"id\":\"\",\"pattern\":\"(ERROR|Error):\\\\s+(.+)\",\"level_group\":1,\"message_group\":2,\"description\":\"Generic ERROR messages\"},{\"id\":\"\",\"pattern\":\"(WARNING|Warning):\\\\s+(.+)\",\"level_group\":1,\"message_group\":2,\"description\":\"Generic WARNING messages\"},{\"id\":\"\",\"pattern\":\"(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}Z)\\\\s+\\\\[(ERROR)\\\\]\\\\s+(.+)\",\"level_group\":2,\"message_group\":3,\"description\":\"Copilot CLI timestamped ERROR 
messages\"},{\"id\":\"\",\"pattern\":\"(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}Z)\\\\s+\\\\[(WARN|WARNING)\\\\]\\\\s+(.+)\",\"level_group\":2,\"message_group\":3,\"description\":\"Copilot CLI timestamped WARNING messages\"},{\"id\":\"\",\"pattern\":\"\\\\[(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}Z)\\\\]\\\\s+(CRITICAL|ERROR):\\\\s+(.+)\",\"level_group\":2,\"message_group\":3,\"description\":\"Copilot CLI bracketed critical/error messages with timestamp\"},{\"id\":\"\",\"pattern\":\"\\\\[(\\\\d{4}-\\\\d{2}-\\\\d{2}T\\\\d{2}:\\\\d{2}:\\\\d{2}\\\\.\\\\d{3}Z)\\\\]\\\\s+(WARNING):\\\\s+(.+)\",\"level_group\":2,\"message_group\":3,\"description\":\"Copilot CLI bracketed warning messages with timestamp\"},{\"id\":\"\",\"pattern\":\"✗\\\\s+(.+)\",\"level_group\":0,\"message_group\":1,\"description\":\"Copilot CLI failed command indicator\"},{\"id\":\"\",\"pattern\":\"(?:command not found|not found):\\\\s*(.+)|(.+):\\\\s*(?:command not found|not found)\",\"level_group\":0,\"message_group\":0,\"description\":\"Shell command not found error\"},{\"id\":\"\",\"pattern\":\"Cannot find module\\\\s+['\\\"](.+)['\\\"]\",\"level_group\":0,\"message_group\":1,\"description\":\"Node.js module not found error\"},{\"id\":\"\",\"pattern\":\"Permission denied and could not request permission from user\",\"level_group\":0,\"message_group\":0,\"description\":\"Copilot CLI permission denied warning (user interaction required)\"},{\"id\":\"\",\"pattern\":\"\\\\berror\\\\b.*permission.*denied\",\"level_group\":0,\"message_group\":0,\"description\":\"Permission denied error (requires error context)\"},{\"id\":\"\",\"pattern\":\"\\\\berror\\\\b.*unauthorized\",\"level_group\":0,\"message_group\":0,\"description\":\"Unauthorized access error (requires error context)\"},{\"id\":\"\",\"pattern\":\"\\\\berror\\\\b.*forbidden\",\"level_group\":0,\"message_group\":0,\"description\":\"Forbidden access error (requires error context)\"}]" + with: + 
script: | + const { setupGlobals } = require('/tmp/gh-aw/actions/setup_globals.cjs'); + setupGlobals(core, github, context, exec, io); + const { main } = require('/tmp/gh-aw/actions/validate_errors.cjs'); + await main(); + - name: Upload agent artifacts + if: always() + continue-on-error: true + uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0 + with: + name: agent-artifacts + path: | + /tmp/gh-aw/aw-prompts/prompt.txt + /tmp/gh-aw/aw_info.json + /tmp/gh-aw/mcp-logs/ + /tmp/gh-aw/sandbox/firewall/logs/ + /tmp/gh-aw/agent-stdio.log + if-no-files-found: ignore + diff --git a/pkg/cli/workflows/example-blocked-domains.md b/pkg/cli/workflows/example-blocked-domains.md new file mode 100644 index 0000000000..5dc3edf475 --- /dev/null +++ b/pkg/cli/workflows/example-blocked-domains.md @@ -0,0 +1,30 @@ +--- +engine: copilot +on: + workflow_dispatch: + +network: + firewall: true + allowed: + - defaults + - github + - node + blocked: + - tracker.example.com + - analytics.example.com +--- + +# Example: Blocked Domains + +This workflow demonstrates using the `blocked` field in network configuration to block specific domains while allowing others. + +The workflow allows access to: +- Basic infrastructure (`defaults`) +- GitHub domains (`github`) +- Node.js/NPM ecosystem (`node`) + +But explicitly blocks: +- `tracker.example.com` (tracking domain) +- `analytics.example.com` (analytics domain) + +Blocked domains take precedence over allowed domains, providing fine-grained control over network access. 
diff --git a/pkg/cli/workflows/test-claude-blocked-domains.md b/pkg/cli/workflows/test-claude-blocked-domains.md new file mode 100644 index 0000000000..ee6db9d812 --- /dev/null +++ b/pkg/cli/workflows/test-claude-blocked-domains.md @@ -0,0 +1,63 @@ +--- +description: Smoke test for blocked domains with Claude engine +on: + workflow_dispatch: + pull_request: + types: [labeled] + names: ["smoke"] +permissions: + contents: read + issues: read + pull-requests: read +name: Smoke Blocked Domains Claude +engine: claude +network: + firewall: true + allowed: + - defaults + - github + blocked: + - npmjs.org + - registry.npmjs.org +safe-outputs: + add-comment: + hide-older-comments: true + create-issue: + expires: 2h + add-labels: + allowed: [smoke-blocked-domains-claude] + messages: + footer: "> 🚫 *Blocked domains tested by [{workflow_name}]({run_url})*" + run-started: "🚫 Testing blocked domains... [{workflow_name}]({run_url}) is validating domain blocking for {event_type}..." + run-success: "✅ Blocked domains test complete... [{workflow_name}]({run_url}) confirmed domain blocking is operational. 🛡️" + run-failure: "❌ Blocked domains test failed... [{workflow_name}]({run_url}) {status}. Domain blocking may not be working correctly." +timeout-minutes: 5 +tools: + github: + bash: + - "*" +--- + +# Smoke Test: Blocked Domains with Claude + +**IMPORTANT: Keep all outputs extremely short and concise. Use single-line responses where possible.** + +## Test Requirements + +This workflow validates that the blocked domains feature works correctly with the Claude engine and AWF firewall. + +1. **Allowed Domain Testing**: Test that GitHub domains (allowed) are accessible using bash tools - this should succeed +2. **Blocked Domain Testing**: Attempt to access NPM domains (blocked) using `curl https://registry.npmjs.org` - this should FAIL or be blocked by the firewall +3. **Blocked Ecosystem Testing**: Verify that `npmjs.org` is also blocked (part of Node ecosystem blocking) +4. 
**GitHub MCP Testing**: Verify GitHub MCP server works (blocked domains should not affect GitHub toolset functionality) +5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-blocked-domains-claude-${{ github.run_id }}.txt` with content "Blocked domains test for Claude run ${{ github.run_id }}" + +## Output + +Add a **very brief** comment (max 5-10 lines) to the current pull request with: +- ✅ or ❌ for each test result +- List which domains were blocked successfully +- List which domains were allowed successfully +- Overall status: PASS or FAIL + +If all tests pass (GitHub allowed, NPM blocked), add the label `smoke-blocked-domains-claude` to the pull request. diff --git a/pkg/cli/workflows/test-codex-blocked-domains.md b/pkg/cli/workflows/test-codex-blocked-domains.md new file mode 100644 index 0000000000..8eecbc5329 --- /dev/null +++ b/pkg/cli/workflows/test-codex-blocked-domains.md @@ -0,0 +1,63 @@ +--- +description: Smoke test for blocked domains with Codex engine +on: + workflow_dispatch: + pull_request: + types: [labeled] + names: ["smoke"] +permissions: + contents: read + issues: read + pull-requests: read +name: Smoke Blocked Domains Codex +engine: codex +network: + firewall: true + allowed: + - defaults + - github + blocked: + - tracker.example.com + - analytics.example.com +safe-outputs: + add-comment: + hide-older-comments: true + create-issue: + expires: 2h + add-labels: + allowed: [smoke-blocked-domains-codex] + messages: + footer: "> 🚫 *Blocked domains tested by [{workflow_name}]({run_url})*" + run-started: "🚫 Testing blocked domains... [{workflow_name}]({run_url}) is validating domain blocking for {event_type}..." + run-success: "✅ Blocked domains test complete... [{workflow_name}]({run_url}) confirmed domain blocking is operational. 🛡️" + run-failure: "❌ Blocked domains test failed... [{workflow_name}]({run_url}) {status}. Domain blocking may not be working correctly." 
+timeout-minutes: 5 +tools: + github: + bash: + - "*" +--- + +# Smoke Test: Blocked Domains with Codex + +**IMPORTANT: Keep all outputs extremely short and concise. Use single-line responses where possible.** + +## Test Requirements + +This workflow validates that the blocked domains feature works correctly with the Codex engine and AWF firewall. + +1. **Allowed Domain Testing**: Test that GitHub domains (allowed) are accessible - verify GitHub API access works +2. **Blocked Domain Testing**: Attempt to access explicitly blocked domains using `curl https://tracker.example.com` - this should FAIL or be blocked by the firewall +3. **Multiple Blocked Domains**: Verify that both `tracker.example.com` and `analytics.example.com` are blocked +4. **GitHub MCP Testing**: Verify GitHub MCP server works (blocked domains should not affect GitHub toolset functionality) +5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-blocked-domains-codex-${{ github.run_id }}.txt` with content "Blocked domains test for Codex run ${{ github.run_id }}" + +## Output + +Add a **very brief** comment (max 5-10 lines) to the current pull request with: +- ✅ or ❌ for each test result +- List which domains were blocked successfully +- List which domains were allowed successfully +- Overall status: PASS or FAIL + +If all tests pass (GitHub allowed, example.com domains blocked), add the label `smoke-blocked-domains-codex` to the pull request. 
diff --git a/pkg/cli/workflows/test-copilot-blocked-domains.md b/pkg/cli/workflows/test-copilot-blocked-domains.md new file mode 100644 index 0000000000..6f1d286f44 --- /dev/null +++ b/pkg/cli/workflows/test-copilot-blocked-domains.md @@ -0,0 +1,63 @@ +--- +description: Smoke test for blocked domains with Copilot engine +on: + workflow_dispatch: + pull_request: + types: [labeled] + names: ["smoke"] +permissions: + contents: read + issues: read + pull-requests: read +name: Smoke Blocked Domains Copilot +engine: copilot +network: + firewall: true + allowed: + - defaults + - github + blocked: + - pypi.org + - files.pythonhosted.org +safe-outputs: + add-comment: + hide-older-comments: true + create-issue: + expires: 2h + add-labels: + allowed: [smoke-blocked-domains-copilot] + messages: + footer: "> 🚫 *Blocked domains tested by [{workflow_name}]({run_url})*" + run-started: "🚫 Testing blocked domains... [{workflow_name}]({run_url}) is validating domain blocking for {event_type}..." + run-success: "✅ Blocked domains test complete... [{workflow_name}]({run_url}) confirmed domain blocking is operational. 🛡️" + run-failure: "❌ Blocked domains test failed... [{workflow_name}]({run_url}) {status}. Domain blocking may not be working correctly." +timeout-minutes: 5 +tools: + github: + bash: + - "*" +--- + +# Smoke Test: Blocked Domains with Copilot + +**IMPORTANT: Keep all outputs extremely short and concise. Use single-line responses where possible.** + +## Test Requirements + +This workflow validates that the blocked domains feature works correctly with the Copilot engine and AWF firewall. + +1. **Allowed Domain Testing**: Test that GitHub domains (allowed) are accessible using `curl https://api.github.com` - this should succeed +2. **Blocked Domain Testing**: Attempt to access PyPI domains (blocked) using `curl https://pypi.org` - this should FAIL or be blocked by the firewall +3. 
**Blocked Ecosystem Testing**: Verify that `files.pythonhosted.org` is also blocked (part of Python ecosystem) +4. **GitHub MCP Testing**: Verify GitHub MCP server works (blocked domains should not affect GitHub toolset functionality) +5. **File Writing Testing**: Create a test file `/tmp/gh-aw/agent/smoke-test-blocked-domains-${{ github.run_id }}.txt` with content "Blocked domains test for run ${{ github.run_id }}" + +## Output + +Add a **very brief** comment (max 5-10 lines) to the current pull request with: +- ✅ or ❌ for each test result +- List which domains were blocked successfully +- List which domains were allowed successfully +- Overall status: PASS or FAIL + +If all tests pass (GitHub allowed, PyPI blocked), add the label `smoke-blocked-domains-copilot` to the pull request. diff --git a/pkg/parser/schemas/main_workflow_schema.json b/pkg/parser/schemas/main_workflow_schema.json index f5267da56e..7efbf15361 100644 --- a/pkg/parser/schemas/main_workflow_schema.json +++ b/pkg/parser/schemas/main_workflow_schema.json @@ -2038,6 +2038,15 @@ }, "$comment": "Empty array is valid and means deny all network access. Omit the field entirely or use network: defaults to use default network permissions." }, + "blocked": { + "type": "array", + "description": "List of blocked domains or ecosystem identifiers (e.g., 'python', 'node', 'tracker.example.com'). Blocked domains take precedence over allowed domains.", + "items": { + "type": "string", + "description": "Domain name or ecosystem identifier to block (supports wildcards like '*.example.com' and ecosystem names like 'python', 'node')" + }, + "$comment": "Blocked domains are subtracted from the allowed list. Useful for blocking specific domains or ecosystems within broader allowed categories." + }, "firewall": { "description": "AWF (Agent Workflow Firewall) configuration for network egress control. 
Only supported for Copilot engine.", "deprecated": true, diff --git a/pkg/workflow/blocked_domains_integration_test.go b/pkg/workflow/blocked_domains_integration_test.go new file mode 100644 index 0000000000..d558855728 --- /dev/null +++ b/pkg/workflow/blocked_domains_integration_test.go @@ -0,0 +1,346 @@ +package workflow + +import ( + "os" + "path/filepath" + "strings" + "testing" + + "github.com/githubnext/gh-aw/pkg/testutil" +) + +// TestBlockedDomainsIntegration tests that blocked domains are properly compiled into workflows +func TestBlockedDomainsIntegration(t *testing.T) { + t.Run("workflow with blocked domains compiles correctly", func(t *testing.T) { + // Create temporary directory for test + tmpDir := testutil.TempDir(t, "test-*") + workflowsDir := filepath.Join(tmpDir, ".github", "workflows") + err := os.MkdirAll(workflowsDir, 0755) + if err != nil { + t.Fatalf("Failed to create workflows directory: %v", err) + } + + // Create test workflow with blocked domains + workflowContent := `--- +on: workflow_dispatch +permissions: + contents: read +engine: copilot +network: + firewall: true + allowed: + - defaults + - github + blocked: + - tracker.example.com + - analytics.example.com +--- + +# Test Workflow + +Test workflow with blocked domains. 
+` + + workflowPath := filepath.Join(workflowsDir, "test-blocked-domains.md") + err = os.WriteFile(workflowPath, []byte(workflowContent), 0644) + if err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + // Compile the workflow + compiler := NewCompiler(false, "", "test-blocked-domains") + compiler.SetSkipValidation(true) + + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + // Read the compiled workflow + lockPath := filepath.Join(workflowsDir, "test-blocked-domains.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockYAML := string(lockContent) + + // Verify --block-domains flag is present + if !strings.Contains(lockYAML, "--block-domains") { + t.Error("Compiled workflow should contain '--block-domains' flag") + } + + // Verify blocked domains are in the command + if !strings.Contains(lockYAML, "analytics.example.com") { + t.Error("Compiled workflow should contain blocked domain 'analytics.example.com'") + } + + if !strings.Contains(lockYAML, "tracker.example.com") { + t.Error("Compiled workflow should contain blocked domain 'tracker.example.com'") + } + + // Verify standard AWF flags are still present + if !strings.Contains(lockYAML, "--allow-domains") { + t.Error("Compiled workflow should still contain '--allow-domains' flag") + } + + if !strings.Contains(lockYAML, "--log-level") { + t.Error("Compiled workflow should still contain '--log-level' flag") + } + }) + + t.Run("workflow with blocked ecosystem identifiers compiles correctly", func(t *testing.T) { + // Create temporary directory for test + tmpDir := testutil.TempDir(t, "test-*") + workflowsDir := filepath.Join(tmpDir, ".github", "workflows") + err := os.MkdirAll(workflowsDir, 0755) + if err != nil { + t.Fatalf("Failed to create workflows directory: %v", err) + } + + // Create test workflow with blocked ecosystem + 
workflowContent := `--- +on: workflow_dispatch +permissions: + contents: read +engine: copilot +network: + firewall: true + allowed: + - defaults + - github + blocked: + - python +--- + +# Test Workflow + +Test workflow with blocked ecosystem. +` + + workflowPath := filepath.Join(workflowsDir, "test-blocked-ecosystem.md") + err = os.WriteFile(workflowPath, []byte(workflowContent), 0644) + if err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + // Compile the workflow + compiler := NewCompiler(false, "", "test-blocked-ecosystem") + compiler.SetSkipValidation(true) + + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + // Read the compiled workflow + lockPath := filepath.Join(workflowsDir, "test-blocked-ecosystem.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockYAML := string(lockContent) + + // Verify --block-domains flag is present + if !strings.Contains(lockYAML, "--block-domains") { + t.Error("Compiled workflow should contain '--block-domains' flag") + } + + // Verify at least one Python ecosystem domain is blocked + pythonDomains := []string{"pypi.org", "files.pythonhosted.org"} + foundPythonDomain := false + for _, domain := range pythonDomains { + if strings.Contains(lockYAML, domain) { + foundPythonDomain = true + break + } + } + if !foundPythonDomain { + t.Error("Compiled workflow should contain at least one Python ecosystem domain in blocked list") + } + }) + + t.Run("workflow without blocked domains does not have block-domains flag", func(t *testing.T) { + // Create temporary directory for test + tmpDir := testutil.TempDir(t, "test-*") + workflowsDir := filepath.Join(tmpDir, ".github", "workflows") + err := os.MkdirAll(workflowsDir, 0755) + if err != nil { + t.Fatalf("Failed to create workflows directory: %v", err) + } + + // Create test workflow without blocked domains + 
workflowContent := `--- +on: workflow_dispatch +permissions: + contents: read +engine: copilot +network: + firewall: true + allowed: + - defaults + - github +--- + +# Test Workflow + +Test workflow without blocked domains. +` + + workflowPath := filepath.Join(workflowsDir, "test-no-blocked.md") + err = os.WriteFile(workflowPath, []byte(workflowContent), 0644) + if err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + // Compile the workflow + compiler := NewCompiler(false, "", "test-no-blocked") + compiler.SetSkipValidation(true) + + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + // Read the compiled workflow + lockPath := filepath.Join(workflowsDir, "test-no-blocked.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockYAML := string(lockContent) + + // Verify --block-domains flag is NOT present + if strings.Contains(lockYAML, "--block-domains") { + t.Error("Compiled workflow should NOT contain '--block-domains' flag when no domains are blocked") + } + + // Verify --allow-domains is still present + if !strings.Contains(lockYAML, "--allow-domains") { + t.Error("Compiled workflow should still contain '--allow-domains' flag") + } + }) + + t.Run("claude workflow with blocked domains compiles correctly", func(t *testing.T) { + // Create temporary directory for test + tmpDir := testutil.TempDir(t, "test-*") + workflowsDir := filepath.Join(tmpDir, ".github", "workflows") + err := os.MkdirAll(workflowsDir, 0755) + if err != nil { + t.Fatalf("Failed to create workflows directory: %v", err) + } + + // Create test workflow with blocked domains for Claude + workflowContent := `--- +on: workflow_dispatch +permissions: + contents: read +engine: claude +network: + firewall: true + allowed: + - defaults + blocked: + - tracker.example.com +--- + +# Test Workflow + +Test Claude workflow with blocked 
domains. +` + + workflowPath := filepath.Join(workflowsDir, "test-claude-blocked.md") + err = os.WriteFile(workflowPath, []byte(workflowContent), 0644) + if err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + // Compile the workflow + compiler := NewCompiler(false, "", "test-claude-blocked") + compiler.SetSkipValidation(true) + + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + // Read the compiled workflow + lockPath := filepath.Join(workflowsDir, "test-claude-blocked.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockYAML := string(lockContent) + + // Verify --block-domains flag is present + if !strings.Contains(lockYAML, "--block-domains") { + t.Error("Compiled Claude workflow should contain '--block-domains' flag") + } + + // Verify blocked domain is in the command + if !strings.Contains(lockYAML, "tracker.example.com") { + t.Error("Compiled Claude workflow should contain blocked domain 'tracker.example.com'") + } + }) + + t.Run("codex workflow with blocked domains compiles correctly", func(t *testing.T) { + // Create temporary directory for test + tmpDir := testutil.TempDir(t, "test-*") + workflowsDir := filepath.Join(tmpDir, ".github", "workflows") + err := os.MkdirAll(workflowsDir, 0755) + if err != nil { + t.Fatalf("Failed to create workflows directory: %v", err) + } + + // Create test workflow with blocked domains for Codex + workflowContent := `--- +on: workflow_dispatch +permissions: + contents: read +engine: codex +network: + firewall: true + allowed: + - defaults + blocked: + - tracker.example.com +--- + +# Test Workflow + +Test Codex workflow with blocked domains. 
+` + + workflowPath := filepath.Join(workflowsDir, "test-codex-blocked.md") + err = os.WriteFile(workflowPath, []byte(workflowContent), 0644) + if err != nil { + t.Fatalf("Failed to write workflow file: %v", err) + } + + // Compile the workflow + compiler := NewCompiler(false, "", "test-codex-blocked") + compiler.SetSkipValidation(true) + + if err := compiler.CompileWorkflow(workflowPath); err != nil { + t.Fatalf("Failed to compile workflow: %v", err) + } + + // Read the compiled workflow + lockPath := filepath.Join(workflowsDir, "test-codex-blocked.lock.yml") + lockContent, err := os.ReadFile(lockPath) + if err != nil { + t.Fatalf("Failed to read compiled workflow: %v", err) + } + + lockYAML := string(lockContent) + + // Verify --block-domains flag is present + if !strings.Contains(lockYAML, "--block-domains") { + t.Error("Compiled Codex workflow should contain '--block-domains' flag") + } + + // Verify blocked domain is in the command + if !strings.Contains(lockYAML, "tracker.example.com") { + t.Error("Compiled Codex workflow should contain blocked domain 'tracker.example.com'") + } + }) +} diff --git a/pkg/workflow/claude_engine.go b/pkg/workflow/claude_engine.go index 9b6869dd61..f1261f295c 100644 --- a/pkg/workflow/claude_engine.go +++ b/pkg/workflow/claude_engine.go @@ -279,6 +279,14 @@ func (e *ClaudeEngine) GetExecutionSteps(workflowData *WorkflowData, logFile str } awfArgs = append(awfArgs, "--allow-domains", allowedDomains) + + // Add blocked domains if specified + blockedDomains := formatBlockedDomains(workflowData.NetworkPermissions) + if blockedDomains != "" { + awfArgs = append(awfArgs, "--block-domains", blockedDomains) + claudeLog.Printf("Added blocked domains: %s", blockedDomains) + } + awfArgs = append(awfArgs, "--log-level", awfLogLevel) awfArgs = append(awfArgs, "--proxy-logs-dir", "/tmp/gh-aw/sandbox/firewall/logs") diff --git a/pkg/workflow/codex_engine.go b/pkg/workflow/codex_engine.go index f635872c5a..95b73dd0cb 100644 --- 
a/pkg/workflow/codex_engine.go +++ b/pkg/workflow/codex_engine.go @@ -191,6 +191,14 @@ func (e *CodexEngine) GetExecutionSteps(workflowData *WorkflowData, logFile stri } awfArgs = append(awfArgs, "--allow-domains", allowedDomains) + + // Add blocked domains if specified + blockedDomains := formatBlockedDomains(workflowData.NetworkPermissions) + if blockedDomains != "" { + awfArgs = append(awfArgs, "--block-domains", blockedDomains) + codexEngineLog.Printf("Added blocked domains: %s", blockedDomains) + } + awfArgs = append(awfArgs, "--log-level", awfLogLevel) awfArgs = append(awfArgs, "--proxy-logs-dir", "/tmp/gh-aw/sandbox/firewall/logs") diff --git a/pkg/workflow/copilot_engine_execution.go b/pkg/workflow/copilot_engine_execution.go index 3017129df6..48cb361a9c 100644 --- a/pkg/workflow/copilot_engine_execution.go +++ b/pkg/workflow/copilot_engine_execution.go @@ -275,6 +275,14 @@ func (e *CopilotEngine) GetExecutionSteps(workflowData *WorkflowData, logFile st } awfArgs = append(awfArgs, "--allow-domains", allowedDomains) + + // Add blocked domains if specified + blockedDomains := formatBlockedDomains(workflowData.NetworkPermissions) + if blockedDomains != "" { + awfArgs = append(awfArgs, "--block-domains", blockedDomains) + copilotExecLog.Printf("Added blocked domains: %s", blockedDomains) + } + awfArgs = append(awfArgs, "--log-level", awfLogLevel) awfArgs = append(awfArgs, "--proxy-logs-dir", "/tmp/gh-aw/sandbox/firewall/logs") diff --git a/pkg/workflow/domains.go b/pkg/workflow/domains.go index a65b29aac5..f2e4090280 100644 --- a/pkg/workflow/domains.go +++ b/pkg/workflow/domains.go @@ -285,6 +285,67 @@ func GetClaudeAllowedDomainsWithSafeInputs(network *NetworkPermissions, hasSafeI return mergeDomainsWithNetwork(ClaudeDefaultDomains, network) } +// GetBlockedDomains returns the blocked domains from network permissions +// Returns empty slice if no network permissions configured or no domains blocked +// The returned list is sorted and deduplicated +// Supports 
ecosystem identifiers (same as allowed domains) +func GetBlockedDomains(network *NetworkPermissions) []string { + if network == nil { + domainsLog.Print("No network permissions specified, no blocked domains") + return []string{} + } + + // Handle empty blocked list + if len(network.Blocked) == 0 { + domainsLog.Print("Empty blocked list, no domains blocked") + return []string{} + } + + domainsLog.Printf("Processing %d blocked domains/ecosystems", len(network.Blocked)) + + // Process the blocked list, expanding ecosystem identifiers if present + // Use a map to deduplicate domains + domainMap := make(map[string]bool) + for _, domain := range network.Blocked { + // Try to get domains for this ecosystem category + ecosystemDomains := getEcosystemDomains(domain) + if len(ecosystemDomains) > 0 { + // This was an ecosystem identifier, expand it + domainsLog.Printf("Expanded ecosystem '%s' to %d domains", domain, len(ecosystemDomains)) + for _, d := range ecosystemDomains { + domainMap[d] = true + } + } else { + // Add the domain as-is (regular domain name) + domainMap[domain] = true + } + } + + // Convert map to sorted slice + expandedDomains := make([]string, 0, len(domainMap)) + for domain := range domainMap { + expandedDomains = append(expandedDomains, domain) + } + SortStrings(expandedDomains) + + return expandedDomains +} + +// formatBlockedDomains formats blocked domains as a comma-separated string suitable for AWF's --block-domains flag +// Returns empty string if no blocked domains +func formatBlockedDomains(network *NetworkPermissions) string { + if network == nil { + return "" + } + + blockedDomains := GetBlockedDomains(network) + if len(blockedDomains) == 0 { + return "" + } + + return strings.Join(blockedDomains, ",") +} + // computeAllowedDomainsForSanitization computes the allowed domains for sanitization // based on the engine and network configuration, matching what's provided to the firewall func (c *Compiler) computeAllowedDomainsForSanitization(data 
*WorkflowData) string { diff --git a/pkg/workflow/domains_blocked_test.go b/pkg/workflow/domains_blocked_test.go new file mode 100644 index 0000000000..da21680a87 --- /dev/null +++ b/pkg/workflow/domains_blocked_test.go @@ -0,0 +1,169 @@ +package workflow + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestGetBlockedDomains tests the GetBlockedDomains function +func TestGetBlockedDomains(t *testing.T) { + tests := []struct { + name string + network *NetworkPermissions + expected []string + }{ + { + name: "nil network permissions", + network: nil, + expected: []string{}, + }, + { + name: "empty blocked list", + network: &NetworkPermissions{ + Blocked: []string{}, + }, + expected: []string{}, + }, + { + name: "single domain", + network: &NetworkPermissions{ + Blocked: []string{"tracker.example.com"}, + }, + expected: []string{"tracker.example.com"}, + }, + { + name: "multiple domains", + network: &NetworkPermissions{ + Blocked: []string{"tracker.example.com", "analytics.example.com"}, + }, + expected: []string{"analytics.example.com", "tracker.example.com"}, // Sorted + }, + { + name: "ecosystem identifier", + network: &NetworkPermissions{ + Blocked: []string{"python"}, + }, + expected: func() []string { + // Get python ecosystem domains and sort them + domains := getEcosystemDomains("python") + SortStrings(domains) + return domains + }(), + }, + { + name: "mixed domains and ecosystems", + network: &NetworkPermissions{ + Blocked: []string{"python", "tracker.example.com"}, + }, + expected: func() []string { + // Get python ecosystem domains and add custom domain + domainMap := make(map[string]bool) + for _, d := range getEcosystemDomains("python") { + domainMap[d] = true + } + domainMap["tracker.example.com"] = true + + domains := make([]string, 0, len(domainMap)) + for d := range domainMap { + domains = append(domains, d) + } + SortStrings(domains) + return domains + }(), + }, + { + name: "duplicate domains are deduplicated", + 
network: &NetworkPermissions{ + Blocked: []string{"tracker.example.com", "tracker.example.com"}, + }, + expected: []string{"tracker.example.com"}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := GetBlockedDomains(tt.network) + assert.Equal(t, tt.expected, result, "GetBlockedDomains should return expected domains") + }) + } +} + +// TestFormatBlockedDomains tests the formatBlockedDomains function +func TestFormatBlockedDomains(t *testing.T) { + tests := []struct { + name string + network *NetworkPermissions + expected string + }{ + { + name: "nil network permissions", + network: nil, + expected: "", + }, + { + name: "empty blocked list", + network: &NetworkPermissions{ + Blocked: []string{}, + }, + expected: "", + }, + { + name: "single domain", + network: &NetworkPermissions{ + Blocked: []string{"tracker.example.com"}, + }, + expected: "tracker.example.com", + }, + { + name: "multiple domains", + network: &NetworkPermissions{ + Blocked: []string{"tracker.example.com", "analytics.example.com"}, + }, + expected: "analytics.example.com,tracker.example.com", // Sorted and comma-separated + }, + { + name: "ecosystem identifier", + network: &NetworkPermissions{ + Blocked: []string{"python"}, + }, + expected: func() string { + // Get python ecosystem domains, sort, and join + domains := getEcosystemDomains("python") + SortStrings(domains) + return strings.Join(domains, ",") + }(), + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := formatBlockedDomains(tt.network) + assert.Equal(t, tt.expected, result, "formatBlockedDomains should return expected string") + }) + } +} + +// TestBlockedDomainsWithEngines tests that blocked domains are properly formatted for each engine +func TestBlockedDomainsWithEngines(t *testing.T) { + network := &NetworkPermissions{ + Allowed: []string{"defaults", "github"}, + Blocked: []string{"tracker.example.com", "analytics.example.com"}, + } + + t.Run("blocked domains 
formatted correctly", func(t *testing.T) { + blockedStr := formatBlockedDomains(network) + assert.NotEmpty(t, blockedStr, "blocked domains string should not be empty") + assert.Contains(t, blockedStr, "tracker.example.com", "should contain tracker.example.com") + assert.Contains(t, blockedStr, "analytics.example.com", "should contain analytics.example.com") + + // Verify comma-separated format + blockedDomains := strings.Split(blockedStr, ",") + assert.Len(t, blockedDomains, 2, "should have 2 blocked domains") + + // Verify sorted order + assert.Equal(t, "analytics.example.com", blockedDomains[0], "first domain should be analytics.example.com (sorted)") + assert.Equal(t, "tracker.example.com", blockedDomains[1], "second domain should be tracker.example.com") + }) +} diff --git a/pkg/workflow/engine.go b/pkg/workflow/engine.go index a6447914e3..f0d611bc7e 100644 --- a/pkg/workflow/engine.go +++ b/pkg/workflow/engine.go @@ -30,6 +30,7 @@ type EngineConfig struct { type NetworkPermissions struct { Mode string `yaml:"mode,omitempty"` // "defaults" for default access Allowed []string `yaml:"allowed,omitempty"` // List of allowed domains + Blocked []string `yaml:"blocked,omitempty"` // List of blocked domains Firewall *FirewallConfig `yaml:"firewall,omitempty"` // AWF firewall configuration (see firewall.go) ExplicitlyDefined bool `yaml:"-"` // Internal flag: true if network field was explicitly set in frontmatter } diff --git a/pkg/workflow/firewall_blocked_domains_test.go b/pkg/workflow/firewall_blocked_domains_test.go new file mode 100644 index 0000000000..f755755d00 --- /dev/null +++ b/pkg/workflow/firewall_blocked_domains_test.go @@ -0,0 +1,173 @@ +package workflow + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/assert" +) + +// TestFirewallBlockedDomainsInCopilotEngine tests that blocked domains are included in AWF command +func TestFirewallBlockedDomainsInCopilotEngine(t *testing.T) { + t.Run("blocked domains are added to AWF command", func(t 
*testing.T) { + workflowData := &WorkflowData{ + Name: "test-workflow", + EngineConfig: &EngineConfig{ + ID: "copilot", + }, + NetworkPermissions: &NetworkPermissions{ + Allowed: []string{"defaults", "github"}, + Blocked: []string{"tracker.example.com", "analytics.example.com"}, + Firewall: &FirewallConfig{ + Enabled: true, + }, + }, + } + + engine := NewCopilotEngine() + steps := engine.GetExecutionSteps(workflowData, "test.log") + + assert.NotEmpty(t, steps, "Expected at least one execution step") + + stepContent := strings.Join(steps[0], "\n") + + // Verify --allow-domains is present + assert.Contains(t, stepContent, "--allow-domains", "Expected command to contain '--allow-domains'") + + // Verify --block-domains is present + assert.Contains(t, stepContent, "--block-domains", "Expected command to contain '--block-domains'") + + // Verify blocked domains are in the command + assert.Contains(t, stepContent, "analytics.example.com", "Expected command to contain blocked domain") + assert.Contains(t, stepContent, "tracker.example.com", "Expected command to contain blocked domain") + }) + + t.Run("no blocked domains means no --block-domains flag", func(t *testing.T) { + workflowData := &WorkflowData{ + Name: "test-workflow", + EngineConfig: &EngineConfig{ + ID: "copilot", + }, + NetworkPermissions: &NetworkPermissions{ + Allowed: []string{"defaults", "github"}, + Firewall: &FirewallConfig{ + Enabled: true, + }, + }, + } + + engine := NewCopilotEngine() + steps := engine.GetExecutionSteps(workflowData, "test.log") + + assert.NotEmpty(t, steps, "Expected at least one execution step") + + stepContent := strings.Join(steps[0], "\n") + + // Verify --allow-domains is present + assert.Contains(t, stepContent, "--allow-domains", "Expected command to contain '--allow-domains'") + + // Verify --block-domains is NOT present when there are no blocked domains + assert.NotContains(t, stepContent, "--block-domains", "Expected command to NOT contain '--block-domains' when no domains 
are blocked") + }) + + t.Run("ecosystem identifiers are expanded in blocked domains", func(t *testing.T) { + workflowData := &WorkflowData{ + Name: "test-workflow", + EngineConfig: &EngineConfig{ + ID: "copilot", + }, + NetworkPermissions: &NetworkPermissions{ + Allowed: []string{"defaults", "github"}, + Blocked: []string{"python"}, + Firewall: &FirewallConfig{ + Enabled: true, + }, + }, + } + + engine := NewCopilotEngine() + steps := engine.GetExecutionSteps(workflowData, "test.log") + + assert.NotEmpty(t, steps, "Expected at least one execution step") + + stepContent := strings.Join(steps[0], "\n") + + // Verify --block-domains is present + assert.Contains(t, stepContent, "--block-domains", "Expected command to contain '--block-domains'") + + // Verify that python ecosystem domains are expanded and included + // Get python domains to verify at least one is present + pythonDomains := getEcosystemDomains("python") + assert.NotEmpty(t, pythonDomains, "Python ecosystem should have domains") + + // Check that at least one python domain is in the blocked domains list + foundPythonDomain := false + for _, domain := range pythonDomains { + if strings.Contains(stepContent, domain) { + foundPythonDomain = true + break + } + } + assert.True(t, foundPythonDomain, "Expected at least one Python ecosystem domain in blocked domains") + }) +} + +// TestFirewallBlockedDomainsInClaudeEngine tests that blocked domains work with Claude engine +func TestFirewallBlockedDomainsInClaudeEngine(t *testing.T) { + t.Run("blocked domains are added to Claude AWF command", func(t *testing.T) { + workflowData := &WorkflowData{ + Name: "test-workflow", + EngineConfig: &EngineConfig{ + ID: "claude", + }, + NetworkPermissions: &NetworkPermissions{ + Allowed: []string{"defaults"}, + Blocked: []string{"tracker.example.com"}, + Firewall: &FirewallConfig{ + Enabled: true, + }, + }, + } + + engine := NewClaudeEngine() + steps := engine.GetExecutionSteps(workflowData, "test.log") + + assert.NotEmpty(t, 
steps, "Expected at least one execution step") + + stepContent := strings.Join(steps[0], "\n") + + // Verify --block-domains is present + assert.Contains(t, stepContent, "--block-domains", "Expected command to contain '--block-domains'") + assert.Contains(t, stepContent, "tracker.example.com", "Expected command to contain blocked domain") + }) +} + +// TestFirewallBlockedDomainsInCodexEngine tests that blocked domains work with Codex engine +func TestFirewallBlockedDomainsInCodexEngine(t *testing.T) { + t.Run("blocked domains are added to Codex AWF command", func(t *testing.T) { + workflowData := &WorkflowData{ + Name: "test-workflow", + EngineConfig: &EngineConfig{ + ID: "codex", + }, + NetworkPermissions: &NetworkPermissions{ + Allowed: []string{"defaults"}, + Blocked: []string{"tracker.example.com"}, + Firewall: &FirewallConfig{ + Enabled: true, + }, + }, + } + + engine := NewCodexEngine() + steps := engine.GetExecutionSteps(workflowData, "test.log") + + assert.NotEmpty(t, steps, "Expected at least one execution step") + + stepContent := strings.Join(steps[0], "\n") + + // Verify --block-domains is present + assert.Contains(t, stepContent, "--block-domains", "Expected command to contain '--block-domains'") + assert.Contains(t, stepContent, "tracker.example.com", "Expected command to contain blocked domain") + }) +} diff --git a/pkg/workflow/frontmatter_extraction_security.go b/pkg/workflow/frontmatter_extraction_security.go index 6ed891ef7f..76587fa4e0 100644 --- a/pkg/workflow/frontmatter_extraction_security.go +++ b/pkg/workflow/frontmatter_extraction_security.go @@ -42,6 +42,18 @@ func (c *Compiler) extractNetworkPermissions(frontmatter map[string]any) *Networ } } + // Extract blocked domains if present + if blocked, hasBlocked := networkObj["blocked"]; hasBlocked { + if blockedSlice, ok := blocked.([]any); ok { + for _, domain := range blockedSlice { + if domainStr, ok := domain.(string); ok { + permissions.Blocked = append(permissions.Blocked, domainStr) + 
} + } + frontmatterExtractionSecurityLog.Printf("Extracted %d blocked domains", len(permissions.Blocked)) + } + } + // Extract firewall configuration if present if firewall, hasFirewall := networkObj["firewall"]; hasFirewall { frontmatterExtractionSecurityLog.Print("Extracting firewall configuration")