diff --git a/.cspell.json b/.cspell.json index d1f57b149..fdc17a15f 100644 --- a/.cspell.json +++ b/.cspell.json @@ -11,6 +11,11 @@ } ], "words": [ + "Microbenchmarks", + "isin", + "tolist", + "dataframe", + "textposition", "Andriushchenko", "Chao", "GitHub", diff --git a/.github/workflows/agent-governance-gate.yml b/.github/workflows/agent-governance-gate.yml deleted file mode 100644 index e4e641cdd..000000000 --- a/.github/workflows/agent-governance-gate.yml +++ /dev/null @@ -1,122 +0,0 @@ -name: Agent Governance Gate - -# Reusable workflow — call from any deployment workflow: -# -# jobs: -# governance: -# uses: microsoft/agent-governance-toolkit/.github/workflows/agent-governance-gate.yml@main -# with: -# policy_file: .agents/security.yaml -# agent_manifest: agents.yaml -# require_receipt: true -# secrets: -# signing_key: ${{ secrets.GOVERNANCE_SIGNING_KEY }} - -on: - workflow_call: - inputs: - policy_file: - description: "Path to the agent governance policy YAML file" - required: false - default: ".agents/security.yaml" - type: string - agent_manifest: - description: "Path to the agent manifest YAML file" - required: false - default: "agents.yaml" - type: string - python_version: - description: "Python version to use" - required: false - default: "3.11" - type: string - require_receipt: - description: "Fail if a signed receipt cannot be produced" - required: false - default: false - type: boolean - audit_log: - description: "Path to write the JSONL audit log entry" - required: false - default: ".governance/audit.jsonl" - type: string - secrets: - signing_key: - description: "Ed25519 private key PEM for signing deployment receipts (optional)" - required: false - outputs: - gate_result: - description: "Governance gate result: 'passed' or 'failed'" - value: ${{ jobs.governance-gate.outputs.gate_result }} - receipt_id: - description: "Generated deployment receipt ID" - value: ${{ jobs.governance-gate.outputs.receipt_id }} - -permissions: - contents: read - -jobs: - 
governance-gate: - name: Governance Gate - runs-on: ubuntu-latest - - outputs: - gate_result: ${{ steps.gate.outputs.gate_result }} - receipt_id: ${{ steps.gate.outputs.receipt_id }} - - steps: - - name: Checkout - uses: actions/checkout@df4cb1c069e1874edd31b4311f1884172cec0e10 # v6.0.3 - - - name: Set up Python - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: ${{ inputs.python_version }} - - - name: Install dependencies - run: pip install --no-cache-dir pyyaml==6.0.2 cryptography==44.0.3 # Scorecard: version-pinned - - - name: Run governance gate - id: gate - env: - GOVERNANCE_POLICY: ${{ inputs.policy_file }} - GOVERNANCE_MANIFEST: ${{ inputs.agent_manifest }} - GITHUB_SHA: ${{ github.sha }} - GITHUB_ACTOR: ${{ github.actor }} - GOVERNANCE_SIGNING_KEY: ${{ secrets.signing_key }} - GOVERNANCE_REQUIRE_RECEIPT: ${{ inputs.require_receipt }} - AUDIT_LOG: ${{ inputs.audit_log }} - run: | - set +e - python scripts/governance_gate.py \ - --policy "$GOVERNANCE_POLICY" \ - --manifest "$GOVERNANCE_MANIFEST" \ - --commit "$GITHUB_SHA" \ - --deployer "$GITHUB_ACTOR" \ - --audit-log "$AUDIT_LOG" \ - ${{ inputs.require_receipt && '--require-receipt' || '' }} - EXIT_CODE=$? 
- set -e - - if [ "$EXIT_CODE" -eq 0 ]; then - echo "gate_result=passed" >> "$GITHUB_OUTPUT" - else - echo "gate_result=failed" >> "$GITHUB_OUTPUT" - fi - - # Extract receipt ID from audit log for downstream jobs - if [ -f "$AUDIT_LOG" ]; then - RECEIPT_ID=$(tail -1 "$AUDIT_LOG" | python3 -c "import sys,json; print(json.load(sys.stdin).get('receipt_id',''))" 2>/dev/null || echo "") - echo "receipt_id=$RECEIPT_ID" >> "$GITHUB_OUTPUT" - fi - - exit "$EXIT_CODE" - - - name: Upload audit log - if: always() - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 - with: - name: governance-audit-log - path: ${{ inputs.audit_log }} - if-no-files-found: ignore - retention-days: 90 diff --git a/BREAKING_CHANGES.md b/BREAKING_CHANGES.md index 2095d6958..16151691b 100644 --- a/BREAKING_CHANGES.md +++ b/BREAKING_CHANGES.md @@ -55,15 +55,56 @@ a default action. --- -## Composite actions: `toolkit-version` is now **required** +## `agent-hypervisor` removes joint-liability, blockchain-commitment, and advanced-saga symbols + +**Date:** TBD (next release of `microsoft/agent-governance-toolkit`) + +**Affected:** + +- `agent-hypervisor` (`hypervisor`) +- `agentmesh-runtime` (`agent_runtime`, which re-exported these symbols) + +**What changed:** + +The following public symbols are removed from `hypervisor` and from the +`agent_runtime` re-exports. 
Each backed a documented no-op stub (the ledger +always admitted, slashing and quarantine recorded events but enforced nothing, +the commitment engine stored in memory with no anchoring, and the saga DSL, +fan-out, and checkpoint modules had no runtime), so the removal is +behavior-preserving: + +- Joint liability: `VouchingEngine`, `VouchRecord`, `SlashingEngine`, + `LiabilityLedger`, `LedgerEntryType`, `LiabilityMatrix`, `QuarantineManager`, + `QuarantineReason`, `CausalAttributor`, `AttributionResult` +- Session intent locks: `IntentLockManager`, `LockIntent`, + `LockContentionError`, `DeadlockError` +- Advanced saga: `FanOutOrchestrator`, `FanOutPolicy`, `SagaDSLParser`, + `SagaDefinition`, `CheckpointManager`, `SemanticCheckpoint` +- Audit and clock internals: `CommitmentEngine`, `EphemeralGC`, + `VectorClockManager` + +**Why:** + +These surfaces were Public Preview stubs that advertised capabilities the +runtime never applied, so keeping them exported implied enforcement that did +not exist. Public Preview status permits removal without a deprecation cycle. + +**How to migrate:** + +Remove imports of these symbols. The supported runtime surface is unchanged: +execution rings, session isolation, the hash-chained delta audit trail, +`SagaOrchestrator` (ordered steps, retries, timeout handling, and reverse-order +compensation), the kill switch, rate limiting, and observability.
+ +--- + +## Composite action: `toolkit-version` is now **required** **Date:** TBD (next release of `microsoft/agent-governance-toolkit`) **Affected:** - `microsoft/agent-governance-toolkit/action` -- `microsoft/agent-governance-toolkit/action/security-scan` -- `microsoft/agent-governance-toolkit/action/governance-attestation` **What changed:** diff --git a/action/governance-attestation/README.md b/action/governance-attestation/README.md deleted file mode 100644 index 3863689cd..000000000 --- a/action/governance-attestation/README.md +++ /dev/null @@ -1,238 +0,0 @@ -# Governance Attestation GitHub Action - -Automated validation of PR governance attestation checklists using the Agent Governance Toolkit. - -Ensures PRs contain properly filled governance attestations with exactly one checkbox marked per required section. - -## Quick Start - -> **Breaking change (vNEXT):** `toolkit-version` is now **required**. Pin to an exact published release (e.g. `3.7.0`); wildcards, floating refs, post-releases (`.post1`), dev-releases (`.dev0`), and local-version identifiers (`+local`) are rejected. See [Accepted version syntax](#accepted-version-syntax) below. Consumers should pin this action to the major-tag they were already using (e.g. `@v3`) and bump `toolkit-version` as new releases ship. - -```yaml -- uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - with: - toolkit-version: "3.7.0" -``` - -This validates the current PR's description against the standard 7-section governance attestation. 
- -## Usage Examples - -### Basic validation (default sections) - -```yaml -- name: Governance Attestation - uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 -``` - -### Custom sections - -```yaml -- name: Governance Attestation - uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - with: - required-sections: | - Security review - Privacy review - CELA review -``` - -### Validate specific PR body - -```yaml -- name: Governance Attestation - uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - with: - pr-body: ${{ github.event.pull_request.body }} -``` - -## Inputs - -| Input | Description | Required | Default | -|-------|-------------|----------|---------| -| `pr-body` | PR body text to validate | No | (current PR from context) | -| `required-sections` | YAML list of section titles (one per line) | No | Standard 7 sections | -| `min-body-length` | Minimum PR body length | No | `40` | -| `python-version` | Python version to use | No | `3.12` | -| `toolkit-version` | Exact toolkit version to install (e.g. `3.7.0`) | **Yes** | | - -## Outputs - -| Output | Description | -|--------|-------------| -| `status` | `pass` or `fail` | -| `errors` | Newline-separated list of errors | -| `sections-found` | JSON mapping sections to checkbox counts | -| `message` | Formatted validation message | - -## Default Required Sections - -1. Security review -2. Privacy review -3. CELA review -4. Responsible AI review -5. Accessibility review -6. Release Readiness / Safe Deployment -7. Org-specific Launch Gates - -Each section must have **exactly one** checkbox marked: -- ✅ `[x] Yes` -- ✅ `[x] No` -- ✅ `[x] Not needed (explain below)` - -## PR Template Format - -Your `.github/pull_request_template.md` should follow this structure: - -> **GitHub Issue Forms compatibility:** GitHub Issue Forms with `type: checkboxes` automatically render checkbox group labels as `###` (h3) headings. 
Both `##` (h2) and `###` (h3) heading levels are accepted, so Issue Form–generated bodies and hand-written PR templates work without any extra configuration. - -```markdown -# Governance Attestations (required) - -## 1) Security review -- [ ] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 2) Privacy review -- [ ] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - - - ---- - -# Notes / Links - -Provide justifications for N/A selections: - -- -``` - -## Complete Workflow Example - -```yaml -name: Governance Attestation - -on: - pull_request: - types: [opened, edited, reopened, synchronize] - -permissions: - pull-requests: read - -jobs: - verify-attestation: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Verify PR governance attestation - uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - id: attestation - - - name: Comment on PR (on failure) - if: failure() - uses: actions/github-script@v7 - with: - script: | - const errors = `${{ steps.attestation.outputs.errors }}`.split('\n'); - const body = `❌ **Governance attestation validation failed:**\n\n${errors.map(e => `- ${e}`).join('\n')}`; - - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }); -``` - -## Validation Rules - -### ✅ Valid Example -```markdown -## 1) Security review -- [ ] ✅ Yes -- [x] ⚠️ Not needed (explain below) -- [ ] ❌ No -``` -**Exactly one** checkbox marked. 
- -### ❌ Invalid Examples - -**No checkbox marked:** -```markdown -## 1) Security review -- [ ] ✅ Yes -- [ ] ⚠️ Not needed (explain below) -- [ ] ❌ No -``` - -**Multiple checkboxes marked:** -```markdown -## 1) Security review -- [x] ✅ Yes -- [x] ⚠️ Not needed (explain below) -- [ ] ❌ No -``` - -**Section missing:** -```markdown -# Governance Attestations - -(Security review section not found) -``` - -## Error Messages - -| Error | Meaning | Fix | -|-------|---------|-----| -| `Missing section: "X"` | Required section not found | Add section to PR description | -| `Section "X" must have exactly ONE checked box, found 0` | No checkbox marked | Mark exactly one checkbox | -| `Section "X" must have exactly ONE checked box, found 2` | Multiple checkboxes marked | Uncheck all but one | -| `PR description is too short` | Template not used | Use governance attestation template | - -## Customization - -### Organization-Specific Sections - -Override default sections for your organization: - -```yaml -- uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - with: - required-sections: | - Security review - Privacy review - Legal review - Compliance review - Architecture review - Product review -``` - -### Lenient Mode (Warnings Only) - -Use `continue-on-error` to treat failures as warnings: - -```yaml -- name: Governance Attestation - uses: microsoft/agent-governance-toolkit/action/governance-attestation@v2 - continue-on-error: true -``` - -## License - -MIT License - see [LICENSE](../../LICENSE) for details. - -## Accepted version syntax - -The `toolkit-version` input is validated against this regex before installation: - -```regex -^[0-9]+\.[0-9]+\.[0-9]+((a|b|rc)[0-9]+)?$ -``` - -Accepted: `3.7.0`, `3.7.0a1`, `3.7.0b2`, `3.7.0rc1`. 
- -Rejected (and why): `3.7.0.post1` / `3.7.0.dev0` (transient pre/post artifacts), `3.7.0+local` (PEP 440 local-version identifiers can override registry resolution under some pip resolvers), `3.7.*` / `>=3.7` (floating), `3.7.0; python_version > '3'` (environment markers), URL/VCS references, and anything else outside the regex above. diff --git a/action/governance-attestation/action.yml b/action/governance-attestation/action.yml deleted file mode 100644 index 390120983..000000000 --- a/action/governance-attestation/action.yml +++ /dev/null @@ -1,164 +0,0 @@ -name: 'Governance Attestation' -description: 'Validate PR governance attestation checklist using Agent Governance Toolkit' -author: 'Microsoft' -branding: - icon: 'check-square' - color: 'blue' - -inputs: - pr-body: - description: 'PR body to validate (defaults to current PR)' - required: false - default: '' - required-sections: - description: | - YAML list of required section titles (one per line). - Defaults to standard 7-section attestation. 
- required: false - default: | - 1) Security review - 2) Privacy review - 3) CELA review - 4) Responsible AI review - 5) Accessibility review - 6) Release Readiness / Safe Deployment - 7) Org-specific Launch Gates - min-body-length: - description: 'Minimum length for PR body to be considered valid' - required: false - default: '40' - python-version: - description: 'Python version to use' - required: false - default: '3.12' - toolkit-version: - description: 'Exact Agent Governance Toolkit version to install (for example, 3.7.0)' - required: true - -outputs: - status: - description: 'Validation status: pass or fail' - value: ${{ steps.validate.outputs.status }} - errors: - description: 'List of validation errors (newline-separated)' - value: ${{ steps.validate.outputs.errors }} - sections-found: - description: 'JSON object mapping sections to checkbox counts' - value: ${{ steps.validate.outputs.sections_found }} - message: - description: 'Formatted validation message' - value: ${{ steps.validate.outputs.message }} - -runs: - using: 'composite' - steps: - - name: Set up Python - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: ${{ inputs.python-version }} - - - name: Install Agent Governance Toolkit - shell: bash - env: - AGT_TOOLKIT_VERSION: ${{ inputs.toolkit-version }} - run: | - if [ -z "$AGT_TOOLKIT_VERSION" ]; then - echo "::error::toolkit-version must be set to an exact published version" - exit 1 - fi - # Enforce exact version (PEP 440-like). Only release / pre-release - # forms are accepted: X.Y.Z, X.Y.ZaN, X.Y.ZbN, X.Y.ZrcN. Post / dev / - # local-version syntax is rejected so the install line below cannot be - # coerced into resolving an unintended distribution. - if ! printf '%s' "$AGT_TOOLKIT_VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+((a|b|rc)[0-9]+)?$'; then - echo "::error::toolkit-version must be an exact release or pre-release (e.g. 
3.7.0 or 3.7.0rc1); got: $AGT_TOOLKIT_VERSION" - exit 1 - fi - python -m pip install --quiet --no-cache-dir --disable-pip-version-check "agent-governance-toolkit[governance]==$AGT_TOOLKIT_VERSION" - - - name: Validate governance attestation - id: validate - shell: bash - env: - PR_BODY: ${{ inputs.pr-body }} - REQUIRED_SECTIONS: ${{ inputs.required-sections }} - MIN_BODY_LENGTH: ${{ inputs.min-body-length }} - run: | - python3 << 'PYEOF' - import json - import os - import sys - from agent_compliance.governance import validate_attestation - - # Get PR body (from input or context) - pr_body = os.environ.get('PR_BODY', '') - if not pr_body: - # Try to get from GitHub context - import subprocess - try: - result = subprocess.run( - ['gh', 'pr', 'view', '--json', 'body', '-q', '.body'], - capture_output=True, - text=True, - timeout=10 - ) - if result.returncode == 0: - pr_body = result.stdout - except: - pass - - # Parse required sections - sections_input = os.environ.get('REQUIRED_SECTIONS', '').strip() - if sections_input: - required_sections = [s.strip() for s in sections_input.split('\n') if s.strip()] - else: - required_sections = None # Use defaults - - # Get min body length - min_body_length = int(os.environ.get('MIN_BODY_LENGTH', '40')) - - # Validate - result = validate_attestation( - pr_body=pr_body, - required_sections=required_sections, - min_body_length=min_body_length - ) - - # Set outputs - status = 'pass' if result.valid else 'fail' - with open(os.environ['GITHUB_OUTPUT'], 'a') as f: - f.write(f"status={status}\n") - - # Errors (newline-separated) - if result.errors: - f.write("errors< **Breaking change (vNEXT):** `toolkit-version` is now **required**. Pin to an exact published release (e.g. `3.7.0`); wildcards, floating refs, post-releases (`.post1`), dev-releases (`.dev0`), and local-version identifiers (`+local`) are rejected. See [Accepted version syntax](#accepted-version-syntax) below. 
Consumers should pin this action to the major-tag they were already using (e.g. `@v3`) and bump `toolkit-version` as new releases ship. - -```yaml -- uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - toolkit-version: "3.7.0" - paths: 'plugins/' -``` - -## Usage Examples - -### Basic plugin scan - -```yaml -- name: Security Scan - uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - paths: 'plugins/my-plugin' - plugin-name: 'my-plugin' -``` - -### Scan multiple directories - -```yaml -- name: Security Scan - uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - paths: 'plugins/ scripts/' -``` - -### With custom severity threshold - -```yaml -- name: Security Scan - uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - paths: 'plugins/' - min-severity: 'critical' # Only block on critical issues -``` - -### With exemptions file - -```yaml -- name: Security Scan - uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - paths: 'plugins/' - exemptions-file: '.security-exemptions.json' -``` - -## Inputs - -| Input | Description | Required | Default | -|-------|-------------|----------|---------| -| `paths` | Paths to scan (space-separated) | Yes | | -| `plugin-name` | Plugin name for error messages | No | (basename of first path) | -| `exemptions-file` | Path to exemptions JSON file | No | `.security-exemptions.json` | -| `min-severity` | Minimum severity to block (`critical`, `high`, `medium`, `low`) | No | `high` | -| `verbose` | Enable verbose output | No | `false` | -| `python-version` | Python version to use | No | `3.12` | -| `toolkit-version` | Exact toolkit version to install (e.g. 
`3.7.0`) | **Yes** | | - -## Outputs - -| Output | Description | -|--------|-------------| -| `status` | `pass` or `fail` | -| `findings-count` | Total number of security findings | -| `blocking-count` | Number of blocking findings (critical/high) | -| `findings` | Security findings in text format | - -## Severity Levels - -| Severity | Emoji | Action | Examples | -|----------|-------|--------|----------| -| **Critical** | 🔴 | BLOCKS MERGE | Hardcoded secrets, RCE vulnerabilities, CVSS ≥ 9.0 | -| **High** | 🟡 | BLOCKS MERGE | CVE CVSS 7.0-8.9, command injection, SQL injection | -| **Medium** | 🟠 | Warning | CVE CVSS 4.0-6.9, weak crypto, missing validation | -| **Low** | 🟢 | Info | CVE CVSS < 4.0, best practice suggestions | - -## Security Exemptions - -Create `.security-exemptions.json` in your repository to suppress false positives: - -```json -{ - "version": "1.0", - "exemptions": [ - { - "tool": "detect-secrets", - "file": "tests/fixtures/mock_credentials.py", - "line": 12, - "reason": "Test fixture with intentionally fake credentials", - "approved_by": "security-team" - }, - { - "tool": "pip-audit", - "package": "requests", - "version": "2.25.0", - "cve": "CVE-2023-32681", - "reason": "Not exploitable - only internal API calls", - "temporary": true, - "expires": "2026-06-30", - "ticket": "ADO-67890" - } - ] -} -``` - -See [schema](../../agent-governance-python/agent-compliance/src/agent_compliance/security/schemas/security-exemptions.schema.json) for full format. 
- -## Complete Workflow Example - -```yaml -name: Security Scan - -on: - pull_request: - paths: ['plugins/**', 'scripts/**'] - -jobs: - security: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Security Scan - uses: microsoft/agent-governance-toolkit/action/security-scan@v2 - with: - paths: 'plugins/' - exemptions-file: '.security-exemptions.json' - verbose: 'true' - - - name: Comment on PR (on failure) - if: failure() - uses: actions/github-script@v7 - with: - script: | - github.rest.issues.createComment({ - issue_number: context.issue.number, - owner: context.repo.owner, - repo: context.repo.repo, - body: '❌ Security scan failed. Please review the findings and update your PR.' - }) -``` - -## What Gets Scanned - -### File Types -- ✅ Python files (`*.py`) -- ✅ JavaScript/TypeScript files (`*.js`, `*.ts`) -- ✅ Shell scripts (`*.sh`, `*.bash`) -- ✅ PowerShell scripts (`*.ps1`) -- ✅ Dependency files (`requirements.txt`, `package.json`, `pyproject.toml`) -- ✅ **Code blocks in markdown files** (skills and agents) - -### Exclusions -The scanner automatically skips: -- ❌ Test fixtures and mock data (`tests/fixtures/`, `**/*.test.py`) -- ❌ Example files (`**/*.example.*`, `examples/`, `samples/`) -- ❌ Template files (`**/*.template.*`, `**/*.sample.*`) -- ❌ Build artifacts (`dist/`, `build/`, `node_modules/`) - -## Tools Used - -| Tool | Purpose | -|------|---------| -| [detect-secrets](https://github.com/Yelp/detect-secrets) | Secret detection | -| [pip-audit](https://github.com/pypa/pip-audit) | Python CVE scanning | -| [npm audit](https://docs.npmjs.com/cli/v8/commands/npm-audit) | Node.js CVE scanning | -| [bandit](https://bandit.readthedocs.io/) | Python SAST | - -## License - -MIT License - see [LICENSE](../../LICENSE) for details. 
- -## Accepted version syntax - -The `toolkit-version` input is validated against this regex before installation: - -```regex -^[0-9]+\.[0-9]+\.[0-9]+((a|b|rc)[0-9]+)?$ -``` - -Accepted: `3.7.0`, `3.7.0a1`, `3.7.0b2`, `3.7.0rc1`. - -Rejected (and why): `3.7.0.post1` / `3.7.0.dev0` (transient pre/post artifacts), `3.7.0+local` (PEP 440 local-version identifiers can override registry resolution under some pip resolvers), `3.7.*` / `>=3.7` (floating), `3.7.0; python_version > '3'` (environment markers), URL/VCS references, and anything else outside the regex above. diff --git a/action/security-scan/action.yml b/action/security-scan/action.yml deleted file mode 100644 index ee898679d..000000000 --- a/action/security-scan/action.yml +++ /dev/null @@ -1,147 +0,0 @@ -name: 'Security Scan' -description: 'Scan for secrets, CVEs, and dangerous code patterns using Agent Governance Toolkit' -author: 'Microsoft' -branding: - icon: 'shield' - color: 'red' - -inputs: - paths: - description: 'Paths to scan (space-separated, e.g., "plugins/ scripts/")' - required: true - plugin-name: - description: 'Plugin name for error messages' - required: false - default: '' - exemptions-file: - description: 'Path to .security-exemptions.json file' - required: false - default: '.security-exemptions.json' - min-severity: - description: 'Minimum severity to block merge (critical|high|medium|low)' - required: false - default: 'high' - verbose: - description: 'Enable verbose output' - required: false - default: 'false' - python-version: - description: 'Python version to use' - required: false - default: '3.12' - toolkit-version: - description: 'Exact Agent Governance Toolkit version to install (for example, 3.7.0)' - required: true - -outputs: - status: - description: 'Scan status: pass or fail' - value: ${{ steps.scan.outputs.status }} - findings-count: - description: 'Total number of security findings' - value: ${{ steps.scan.outputs.findings_count }} - blocking-count: - description: 'Number of 
blocking findings (critical/high)' - value: ${{ steps.scan.outputs.blocking_count }} - findings: - description: 'Security findings in JSON format' - value: ${{ steps.scan.outputs.findings }} - -runs: - using: 'composite' - steps: - - name: Set up Python - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 - with: - python-version: ${{ inputs.python-version }} - - - name: Install Agent Governance Toolkit - shell: bash - env: - AGT_TOOLKIT_VERSION: ${{ inputs.toolkit-version }} - run: | - if [ -z "$AGT_TOOLKIT_VERSION" ]; then - echo "::error::toolkit-version must be set to an exact published version" - exit 1 - fi - # Enforce exact version (PEP 440-like). Only release / pre-release - # forms are accepted: X.Y.Z, X.Y.ZaN, X.Y.ZbN, X.Y.ZrcN. Post / dev / - # local-version syntax is rejected so the install line below cannot be - # coerced into resolving an unintended distribution. - if ! printf '%s' "$AGT_TOOLKIT_VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+((a|b|rc)[0-9]+)?$'; then - echo "::error::toolkit-version must be an exact release or pre-release (e.g. 
3.7.0 or 3.7.0rc1); got: $AGT_TOOLKIT_VERSION" - exit 1 - fi - python -m pip install --quiet --no-cache-dir --disable-pip-version-check "agent-governance-toolkit[security]==$AGT_TOOLKIT_VERSION" - - - name: Run security scan - id: scan - shell: bash - env: - AGT_PATHS: ${{ inputs.paths }} - AGT_PLUGIN_NAME: ${{ inputs.plugin-name }} - AGT_VERBOSE: ${{ inputs.verbose }} - run: | - set +e - - # Run scanner for each path - PLUGIN_NAME="$AGT_PLUGIN_NAME" - VERBOSE_FLAG="" - - if [ "$AGT_VERBOSE" = "true" ]; then - VERBOSE_FLAG="--verbose" - fi - - # Use plugin name from first path if not provided - if [ -z "$PLUGIN_NAME" ]; then - PLUGIN_NAME=$(basename "$(echo "$AGT_PATHS" | awk '{print $1}')") - fi - - # Run the security scanner - AGT_PLUGIN="$PLUGIN_NAME" python -c " - from pathlib import Path - from agent_compliance.security import scan_plugin_security - import json - import sys - import os - - paths = os.environ['AGT_PATHS'].split() - verbose = os.environ.get('AGT_VERBOSE', '') == 'true' - - # Scan first path (primary plugin directory) - plugin_dir = Path(paths[0]) - plugin_name = os.environ.get('AGT_PLUGIN', '') - - exit_code, error_msg = scan_plugin_security( - plugin_dir=plugin_dir, - plugin_name=plugin_name, - verbose=verbose - ) - - # Output for GitHub Actions - print(f'exit_code={exit_code}', file=sys.stderr) - if error_msg: - print(error_msg) - - sys.exit(exit_code) - " 2>&1 - - EXIT_CODE=$? 
- - # Set outputs - if [ $EXIT_CODE -eq 0 ]; then - echo "status=pass" >> "$GITHUB_OUTPUT" - else - echo "status=fail" >> "$GITHUB_OUTPUT" - fi - - echo "findings_count=0" >> "$GITHUB_OUTPUT" - echo "blocking_count=0" >> "$GITHUB_OUTPUT" - - # Fail if blocking issues found - if [ $EXIT_CODE -ne 0 ]; then - echo "::error::Security scan failed" - exit 1 - fi - - echo "::notice::Security scan passed - no blocking issues found" diff --git a/agent-governance-python/agent-compliance/AGENTS.md b/agent-governance-python/agent-compliance/AGENTS.md index ca607befa..d27137da3 100644 --- a/agent-governance-python/agent-compliance/AGENTS.md +++ b/agent-governance-python/agent-compliance/AGENTS.md @@ -1,12 +1,11 @@ -# Agent Compliance — Coding Agent Instructions +# Agent Compliance - Coding Agent Instructions ## Project Overview -Agent Compliance (`agent-governance-toolkit`) provides **runtime policy enforcement, governance attestation validation, and security scanning** for AI agent systems. This package ensures agents operate within organizational compliance boundaries and security policies. +Agent Compliance (`agent-governance-toolkit`) provides **runtime policy enforcement and security scanning** for AI agent systems. This package ensures agents operate within organizational compliance boundaries and security policies. **Core Capabilities:** -- **Governance Attestation Validation:** PR checklist validation for organizational governance requirements - **Security Scanning:** Automated detection of secrets, CVEs, dangerous code patterns, and unsafe operations - **Runtime Policy Enforcement:** OWASP ASI 2026 controls and integrity verification @@ -41,10 +40,8 @@ ruff format . 
| File | Purpose | |------|---------| -| `src/agent_compliance/governance/attestation_validator.py` | PR governance attestation validation logic | | `src/agent_compliance/security/scanner.py` | Security scanning engine (secrets, CVEs, code patterns) | | `src/agent_compliance/security/schemas/` | JSON schemas for security exemptions | -| `tests/test_governance_attestation.py` | Attestation validation test suite (17 tests) | | `tests/test_security_scanner.py` | Security scanner test suite (25 tests) | ## Coding Conventions @@ -53,7 +50,6 @@ ruff format . - Use `dataclasses` for simple data structures, Pydantic for validation-heavy ones - SecurityFinding fields: `severity`, `category`, `title`, `file`, `line`, `code`, `description`, `recommendation`, `cwe`, `cve` - Severity levels: `critical`, `high`, `medium`, `low` (critical/high block PRs) -- AttestationResult fields: `valid`, `errors`, `warnings`, `sections_found` ## Security Scanning @@ -84,29 +80,9 @@ SEVERITY_CONFIG = { } ``` -## Governance Attestation - -### Required Sections (Default) - -1. Security review -2. Privacy review -3. CELA review -4. Responsible AI review -5. Accessibility review -6. Release Readiness / Safe Deployment -7. 
Org-specific Launch Gates - -### Validation Rules - -- Each section must have **exactly one** checkbox marked -- Section titles must be at the start of lines (after `##`) -- Checkboxes can have leading whitespace -- Case-insensitive matching (`[x]` or `[X]`) - ## Testing Requirements - All new features **must** include corresponding tests -- **Attestation tests:** Cover valid/invalid sections, checkbox counts, edge cases (CRLF, special chars) - **Security tests:** Cover finding creation, exemption matching, pattern detection, formatting - Run tests before committing: `pytest tests/ -v` - Aim for >90% code coverage on new code @@ -137,37 +113,6 @@ Security exemptions use `.security-exemptions.json`: - **CVE identifier:** Matches across all files - **Temporary exemptions:** Must have `expires` field (ISO 8601 date) -## GitHub Actions - -### action/governance-attestation - -Validates PR descriptions contain properly filled governance attestation checklists. - -**Inputs:** -- `pr-body`: PR body to validate (defaults to current PR) -- `required-sections`: YAML list of section titles -- `min-body-length`: Minimum PR body length (default: 40) - -**Outputs:** -- `status`: `pass` or `fail` -- `errors`: Newline-separated list of errors -- `sections-found`: JSON mapping sections to checkbox counts - -### action/security-scan - -Scans for secrets, CVEs, and dangerous code patterns. 
- -**Inputs:** -- `paths`: Space-separated paths to scan (required) -- `exemptions-file`: Path to exemptions JSON (default: `.security-exemptions.json`) -- `min-severity`: Minimum severity to block (default: `high`) - -**Outputs:** -- `status`: `pass` or `fail` -- `findings-count`: Total findings -- `blocking-count`: Critical/high findings -- `findings`: JSON-formatted findings - ## Boundaries - **Never commit** secrets, credentials, or API keys diff --git a/agent-governance-python/agent-compliance/docs/submissions/owasp-genai-submission.md b/agent-governance-python/agent-compliance/docs/submissions/owasp-genai-submission.md index 74fc5605f..96214a44b 100644 --- a/agent-governance-python/agent-compliance/docs/submissions/owasp-genai-submission.md +++ b/agent-governance-python/agent-compliance/docs/submissions/owasp-genai-submission.md @@ -95,7 +95,7 @@ Sub-millisecond policy enforcement (<0.1ms p99), 1,680+ tests, integrations with - **SLO engine**: Continuous compliance monitoring with error budget tracking - **OpenTelemetry export**: TraceExporter + MetricsExporter for full observability - **Agent Runtime RingEnforcer**: Trust-score-based demotion; automatic sandbox for untrusted agents -- **QuarantineManager**: Agent isolation with forensic data preservation +- **KillSwitch**: Agent termination with audit evidence and saga handoff support - **CostGuard**: Budget enforcement (throttle at 85%, kill at 95%) ## Architecture @@ -123,7 +123,7 @@ Sub-millisecond policy enforcement (<0.1ms p99), 1,680+ tests, integrations with - **GitHub (Agent SRE)**: https://github.com/microsoft/agent-governance-toolkit - **GitHub (Agent Runtime)**: https://github.com/microsoft/agent-governance-toolkit - **Governance Docs**: https://github.com/microsoft/agent-governance-toolkit -- **OWASP Mapping**: https://github.com/microsoft/agent-governance-toolkit/blob/main/docs/owasp-agentic-top10-mapping.md +- **OWASP Mapping**: 
https://github.com/microsoft/agent-governance-toolkit/blob/main/agent-governance-python/agent-os/docs/owasp-agentic-top10-mapping.md ## Maintainers diff --git a/agent-governance-python/agent-compliance/src/agent_compliance/governance/__init__.py b/agent-governance-python/agent-compliance/src/agent_compliance/governance/__init__.py deleted file mode 100644 index 8191251c8..000000000 --- a/agent-governance-python/agent-compliance/src/agent_compliance/governance/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Governance attestation validation module. - -This module provides validation for PR governance attestation checklists, -ensuring compliance with organizational governance requirements. - -Example: - from agent_compliance.governance import validate_attestation - - result = validate_attestation( - pr_body="...", - required_sections=[ - "Security review", - "Privacy review", - "CELA review" - ] - ) - - if not result.valid: - print(f"Errors: {result.errors}") -""" - -from .attestation_validator import AttestationResult, validate_attestation - -__all__ = [ - "AttestationResult", - "validate_attestation", -] diff --git a/agent-governance-python/agent-compliance/src/agent_compliance/governance/attestation_validator.py b/agent-governance-python/agent-compliance/src/agent_compliance/governance/attestation_validator.py deleted file mode 100644 index fbf4b0d85..000000000 --- a/agent-governance-python/agent-compliance/src/agent_compliance/governance/attestation_validator.py +++ /dev/null @@ -1,159 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -#!/usr/bin/env python3 -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Governance attestation validation. - -Validates that PR descriptions contain properly filled out governance -attestation checklists with exactly one checkbox marked per section. 
-""" - -import re -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class AttestationResult: - """Result of attestation validation.""" - - valid: bool - errors: list[str] - warnings: list[str] - sections_found: dict[str, int] # section -> number of checked boxes - - @property - def message(self) -> str: - """Get formatted message for display.""" - if self.valid: - return "✅ Governance attestation checklist looks good." - - lines = ["❌ Governance attestation check failed:"] - for error in self.errors: - lines.append(f" - {error}") - return "\n".join(lines) - - -def _normalize_line_endings(text: str) -> str: - """Normalize CRLF to LF for consistent matching.""" - return text.replace("\r\n", "\n") - - -_FENCED_CODE_BLOCK_RE = re.compile(r"```.*?(?:```|\Z)", re.DOTALL) - - -def _strip_fenced_code_blocks(text: str) -> str: - """Remove fenced code blocks so their content can't be mistaken for headings. - - A code fence may contain text that looks like a ``##`` heading or a - ``- [x]`` checkbox, but those are render-time literals — they should - not terminate sections or count as user-checked attestations. - Unclosed fences run to end-of-text. - """ - return _FENCED_CODE_BLOCK_RE.sub("", text) - - -def _count_section_checkboxes(section_title: str, pr_body: str) -> dict: - """Count checked boxes in a specific section. - - Args: - section_title: Title of the section to find (e.g., "1) Security review") - pr_body: Full PR description body - - Returns: - dict with: - - found: bool (whether section was found) - - checked: int (number of checked boxes) - - details: str (section content) or None - """ - body = _strip_fenced_code_blocks(pr_body) - - # Escape special regex characters in section title - escaped_title = re.escape(section_title) - - # Find section header and capture content until next ## / ### or end. 
- # Supports both ## (h2) and ### (h3) headings for compatibility with - # GitHub Issue Forms, which render checkbox group labels as ### headings. - pattern = rf"(^|\n)#{{2,3}}\s+{escaped_title}\n([\s\S]*?)(?=\n#{{2,3}}\s+|$)" - match = re.search(pattern, body, re.IGNORECASE) - - if not match: - return {"found": False, "checked": 0, "details": None} - - section_content = match.group(2) - - # Count checked boxes (case-insensitive, allows leading whitespace) - checked_pattern = r"(^|\n)\s*-\s*\[x\]" - checked_boxes = re.findall(checked_pattern, section_content, re.IGNORECASE) - - return { - "found": True, - "checked": len(checked_boxes), - "details": section_content, - } - - -def validate_attestation( - pr_body: str, - required_sections: Optional[list[str]] = None, - min_body_length: int = 40, -) -> AttestationResult: - """Validate governance attestation in PR description. - - Args: - pr_body: PR description body text - required_sections: List of section titles that must be present. - Defaults to standard 7-section attestation. - min_body_length: Minimum length for PR body to be considered valid - - Returns: - AttestationResult with validation status and details - """ - if required_sections is None: - required_sections = [ - "1) Security review", - "2) Privacy review", - "3) CELA review", - "4) Responsible AI review", - "5) Accessibility review", - "6) Release Readiness / Safe Deployment", - "7) Org-specific Launch Gates", - ] - - # Normalize line endings - pr_body = _normalize_line_endings(pr_body or "") - - errors = [] - sections_found = {} - - # Check each required section - for section in required_sections: - result = _count_section_checkboxes(section, pr_body) - - if not result["found"]: - errors.append(f'Missing section: "{section}"') - continue - - checked_count = result["checked"] - sections_found[section] = checked_count - - if checked_count != 1: - errors.append( - f'Section "{section}" must have exactly ONE checked box, ' - f"found {checked_count}." 
- ) - - # Check minimum body length - if pr_body.strip() and len(pr_body.strip()) < min_body_length: - errors.append( - "PR description is too short; " - "please use the governance attestation template." - ) - - return AttestationResult( - valid=len(errors) == 0, - errors=errors, - warnings=[], - sections_found=sections_found, - ) diff --git a/agent-governance-python/agent-compliance/tests/test_governance_attestation.py b/agent-governance-python/agent-compliance/tests/test_governance_attestation.py deleted file mode 100644 index b47a76252..000000000 --- a/agent-governance-python/agent-compliance/tests/test_governance_attestation.py +++ /dev/null @@ -1,583 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for governance attestation validation.""" - -from __future__ import annotations - - -from agent_compliance.governance import AttestationResult, validate_attestation - - -class TestAttestationResult: - """Tests for AttestationResult dataclass.""" - - def test_message_property_success(self): - """Test message property for successful validation.""" - result = AttestationResult( - valid=True, - errors=[], - warnings=[], - sections_found={"1) Security review": 1}, - ) - assert result.message == "✅ Governance attestation checklist looks good." 
- - def test_message_property_failure(self): - """Test message property for failed validation.""" - result = AttestationResult( - valid=False, - errors=[ - 'Missing section: "1) Security review"', - 'Section "2) Privacy review" must have exactly ONE checked box, found 0.', - ], - warnings=[], - sections_found={}, - ) - message = result.message - assert "❌ Governance attestation check failed:" in message - assert "Missing section" in message - assert "must have exactly ONE checked box" in message - - -class TestValidateAttestation: - """Tests for validate_attestation function.""" - - def test_valid_attestation_all_sections_checked(self): - """Test validation passes when all sections have exactly one checkbox marked.""" - pr_body = """ -# Governance Attestations (required) - -## 1) Security review -- [x] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 2) Privacy review -- [ ] ✅ Yes -- [x] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 3) CELA review -- [ ] ✅ Yes -- [ ] ❌ No -- [x] ⚠️ Not needed (explain below) - -## 4) Responsible AI review -- [x] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 5) Accessibility review -- [ ] ✅ Yes -- [x] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 7) Org-specific Launch Gates -- [ ] ✅ Yes -- [ ] ❌ No -- [x] ⚠️ Not needed (explain below) - ---- - -# Notes / Links -Some additional context here. 
-""" - result = validate_attestation(pr_body) - assert result.valid is True - assert len(result.errors) == 0 - assert len(result.sections_found) == 7 - for count in result.sections_found.values(): - assert count == 1 - - def test_missing_section(self): - """Test validation fails when required section is missing.""" - pr_body = """ -## 1) Security review -- [x] ✅ Yes -- [ ] ❌ No - -## 2) Privacy review -- [x] ✅ Yes -- [ ] ❌ No -""" - result = validate_attestation(pr_body) - assert result.valid is False - assert any("Missing section" in error for error in result.errors) - # Should have errors for 5 missing sections (3-7) - missing_errors = [e for e in result.errors if "Missing section" in e] - assert len(missing_errors) == 5 - - def test_no_checkbox_marked(self): - """Test validation fails when no checkbox is marked in a section.""" - pr_body = """ -## 1) Security review -- [ ] ✅ Yes -- [ ] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 2) Privacy review -- [x] ✅ Yes -- [ ] ❌ No - -## 3) CELA review -- [x] ✅ Yes - -## 4) Responsible AI review -- [x] ✅ Yes - -## 5) Accessibility review -- [x] ✅ Yes - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is False - assert any( - "must have exactly ONE checked box, found 0" in error - for error in result.errors - ) - assert result.sections_found["1) Security review"] == 0 - - def test_multiple_checkboxes_marked(self): - """Test validation fails when multiple checkboxes are marked in a section.""" - pr_body = """ -## 1) Security review -- [x] ✅ Yes -- [x] ❌ No -- [ ] ⚠️ Not needed (explain below) - -## 2) Privacy review -- [x] ✅ Yes - -## 3) CELA review -- [x] ✅ Yes - -## 4) Responsible AI review -- [x] ✅ Yes - -## 5) Accessibility review -- [x] ✅ Yes - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - 
assert result.valid is False - assert any( - "must have exactly ONE checked box, found 2" in error - for error in result.errors - ) - assert result.sections_found["1) Security review"] == 2 - - def test_custom_required_sections(self): - """Test validation with custom required sections.""" - pr_body = """ -## Security review -- [x] ✅ Yes - -## Privacy review -- [x] ✅ Yes - -## Legal review -- [x] ✅ Yes -""" - custom_sections = ["Security review", "Privacy review", "Legal review"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is True - assert len(result.sections_found) == 3 - - def test_case_insensitive_checkbox_matching(self): - """Test that checkbox matching is case-insensitive.""" - pr_body = """ -## 1) Security review -- [X] ✅ Yes -- [ ] ❌ No - -## 2) Privacy review -- [x] ✅ Yes - -## 3) CELA review -- [X] ✅ Yes - -## 4) Responsible AI review -- [x] ✅ Yes - -## 5) Accessibility review -- [X] ✅ Yes - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [X] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is True - for count in result.sections_found.values(): - assert count == 1 - - def test_crlf_line_endings_normalized(self): - """Test that CRLF line endings are properly normalized.""" - pr_body = "## 1) Security review\r\n- [x] ✅ Yes\r\n- [ ] ❌ No\r\n\r\n## 2) Privacy review\r\n- [x] ✅ Yes\r\n\r\n## 3) CELA review\r\n- [x] ✅ Yes\r\n\r\n## 4) Responsible AI review\r\n- [x] ✅ Yes\r\n\r\n## 5) Accessibility review\r\n- [x] ✅ Yes\r\n\r\n## 6) Release Readiness / Safe Deployment\r\n- [x] ✅ Yes\r\n\r\n## 7) Org-specific Launch Gates\r\n- [x] ✅ Yes\r\n" - result = validate_attestation(pr_body) - assert result.valid is True - - def test_pr_body_too_short(self): - """Test validation fails when PR body is too short.""" - pr_body = "Short PR body" - result = validate_attestation(pr_body, min_body_length=40) - assert result.valid is False - assert any("too 
short" in error for error in result.errors) - - def test_empty_pr_body(self): - """Test validation with empty PR body.""" - result = validate_attestation("") - assert result.valid is False - assert any("Missing section" in error for error in result.errors) - - def test_none_pr_body(self): - """Test validation with None PR body.""" - result = validate_attestation(None) - assert result.valid is False - assert any("Missing section" in error for error in result.errors) - - def test_whitespace_before_checkbox(self): - """Test that leading whitespace before checkboxes is allowed.""" - pr_body = """ -## 1) Security review - - [x] ✅ Yes - - [ ] ❌ No - -## 2) Privacy review - - [x] ✅ Yes - -## 3) CELA review -- [x] ✅ Yes - -## 4) Responsible AI review - - [x] ✅ Yes - -## 5) Accessibility review -- [x] ✅ Yes - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is True - - def test_section_with_special_regex_characters(self): - """Test sections with special regex characters are properly escaped.""" - pr_body = """ -## Test (with parens) -- [x] ✅ Yes - -## Test [with brackets] -- [x] ✅ Yes -""" - custom_sections = ["Test (with parens)", "Test [with brackets]"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is True - - def test_section_not_at_start_of_line(self): - """Test that sections must start at beginning of line.""" - pr_body = """ -Some text ## 1) Security review -- [x] ✅ Yes - -## 2) Privacy review -- [x] ✅ Yes - -## 3) CELA review -- [x] ✅ Yes - -## 4) Responsible AI review -- [x] ✅ Yes - -## 5) Accessibility review -- [x] ✅ Yes - -## 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is False - assert any( - 'Missing section: "1) Security review"' in error for error in result.errors - 
) - - def test_multiple_errors_aggregated(self): - """Test that multiple errors are properly aggregated.""" - pr_body = """ -## 1) Security review -- [x] ✅ Yes -- [x] ❌ No - -## 2) Privacy review -- [ ] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is False - # Should have error for section 1 (multiple checkboxes) - # and errors for missing sections 3-7 - assert len(result.errors) >= 6 - assert any("found 2" in error for error in result.errors) - assert any("Missing section" in error for error in result.errors) - - def test_custom_min_body_length(self): - """Test validation with custom minimum body length.""" - pr_body = "A" * 100 # 100 characters - result = validate_attestation(pr_body, min_body_length=50) - # Will fail due to missing sections, but not due to length - assert not any("too short" in error for error in result.errors) - - pr_body = "A" * 30 # 30 characters - result = validate_attestation(pr_body, min_body_length=50) - # Will fail due to length - assert any("too short" in error for error in result.errors) - - def test_pr_body_no_checkboxes_at_all(self): - """Test validation when sections exist but contain no checkboxes.""" - pr_body = """ -## 1) Security review -This section has no checkboxes at all. - -## 2) Privacy review -Just plain text. - -## 3) CELA review -Nothing here either. - -## 4) Responsible AI review -More prose. - -## 5) Accessibility review -Nope. - -## 6) Release Readiness / Safe Deployment -Still nothing. - -## 7) Org-specific Launch Gates -End. 
-""" - result = validate_attestation(pr_body) - assert result.valid is False - # Every section found but 0 checked boxes - zero_errors = [e for e in result.errors if "found 0" in e] - assert len(zero_errors) == 7 - - def test_pr_body_html_encoded_checkboxes(self): - """Test that HTML-encoded checkbox characters are NOT counted as checked.""" - pr_body = """ -## 1) Security review -- [x] Yes -- [ ] No - -## 2) Privacy review -- [x] Yes -""" - custom_sections = ["1) Security review", "2) Privacy review"] - result = validate_attestation(pr_body, required_sections=custom_sections) - # Section 1 uses HTML entities — should NOT match as a checked box - assert result.sections_found.get("1) Security review", 0) == 0 - assert result.sections_found.get("2) Privacy review", 0) == 1 - - def test_pr_body_multiple_governance_sections_all_validated(self): - """Test that ALL required governance sections are validated.""" - pr_body = """ -## Alpha -- [x] Done - -## Beta -- [x] Done - -## Gamma -- [ ] Not done -""" - custom_sections = ["Alpha", "Beta", "Gamma"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is False - assert result.sections_found["Alpha"] == 1 - assert result.sections_found["Beta"] == 1 - assert result.sections_found["Gamma"] == 0 - assert any("found 0" in e for e in result.errors) - - def test_empty_string_pr_body(self): - """Test validation with a completely empty string PR body.""" - result = validate_attestation("") - assert result.valid is False - missing = [e for e in result.errors if "Missing section" in e] - assert len(missing) == 7 - assert result.sections_found == {} - - def test_h3_headings_supported(self): - """Test that ### (h3) headings are accepted (GitHub Issue Forms render these).""" - pr_body = """ -### 1) Security review -- [x] ✅ Yes -- [ ] ❌ No - -### 2) Privacy review -- [x] ✅ Yes - -### 3) CELA review -- [x] ✅ Yes - -### 4) Responsible AI review -- [x] ✅ Yes - -### 5) Accessibility review -- 
[x] ✅ Yes - -### 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -### 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is True - assert len(result.errors) == 0 - assert len(result.sections_found) == 7 - - def test_h3_headings_mixed_with_h2(self): - """Test that ## and ### headings can coexist in the same document.""" - pr_body = """ -## 1) Security review -- [x] ✅ Yes - -### 2) Privacy review -- [x] ✅ Yes - -## 3) CELA review -- [x] ✅ Yes - -### 4) Responsible AI review -- [x] ✅ Yes - -## 5) Accessibility review -- [x] ✅ Yes - -### 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -## 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is True - assert len(result.errors) == 0 - - def test_h3_no_checkbox_marked(self): - """Test that h3 sections still enforce exactly-one-checked rule.""" - pr_body = """ -### 1) Security review -- [ ] ✅ Yes -- [ ] ❌ No - -### 2) Privacy review -- [x] ✅ Yes - -### 3) CELA review -- [x] ✅ Yes - -### 4) Responsible AI review -- [x] ✅ Yes - -### 5) Accessibility review -- [x] ✅ Yes - -### 6) Release Readiness / Safe Deployment -- [x] ✅ Yes - -### 7) Org-specific Launch Gates -- [x] ✅ Yes -""" - result = validate_attestation(pr_body) - assert result.valid is False - assert any("found 0" in e for e in result.errors) - assert result.sections_found["1) Security review"] == 0 - - def test_h4_headings_not_supported(self): - """Test that #### (h4) headings are NOT matched — only h2 and h3 are valid.""" - pr_body = """ -#### 1) Security review -- [x] ✅ Yes -""" - custom_sections = ["1) Security review"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is False - assert any("Missing section" in e for e in result.errors) - - -class TestFencedCodeBlockSafety: - """Section headings and checkboxes inside fenced code blocks must not affect parsing.""" - - def 
test_fenced_pseudo_heading_does_not_truncate_section(self): - """A `## ...` inside a code fence must not terminate the enclosing section.""" - pr_body = """ -## 1) Security review - -Example template the user might paste: - -```markdown -## 99) Not a real heading -- [ ] sample -``` - -- [x] ✅ Yes -""" - custom_sections = ["1) Security review"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is True, result.errors - assert result.sections_found["1) Security review"] == 1 - - def test_checkbox_inside_fence_does_not_count(self): - """A `[x]` inside a fenced block must not satisfy the section requirement.""" - pr_body = """ -## 1) Security review - -Example users paste: - -```markdown -- [x] would be checked -``` -""" - custom_sections = ["1) Security review"] - result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is False - assert any("found 0" in e for e in result.errors) - - def test_unclosed_fence_does_not_crash(self): - """An unterminated code fence runs to end-of-body without raising.""" - pr_body = """ -## 1) Security review -- [x] ✅ Yes - -```python -# unclosed fence, no closing backticks -print("oops") -""" - custom_sections = ["1) Security review"] - # Must not raise; the section before the fence is intact and the - # box outside the fence is counted. 
- result = validate_attestation(pr_body, required_sections=custom_sections) - assert result.valid is True, result.errors - assert result.sections_found["1) Security review"] == 1 diff --git a/agent-governance-python/agent-governance-toolkit-core/README.md b/agent-governance-python/agent-governance-toolkit-core/README.md index 0866f6d6d..691882180 100644 --- a/agent-governance-python/agent-governance-toolkit-core/README.md +++ b/agent-governance-python/agent-governance-toolkit-core/README.md @@ -11,7 +11,7 @@ install: | `agent-os-kernel` | Kernel architecture, Nexus Trust Exchange, CMVK, IATP, AMB, ATR, control plane, observability | | `agentmesh-primitives` | Shared primitive data models (failure types, severity levels, base structures) | | `agentmesh-runtime` | Execution supervisor with privilege rings, saga orchestration, audit trails | -| `agent-hypervisor` | Runtime supervisor for shared sessions, execution rings, joint liability, hash-chained audit | +| `agent-hypervisor` | Runtime supervisor for shared sessions, execution rings, saga compensation, hash-chained audit | | `agentmesh-platform` | Identity, trust, reward, governance for cloud-native agent ecosystems | ## Install diff --git a/agent-governance-python/agent-hypervisor/README.md b/agent-governance-python/agent-hypervisor/README.md index b0a148756..736d419de 100644 --- a/agent-governance-python/agent-hypervisor/README.md +++ b/agent-governance-python/agent-hypervisor/README.md @@ -1,75 +1,46 @@
-# Agent Hypervisor — Public Preview +# Agent Hypervisor Public Preview -**Execution supervisor for AI agents — runtime isolation, execution rings, and governance for autonomous agents** +**Runtime supervisor for AI agents with execution rings, isolated sessions, saga compensation, tamper-evident audit trails, and safety controls.** -*Just as a supervisor isolates processes, Agent Hypervisor isolates AI agent sessions
and enforces governance boundaries with a kill switch, blast radius containment, and accountability.* +*Just as an OS supervisor isolates processes, Agent Hypervisor isolates AI agent sessions and enforces governance boundaries with execution rings, a kill switch, and blast-radius containment.* [![CI](https://github.com/microsoft/agent-governance-toolkit/actions/workflows/ci.yml/badge.svg)](https://github.com/microsoft/agent-governance-toolkit/actions/workflows/ci.yml) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://python.org) -[![PyPI](https://img.shields.io/pypi/v/agent-hypervisor)](https://pypi.org/project/agent-governance-python/agent-hypervisor/) -[![Benchmark](https://img.shields.io/badge/latency-268%CE%BCs%20pipeline-orange)](benchmarks/) +[![Benchmarks](https://img.shields.io/badge/benchmarks-available-blue)](benchmarks/) [![Discussions](https://img.shields.io/github/discussions/microsoft/agent-governance-toolkit)](https://github.com/microsoft/agent-governance-toolkit/discussions) > [!IMPORTANT] -> **Public Preview** — The `agent-hypervisor` package on PyPI is a public preview release. APIs may change before GA. +> `agent-hypervisor` is deprecated as a standalone PyPI package. For new work, install `agent-governance-toolkit-core` or the full toolkit. The source in this directory remains tested and documents the runtime features that are implemented here. -> ⭐ **If this project helps you, please star it!** It helps others discover Agent Hypervisor. 
- -> 📦 **Install the full stack:** `pip install agent-governance-toolkit[full]` — [PyPI](https://pypi.org/project/ai-agent-governance/) | [GitHub](https://github.com/microsoft/agent-governance-toolkit) - -[Quick Start](#quick-start) • [Configuration](#configuration) • [Why a Hypervisor?](#-why-agent-hypervisor) • [Features](#key-features) • [Architecture](#architecture-diagrams) • [Performance](#performance) • [Ecosystem](#ecosystem) +[Quick start](#quick-start) | [Why a hypervisor](#why-agent-hypervisor) | [Configuration](#configuration) | [Architecture](#architecture) | [Key features](#key-features) | [REST API](#rest-api) | [Ecosystem](#ecosystem)
--- -### Integrated Into Major AI Frameworks - -

- Dify - LlamaIndex - Awesome Copilot - Agent-Lightning - awesome-opentelemetry -

- -## 📊 By The Numbers - - - - - - - - -

644+

Tests Passing

4

Execution Rings
(Ring 0–3)

268μs

Full Governance
Pipeline Latency

v2.0

Saga Compensation
Kill Switch · Rate Limits
- -## 💡 Why Agent Hypervisor? +## Why Agent Hypervisor > **The problem:** AI agents run with unlimited resources, no isolation, and no kill switch. A single rogue agent in a shared session can escalate privileges, corrupt state, or cascade failures across your entire system. -> **Our solution:** A hypervisor that enforces execution rings, resource limits, saga compensation, and runtime governance — giving you a kill switch, blast radius containment, and joint liability for agent accountability. +> **The approach:** A hypervisor that enforces execution rings, resource limits, saga compensation, and runtime governance, giving you a kill switch and blast-radius containment. ### How It Maps to What You Already Know | OS / VM Hypervisor | Agent Hypervisor | Why It Matters | |-------------------|-----------------|----------------| -| CPU rings (Ring 0–3) | **Execution Rings** — privilege levels based on trust score | Graduated access, not binary allow/deny | -| Process isolation | **Session isolation** — VFS namespacing, DID-bound identity | Rogue agents can't corrupt other sessions | -| Memory protection | **Liability protection** — bonded reputation, collateral slash | Sponsors have skin in the game | -| System calls | **Saga transactions** — multi-step ops with automatic rollback | Failed workflows undo themselves | -| Watchdog timer | **Kill switch** — graceful termination with step handoff | Stop runaway agents without data loss | -| Audit logs | **Hash-chained delta trail** — tamper-evident forensic trail | Prove exactly what happened | - -> **Implementation status.** Some capabilities above ship as documented stubs. The liability ledger always admits (no risk scoring), collateral slashing and quarantine record events but apply no enforcement, sponsorship vouching adds no bonding, the audit commitment is stored in memory only with no blockchain anchoring, and saga checkpoints have no replay. 
Execution rings, session isolation, saga orchestration, the kill switch, and the in-memory hash-chained delta trail are functional. Each module docstring states its exact scope. +| CPU rings (Ring 0-3) | **Execution Rings**, privilege levels based on trust score | Graduated access, not binary allow/deny | +| Process isolation | **Session isolation** with VFS namespacing and DID-bound identity | Rogue agents cannot corrupt other sessions | +| System calls | **Saga transactions**, multi-step ops with automatic rollback | Failed workflows undo themselves | +| Watchdog timer | **Kill switch** with graceful termination and step handoff | Stop runaway agents without data loss | +| Audit logs | **Hash-chained delta trail**, tamper-evident forensic record | Prove exactly what happened | -## Quick Start +## Quick start ```bash -pip install agent-hypervisor +pip install agent-governance-toolkit-core ``` ```python @@ -83,13 +54,13 @@ session = await hv.create_session( creator_did="did:mesh:admin", ) -# Agent joins — ring assigned automatically by trust score +# Agent joins, ring assigned automatically by trust score ring = await hv.join_session( session.sso.session_id, "did:mesh:agent-1", sigma_raw=0.85, ) -# → RING_2_STANDARD (trusted agent) +# RING_2_STANDARD (trusted agent) # Activate and run a governed saga await hv.activate_session(session.sso.session_id) @@ -103,13 +74,13 @@ result = await session.saga.execute_step( saga.saga_id, step.step_id, executor=draft_email, ) -# Terminate — returns tamper-evident audit hash +# Terminate, returns tamper-evident audit hash hash_root = await hv.terminate_session(session.sso.session_id) ``` ## Configuration -This section covers how to configure agents, sessions, sagas, security, and rate limiting. +This section covers how to configure agents, sessions, sagas, safety controls, and rate limiting. 
### Agent Configuration @@ -118,60 +89,64 @@ Agents join sessions and are assigned an **Execution Ring** based on their trust ```python from hypervisor import Hypervisor, SessionConfig, ConsistencyMode, ExecutionRing -# Initialize with optional liability cap and retention policy +# Basic initialization. Optional adapters resolve trust scores, +# behavioral verification, and capability manifests. hv = Hypervisor( - max_exposure=1000.0, # Max total liability per voucher - retention_policy=None, # Ephemeral GC rules (default: keep all) + nexus=None, # External trust scoring backend + policy_check=None, # Behavioral verification adapter + iatp=None, # Capability manifest parser ) # Create a session with resource limits session = await hv.create_session( config=SessionConfig( consistency_mode=ConsistencyMode.EVENTUAL, # or STRONG - max_participants=10, # 1–1000 - max_duration_seconds=3600, # 1–604,800 (7 days max) + max_participants=10, # 1-1000 + max_duration_seconds=3600, # 1-604,800 (7 days max) min_eff_score=0.60, # Minimum trust score to join enable_audit=True, # Hash-chained audit trail - enable_blockchain_commitment=False, ), creator_did="did:mesh:admin", ) -# Agent joins — ring assigned by trust score +# Agent joins, ring assigned by trust score ring = await hv.join_session( session.sso.session_id, "did:mesh:agent-1", - sigma_raw=0.85, # Raw trust score [0.0–1.0] + sigma_raw=0.85, # Raw trust score [0.0-1.0] ) # Ring assignment thresholds: -# eff_score > 0.95 + consensus → RING_1_PRIVILEGED -# eff_score > 0.60 → RING_2_STANDARD -# otherwise → RING_3_SANDBOX (default) +# eff_score > 0.95 + consensus -> RING_1_PRIVILEGED +# eff_score > 0.60 -> RING_2_STANDARD +# otherwise -> RING_3_SANDBOX (default) ``` ### Temporary Ring Elevation (Sudo) -Agents can request temporary privilege escalation with a TTL: - -> **Note:** Ring elevation is available in the Enterprise Edition. Public Preview includes the API surface but returns a denial response. 
See the architecture for how it works. +Agents can request temporary privilege escalation with a TTL. Elevation is granted only if the agent's trust score meets the target ring's threshold; Ring 1 additionally requires an attestation string, and Ring 0 is never granted through the standard API. ```python -from hypervisor import RingElevationManager +from hypervisor import ExecutionRing, RingElevationManager elevation_mgr = RingElevationManager() -# Grant temporary Ring 1 access (max 3600s, default 300s) -elevation = elevation_mgr.elevate( +# Request temporary Ring 1 access (TTL default 300s, capped at 3600s) +elevation = elevation_mgr.request_elevation( agent_did="did:mesh:agent-1", session_id=session.sso.session_id, + current_ring=ExecutionRing.RING_2_STANDARD, target_ring=ExecutionRing.RING_1_PRIVILEGED, - ttl_seconds=300, # Auto-expires after 5 minutes + ttl_seconds=300, # Auto-expires after 5 minutes + attestation="signed-by-sre", # Required for Ring 1 reason="deploy-approval", - attestation="signed-by-sre", # Optional proof + trust_score=0.96, # Or supply a trust_provider to the manager ) # Revoke early if needed -elevation_mgr.revoke(elevation.elevation_id) +elevation_mgr.revoke_elevation(elevation.elevation_id) + +# Expire elapsed elevations (call periodically) +elevation_mgr.tick() ``` ### Session Configuration @@ -187,64 +162,24 @@ config = SessionConfig( max_duration_seconds=7200, # 2-hour session min_eff_score=0.70, # Higher trust threshold enable_audit=True, - enable_blockchain_commitment=False, # Reserved; not implemented, no effect ) session = await hv.create_session(config=config, creator_did="did:mesh:admin") await hv.activate_session(session.sso.session_id) -# Session lifecycle: CREATED → HANDSHAKING → ACTIVE → TERMINATING → ARCHIVED +# Session lifecycle: CREATED -> HANDSHAKING -> ACTIVE -> TERMINATING -> ARCHIVED ``` ### Saga Configuration -Define multi-step transactions with compensation using the DSL parser or programmatically: +Define 
multi-step transactions with compensation programmatically: ```python -from hypervisor import SagaDSLParser, SagaOrchestrator, FanOutPolicy - -# Option 1: Define saga as a dict (or load from YAML) -definition = { - "name": "deploy-pipeline", - "session_id": "ss-a1b2c3d4", - "steps": [ - { - "id": "provision", - "action_id": "provision-vm", - "agent": "did:mesh:agent-1", - "execute_api": "/infra/provision", - "undo_api": "/infra/deprovision", # Compensation endpoint - "timeout": 120, # Seconds (default: 300) - "retries": 2, # Retry count (default: 0) - }, - { - "id": "deploy", - "action_id": "deploy-app", - "agent": "did:mesh:agent-2", - "execute_api": "/app/deploy", - "undo_api": "/app/undeploy", - "timeout": 60, - }, - ], - "fan_outs": [ - { - "policy": "all_must_succeed", # or majority_must_succeed, any_must_succeed - "branch_step_ids": ["provision", "deploy"], - }, - ], -} - -parser = SagaDSLParser() -errors = parser.validate(definition) # Returns [] if valid -saga_def = parser.parse(definition) -steps = parser.to_saga_steps(saga_def) - -# Option 2: Build programmatically saga = session.saga.create_saga(session.sso.session_id) step = session.saga.add_step( saga.saga_id, "draft_email", "did:mesh:agent-1", execute_api="/api/draft", - undo_api="/api/undo-draft", + undo_api="/api/undo-draft", # Compensation endpoint timeout_seconds=30, max_retries=2, ) @@ -260,6 +195,7 @@ The kill switch provides graceful agent termination with saga step handoff: ```python from hypervisor import KillSwitch +from hypervisor.security.kill_switch import KillReason kill_switch = KillSwitch() @@ -267,19 +203,19 @@ kill_switch = KillSwitch() result = kill_switch.kill( agent_did="did:mesh:rogue-agent", session_id=session.sso.session_id, - reason="ring_breach", # behavioral_drift | rate_limit | ring_breach | manual + reason=KillReason.RING_BREACH, ) -# result.handoffs — list of in-flight saga steps handed to substitute agents -# result.compensation_triggered — True if active sagas were 
compensated +# result.handoffs, list of in-flight saga steps handed to substitute agents +# result.compensation_triggered, True if active sagas were compensated ``` Kill reasons: -- `behavioral_drift` — Agent behavior diverges from expected patterns -- `rate_limit` — Agent exceeded rate limit thresholds -- `ring_breach` — Agent attempted unauthorized ring access -- `manual` — Operator-initiated termination -- `quarantine_timeout` — Quarantine period expired without resolution -- `session_timeout` — Session max duration exceeded +- `behavioral_drift`, agent behavior diverges from expected patterns +- `rate_limit`, agent exceeded rate limit thresholds +- `ring_breach`, agent attempted unauthorized ring access +- `manual`, operator-initiated termination +- `quarantine_timeout`, quarantine period expired without resolution +- `session_timeout`, session max duration exceeded ### Rate Limiting @@ -287,7 +223,7 @@ Per-ring token bucket rate limiting is applied automatically: ```python from hypervisor import AgentRateLimiter -from hypervisor.rings import ExecutionRing +from hypervisor.models import ExecutionRing limiter = AgentRateLimiter() @@ -298,7 +234,6 @@ limiter = AgentRateLimiter() # Ring 3 (Sandbox): 5.0 rate, 10.0 capacity # Custom rate limits per ring -from hypervisor.security.rate_limiter import DEFAULT_RING_LIMITS custom_limits = { ExecutionRing.RING_0_ROOT: (200.0, 400.0), ExecutionRing.RING_1_PRIVILEGED: (100.0, 200.0), @@ -319,62 +254,13 @@ detector = RingBreachDetector() # Breach events include: # severity: NONE | LOW | MEDIUM | HIGH | CRITICAL -# anomaly_score: float — how far the behavior deviates -# actual_rate vs expected_rate — call frequency anomaly -# call_count_window — calls in the detection window +# anomaly_score: float, how far the behavior deviates +# actual_rate vs expected_rate, call frequency anomaly +# call_count_window, calls in the detection window # Breach detection triggers automatic demotion or kill switch ``` -### YAML Configuration - 
-You can define sagas and load them from YAML files: - -```yaml -# saga-deploy.yaml -name: deploy-pipeline -session_id: ss-a1b2c3d4 -steps: - - id: provision - action_id: provision-vm - agent: "did:mesh:agent-1" - execute_api: /infra/provision - undo_api: /infra/deprovision - timeout: 120 - retries: 2 - - - id: deploy - action_id: deploy-app - agent: "did:mesh:agent-2" - execute_api: /app/deploy - undo_api: /app/undeploy - timeout: 60 - retries: 1 - -fan_outs: - - policy: all_must_succeed - branch_step_ids: - - provision - - deploy - -metadata: - environment: production - owner: platform-team -``` - -```python -import yaml -from hypervisor import SagaDSLParser - -with open("saga-deploy.yaml") as f: - definition = yaml.safe_load(f) - -parser = SagaDSLParser() -errors = parser.validate(definition) -if not errors: - saga_def = parser.parse(definition) -``` - ### Docker Compose For production deployments with Redis-backed state: @@ -403,57 +289,49 @@ services: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | **Hypervisor** | | | | -| `max_exposure` | `float` | `None` | Maximum total liability per voucher | -| `retention_policy` | `RetentionPolicy` | `None` | Ephemeral GC rules for audit data | | `nexus` | adapter | `None` | External trust scoring backend | | `policy_check` | adapter | `None` | Behavioral verification adapter | | `iatp` | adapter | `None` | Capability manifest parser | | **SessionConfig** | | | | | `consistency_mode` | `ConsistencyMode` | `EVENTUAL` | `STRONG` (consensus) or `EVENTUAL` (gossip) | -| `max_participants` | `int` | `10` | Max agents per session (1–1,000) | -| `max_duration_seconds` | `int` | `3600` | Session timeout (1–604,800) | -| `min_eff_score` | `float` | `0.60` | Minimum trust score to join (0.0–1.0) | +| `max_participants` | `int` | `10` | Max agents per session (1-1,000) | +| `max_duration_seconds` | `int` | `3600` | Session timeout (1-604,800) | +| `min_eff_score` | `float` | `0.60` | 
Minimum trust score to join (0.0-1.0) | | `enable_audit` | `bool` | `True` | Enable hash-chained audit trail | -| `enable_blockchain_commitment` | `bool` | `False` | Reserved. Blockchain anchoring is not implemented, so this has no effect and the commitment is stored in memory only. | | **Execution Rings** | | | | -| `RING_0_ROOT` | `int` | `0` | Hypervisor config & penalty (SRE Witness required) | +| `RING_0_ROOT` | `int` | `0` | Hypervisor config and penalty (SRE Witness required) | | `RING_1_PRIVILEGED` | `int` | `1` | Non-reversible actions (eff_score > 0.95 + consensus) | | `RING_2_STANDARD` | `int` | `2` | Reversible actions (eff_score > 0.60) | | `RING_3_SANDBOX` | `int` | `3` | Read-only / research (default) | | **Ring Elevation** | | | | | `ttl_seconds` | `int` | `300` | Elevation duration (max 3,600s) | | `reason` | `str` | `""` | Justification for elevation | -| `attestation` | `str` | `None` | Signed proof from authorizer | +| `attestation` | `str` | `None` | Signed proof, required for Ring 1 | | **Saga Steps** | | | | -| `timeout` | `int` | `300` | Step timeout in seconds | -| `retries` | `int` | `0` | Max retry attempts | -| `execute_api` | `str` | — | Endpoint for step execution | +| `timeout_seconds` | `int` | `300` | Step timeout in seconds | +| `max_retries` | `int` | `0` | Max retry attempts | +| `execute_api` | `str` | required | Endpoint for step execution | | `undo_api` | `str` | `None` | Endpoint for compensation | -| `checkpoint_goal` | `str` | `None` | Checkpoint description for replay | -| **Fan-Out Policy** | | | | -| `ALL_MUST_SUCCEED` | — | ✓ | All branches must complete | -| `MAJORITY_MUST_SUCCEED` | — | — | >50% of branches must complete | -| `ANY_MUST_SUCCEED` | — | — | At least one branch must complete | | **Rate Limits** (tokens/sec, burst) | | | | | Ring 0 (Root) | `(float, float)` | `(100.0, 200.0)` | Highest throughput for admin ops | | Ring 1 (Privileged) | `(float, float)` | `(50.0, 100.0)` | High throughput for trusted agents 
| | Ring 2 (Standard) | `(float, float)` | `(20.0, 40.0)` | Moderate throughput | | Ring 3 (Sandbox) | `(float, float)` | `(5.0, 10.0)` | Restricted throughput | | **Kill Switch** | | | | -| `reason` | `KillReason` | — | `behavioral_drift`, `rate_limit`, `ring_breach`, `manual`, `quarantine_timeout`, `session_timeout` | +| `reason` | `KillReason` | required | `behavioral_drift`, `rate_limit`, `ring_breach`, `manual`, `quarantine_timeout`, `session_timeout` | | **Breach Detection** | | | | -| `severity` | `BreachSeverity` | — | `NONE`, `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` | +| `severity` | `BreachSeverity` | | `NONE`, `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` | -## Architecture Diagrams +## Architecture ### Execution Ring Hierarchy ```mermaid graph TD - R0["🔴 Ring 0 — Root
Hypervisor config & penalty
Requires SRE Witness"] - R1["🟠 Ring 1 — Privileged
Non-reversible actions
eff_score > 0.95 + consensus"] - R2["🟡 Ring 2 — Standard
Reversible actions
eff_score > 0.60"] - R3["🟢 Ring 3 — Sandbox
Read-only / research
Default for unknown agents"] + R0["🔴 Ring 0, Root
Hypervisor config and penalty
Requires SRE Witness"] + R1["🟠 Ring 1, Privileged
Non-reversible actions
eff_score > 0.95 + consensus"] + R2["🟡 Ring 2, Standard
Reversible actions
eff_score > 0.60"] + R3["🟢 Ring 3, Sandbox
Read-only / research
Default for unknown agents"] R0 -->|"supervises"| R1 R1 -->|"supervises"| R2 @@ -491,51 +369,7 @@ flowchart LR Success -- No --> Compensate["Compensate\n(reverse order)"] Compensate --> CompOk{"Compensation\nsucceeds?"} CompOk -- Yes --> Rolled["↩️ Saga Rolled Back"] - CompOk -- No --> Escalate["⚠️ Escalate\nLiability Penalty"] -``` - -### Joint Liability Vouch Chain - -```mermaid -flowchart TD - Sponsor["🛡️ Sponsor Agent
eff_score: 0.92
Bonds reputation"] - Sponsored["🤖 Sponsored Agent
eff_score: 0.45
Gains Ring 2 access"] - Action["Agent performs action"] - Check{"Intent\nviolation?"} - Safe["✅ No penalty"] - Penalty["🔻 Both penalized
Sponsor collateral slashed
Sponsored demoted"] - - Sponsor -->|"vouches for"| Sponsored - Sponsored --> Action - Action --> Check - Check -- No --> Safe - Check -- Yes --> Penalty - Penalty -->|"collateral slash"| Sponsor - Penalty -->|"demotion + quarantine"| Sponsored -``` - -### Slash Cascade Propagation - -```mermaid -flowchart TD - Violation["🚨 Violation Detected"] - Attr["Fault Attribution
Identify responsible agent"] - Primary["Primary Agent
Full penalty applied"] - Sponsor1["Sponsor A
Collateral slashed"] - Sponsor2["Sponsor B
Collateral slashed"] - Quarantine["Quarantine Agent
Before termination"] - Demote["Demote to Ring 3"] - Ledger["Record in
Liability Ledger"] - - Violation --> Attr - Attr --> Primary - Primary --> Sponsor1 - Primary --> Sponsor2 - Primary --> Quarantine - Quarantine --> Demote - Sponsor1 --> Ledger - Sponsor2 --> Ledger - Primary --> Ledger + CompOk -- No --> Escalate["⚠️ Saga Failed\n(compensation error)"] ``` ## Key Features @@ -545,7 +379,7 @@ flowchart TD ### 🔐 Execution Rings -Hardware-inspired privilege model (Ring 0–3). Agents earn ring access based on trust score. Real-time demotion on trust drops. Sudo elevation with TTL. Breach detection with circuit breakers. +Hardware-inspired privilege model (Ring 0-3). Agents earn ring access based on trust score. Real-time demotion on trust drops. Sudo elevation with TTL. Breach detection with circuit breakers. @@ -559,27 +393,27 @@ Graceful termination with saga step handoff to substitute agents. Rate limiting ### 🔄 Saga Compensation -Multi-step transactions with timeout enforcement, retry with backoff, reverse-order compensation, and escalation to liability. Parallel execution with ALL/MAJORITY/ANY policies. +Multi-step transactions with timeout enforcement, retry with backoff, and reverse-order compensation of committed steps on failure. -### 🤝 Joint Liability -High-trust agents sponsor low-trust agents by bonding reputation. If the sponsored agent violates intent, **both are penalized**. Fault attribution, quarantine-before-terminate, persistent ledger. +### 📋 Hash-Chained Audit +Forensic-grade delta trails. Semantic diffs, hash-chained entries, and a summary commitment (root hash) returned at session end. -### 📋 Hash-Chained Audit -Forensic-grade delta trails — semantic diffs, hash-chained entries, summary commitment at session end. Garbage collection preserves forensic artifacts. +### 📡 Observability +Structured event bus emits typed events for every action. Causal trace IDs with full delegation-tree encoding. Version counters for causal consistency. **Prometheus metrics collector** for ring transitions and breaches. 
**OpenTelemetry span exporter** for saga-to-span mapping with distributed trace context. -### 📡 Observability -Structured event bus emits typed events for every action. Causal trace IDs with full delegation tree encoding. Version counters for causal consistency. **Prometheus metrics collector** for ring transitions and breaches. **OpenTelemetry span exporter** for saga-to-span mapping with distributed trace context. +### 🧩 Session Isolation +Shared Session Object with a per-session virtual file system, snapshots, and vector-clock causal ordering. DID-bound identity keeps rogue agents from corrupting other sessions. @@ -588,72 +422,54 @@ Structured event bus emits typed events for every action. Causal trace IDs with
📖 Feature details (click to expand) -### 🔐 Execution Rings — Deep Dive +### 🔐 Execution Rings, Deep Dive ``` -Ring 0 (Root) — Hypervisor config & penalty — requires SRE Witness -Ring 1 (Privileged) — Non-reversible actions — requires eff_score > 0.95 + consensus -Ring 2 (Standard) — Reversible actions — requires eff_score > 0.60 -Ring 3 (Sandbox) — Read-only / research — default for unknown agents +Ring 0 (Root) Hypervisor config and penalty, requires SRE Witness +Ring 1 (Privileged) Non-reversible actions, requires eff_score > 0.95 + consensus +Ring 2 (Standard) Reversible actions, requires eff_score > 0.60 +Ring 3 (Sandbox) Read-only / research, default for unknown agents ``` -**v2.0 additions:** Dynamic ring elevation (sudo with TTL), ring breach detection with circuit breakers, ring inheritance for spawned agents, **behavioral anomaly detection** with sliding-window rate analysis and ring-distance amplification. +**Ring controls:** Dynamic ring elevation (sudo with TTL), ring breach detection with circuit breakers, ring inheritance for spawned agents, and behavioral anomaly detection with sliding-window rate analysis and ring-distance amplification. -**v2.1 additions:** Command denylist enforcement — `RingEnforcer.check_command()` validates subprocess commands against a global `DENIED_COMMANDS` list with case-insensitive matching and shell metacharacter stripping to prevent injection bypasses (curl, wget, shells, compilers, network tools, alternative interpreters). +**Command denylist enforcement:** `RingEnforcer.check_command()` validates subprocess commands against a global `DENIED_COMMANDS` list with case-insensitive matching and shell metacharacter stripping to prevent injection bypasses (curl, wget, shells, compilers, network tools, alternative interpreters). 
-### 🔄 Saga Orchestrator — Deep Dive +### 🔄 Saga Orchestrator, Deep Dive -- **Timeout enforcement** — steps that hang are automatically cancelled -- **Retry with backoff** — transient failures retry with exponential delay -- **Reverse-order compensation** — on failure, all committed steps are undone -- **Escalation** — if compensation fails, Joint Liability penalty is triggered -- **Parallel execution** — ALL_MUST_SUCCEED / MAJORITY / ANY policies -- **Execution checkpoints** — partial replay without re-running completed effects -- **Declarative DSL** — define sagas via YAML or dict +- **Timeout enforcement**, steps that hang are automatically cancelled +- **Retry with backoff**, transient failures retry with exponential delay +- **Reverse-order compensation**, on failure, all committed steps are undone ### 🔒 Session Consistency -- **Version counters** — causal consistency for shared VFS state -- **Resource locks** — READ/WRITE/EXCLUSIVE with lock timeout -- **Isolation levels** — SNAPSHOT, READ_COMMITTED, SERIALIZABLE per saga +- **Version counters**, causal consistency for shared VFS state +- **Resource locks**, READ/WRITE/EXCLUSIVE with lock timeout +- **Isolation levels**, SNAPSHOT, READ_COMMITTED, SERIALIZABLE per saga
-## Performance - -| Operation | Mean Latency | Throughput | -|-----------|-------------|------------| -| Ring computation | **0.3μs** | 3.75M ops/s | -| Delta audit capture | **27μs** | 26K ops/s | -| Session lifecycle | **54μs** | 15.7K ops/s | -| 3-step saga | **151μs** | 5.3K ops/s | -| **Full governance pipeline** | **268μs** | **2,983 ops/s** | +## Benchmarks -> Full pipeline = session create + agent join + 3 audit deltas + saga step + terminate with audit log root - -## Installation +Microbenchmarks for ring computation, delta-audit capture, session lifecycle, and saga execution live in the [`benchmarks/`](benchmarks/) directory. ```bash -pip install agent-hypervisor +python benchmarks/bench_hypervisor.py ``` ## Modules -| Module | Description | Tests | -|--------|-------------|-------| -| `hypervisor.session` | Shared Session Object lifecycle + VFS | 52 | -| `hypervisor.rings` | 4-ring privilege + elevation + breach detection | 34 | -| `hypervisor.liability` | Sponsorship, penalty, attribution, quarantine, ledger | 39 | -| `hypervisor.reversibility` | Execute/Undo API registry | 4 | -| `hypervisor.saga` | Saga orchestrator + fan-out + checkpoints + DSL | 41 | -| `hypervisor.audit` | Delta engine, audit log, GC, commitment | 10 | -| `hypervisor.verification` | DID transaction history verification | 4 | -| `hypervisor.observability` | Event bus, causal trace IDs | 22 | -| `hypervisor.security` | Rate limiter, kill switch | 16 | -| `hypervisor.integrations` | Nexus, Verification, IATP cross-module adapters | -- | -| **Integration** | End-to-end lifecycle, edge cases, security | **24** | -| **Scenarios** | Cross-module governance pipelines (7 suites) | **18** | -| **Total** | | **644** | +| Module | Description | +|--------|-------------| +| `hypervisor.session` | Shared Session Object lifecycle and VFS | +| `hypervisor.rings` | 4-ring privilege, elevation, and breach detection | +| `hypervisor.reversibility` | Execute/Undo API registry | +| `hypervisor.saga` | 
Saga orchestrator and compensation | +| `hypervisor.audit` | Delta engine and hash-chained audit trail | +| `hypervisor.verification` | DID transaction history verification | +| `hypervisor.observability` | Event bus, causal trace IDs, metrics | +| `hypervisor.security` | Rate limiter and kill switch | +| `hypervisor.integrations` | Nexus, Verification, IATP cross-module adapters | ## Test Suite @@ -672,21 +488,29 @@ python benchmarks/bench_hypervisor.py The Hypervisor supports optional integration with external trust scoring, behavioral verification, and capability manifest systems via adapters in `hypervisor.integrations`. See the adapter modules for usage examples. -### REST API +## REST API -Full FastAPI REST API with 22 endpoints and interactive Swagger docs: +Run the FastAPI server and open the interactive Swagger docs: ```bash -pip install agent-hypervisor[api] uvicorn hypervisor.api.server:app # Open http://localhost:8000/docs for Swagger UI ``` -Endpoints: Sessions, Rings, Sagas, Liability, Events, Health. 
+Implemented endpoint groups: + +| Group | Endpoints | +|-------|-----------| +| Health | `GET /health`, `GET /api/v1/stats` | +| Sessions | create, list, inspect, join, activate, terminate | +| Rings | session distribution, agent ring lookup, access check | +| Sagas | create, list, inspect, add step, execute step | +| Events | query events and event statistics | +| Verification | verify history and clear verification cache | -### Visualization Dashboard +## Visualization Dashboard -Interactive Streamlit dashboard with 5 tabs: +Interactive Streamlit dashboard: ```bash cd examples/dashboard @@ -694,11 +518,11 @@ pip install -r requirements.txt streamlit run app.py ``` -Tabs: Session Overview | Execution Rings | Saga Orchestration | Liability & Trust | Event Stream +Tabs: Session Overview | Execution Rings | Saga Orchestration | Event Stream ## Ecosystem -Agent Hypervisor is part of the **Agent Governance Ecosystem** — four specialized repos that work together: +Agent Hypervisor is part of the **Agent Governance Ecosystem**, specialized components that work together: ```mermaid graph TB @@ -717,21 +541,21 @@ graph TB style HV fill:#ff6b6b,stroke:#333,color:#fff ``` -| Repo | Role | Stars | -|------|------|-------| -| [Agent OS](https://github.com/microsoft/agent-governance-toolkit) | Policy enforcement kernel | 1,500+ tests | -| [Agent Mesh](https://github.com/microsoft/agent-governance-toolkit) | Cryptographic trust network | 1,400+ tests | -| [Agent SRE](https://github.com/microsoft/agent-governance-toolkit) | SLO, chaos, cost guardrails | 1,070+ tests | -| **Agent Hypervisor** | Session isolation & governance runtime | 644+ tests | +| Component | Role | +|------|------| +| [Agent OS](https://github.com/microsoft/agent-governance-toolkit) | Policy enforcement kernel | +| [Agent Mesh](https://github.com/microsoft/agent-governance-toolkit) | Cryptographic trust network | +| [Agent SRE](https://github.com/microsoft/agent-governance-toolkit) | SLO, chaos, and cost 
guardrails | +| **Agent Hypervisor** | Session isolation and governance runtime | -## 🗺️ Roadmap +## Roadmap | Quarter | Milestone | |---------|-----------| -| **Q1 2026** | ✅ v2.0 — Execution rings, saga orchestration, joint liability, shared sessions | +| **Q1 2026** | v2.0 with execution rings, saga orchestration, and shared sessions | | **Q2 2026** | Distributed hypervisor (multi-node), WebSocket real-time dashboard, Redis-backed sessions | | **Q3 2026** | Kubernetes operator for auto-scaling ring policies, CNCF Sandbox application | -| **Q4 2026** | v3.0 — Federated hypervisor mesh, cross-org agent governance, SOC2 attestation | +| **Q4 2026** | v3.0 with federated hypervisor mesh, cross-org agent governance, and SOC2 attestation | --- @@ -741,17 +565,14 @@ graph TB Just as OS hypervisors isolate virtual machines and enforce resource boundaries, an agent hypervisor isolates AI agent sessions and enforces governance boundaries. Without isolation, a misbehaving agent in a shared session can corrupt state, escalate privileges, or cascade failures across the entire system. **How do Execution Rings differ from traditional access control?** -Traditional access control is static and binary (allowed/denied). Execution Rings are dynamic and graduated -- agents earn ring privileges based on their trust score, can request temporary elevation with TTL (like `sudo`), and are automatically demoted when trust drops. Ring breach detection catches anomalous behavior before damage occurs. +Traditional access control is static and binary (allowed/denied). Execution Rings are dynamic and graduated. Agents earn ring privileges based on their trust score, can request temporary elevation with TTL (like `sudo`), and are automatically demoted when trust drops. Ring breach detection catches anomalous behavior before damage occurs. **What happens when a multi-agent saga fails?** -The Saga Orchestrator triggers reverse-order compensation for all committed steps. 
For parallel execution sagas, the failure policy determines the response: ALL_MUST_SUCCEED compensates if any branch fails, MAJORITY allows minority failures, and ANY succeeds if at least one branch completes. Execution checkpoints enable partial replay without re-running completed effects. - -**How does fault attribution work?** -When a saga fails, the hypervisor identifies the agent responsible for the failure and triggers appropriate liability consequences. +The Saga Orchestrator triggers reverse-order compensation for all committed steps. Each step defines an `undo_api` compensation endpoint, and steps that time out are cancelled and retried up to `max_retries` before compensation runs. ## Contributing -We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details. +We welcome contributions! Please see our [Contributing Guide](../../CONTRIBUTING.md) for details. - :bug: [Report a Bug](https://github.com/microsoft/agent-governance-toolkit/issues/new?labels=bug) - :bulb: [Request a Feature](https://github.com/microsoft/agent-governance-toolkit/issues/new?labels=enhancement) @@ -760,7 +581,7 @@ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) f ## License -MIT -- see [LICENSE](LICENSE). +MIT, see [LICENSE](../../LICENSE). --- @@ -770,6 +591,4 @@ MIT -- see [LICENSE](LICENSE). 
*Built with :heart: for the AI agent governance community* -If Agent Hypervisor helps your work, please consider giving it a :star: - diff --git a/agent-governance-python/agent-hypervisor/benchmarks/bench_hypervisor.py b/agent-governance-python/agent-hypervisor/benchmarks/bench_hypervisor.py index 54e953a60..d91ca493f 100644 --- a/agent-governance-python/agent-hypervisor/benchmarks/bench_hypervisor.py +++ b/agent-governance-python/agent-hypervisor/benchmarks/bench_hypervisor.py @@ -6,7 +6,7 @@ Measures latency and throughput of all hypervisor subsystems: - Session creation and lifecycle - Ring computation and enforcement -- Sponsorship and eff_score calculation +- Action classification and ring enforcement - Saga step execution - Delta audit capture and audit log root computation - End-to-end governance pipeline @@ -30,7 +30,8 @@ SessionConfig, ) from hypervisor.audit.delta import DeltaEngine, VFSChange -from hypervisor.liability.vouching import VouchingEngine +from hypervisor.models import ActionDescriptor, ReversibilityLevel +from hypervisor.rings.classifier import ActionClassifier from hypervisor.rings.enforcer import RingEnforcer from hypervisor.saga.orchestrator import SagaOrchestrator @@ -132,19 +133,22 @@ def bench_ring_computation(): # --------------------------------------------------------------------------- -# Benchmark: Sponsorship + eff_score +# Benchmark: Action Classification # --------------------------------------------------------------------------- -ve = VouchingEngine() -_vouch_counter = [0] +classifier = ActionClassifier() +classification_action = ActionDescriptor( + action_id="bench.action", + name="Benchmark Action", + execute_api="/bench/action", + undo_api="/bench/undo", + reversibility=ReversibilityLevel.PARTIAL, +) -@benchmark("sponsorship_eff_score", iterations=10000) -def bench_eff_score(): - _vouch_counter[0] += 1 - sid = f"bench-{_vouch_counter[0]}" - ve.vouch(f"did:v:{_vouch_counter[0]}", f"did:e:{_vouch_counter[0]}", sid, 0.9, 
bond_pct=0.2) - ve.compute_eff_score(f"did:e:{_vouch_counter[0]}", sid, 0.4, risk_weight=0.5) +@benchmark("action_classification", iterations=50000) +def bench_action_classification(): + classifier.classify(classification_action) # --------------------------------------------------------------------------- @@ -307,7 +311,7 @@ def main(): benchmarks = [ bench_ring_computation, - bench_eff_score, + bench_action_classification, bench_delta_capture, bench_hash_chain_root_10, bench_hash_chain_root_100, diff --git a/agent-governance-python/agent-hypervisor/benchmarks/results/BENCHMARKS.md b/agent-governance-python/agent-hypervisor/benchmarks/results/BENCHMARKS.md index 18c2f01b0..b4540b5cb 100644 --- a/agent-governance-python/agent-hypervisor/benchmarks/results/BENCHMARKS.md +++ b/agent-governance-python/agent-hypervisor/benchmarks/results/BENCHMARKS.md @@ -7,7 +7,6 @@ | Operation | Mean | P50 | P95 | P99 | Throughput | |-----------|------|-----|-----|-----|------------| | ring_computation | 0.3μs | 0.2μs | 0.3μs | 0.4μs | 3,750,319/s | -| sponsorship_eff_score | 1448.9μs | 666.2μs | 3968.9μs | 5195.1μs | 690/s | | delta_capture | 37.4μs | 27.3μs | 79.3μs | 192.7μs | 26,719/s | | hash_chain_root_10_deltas | 416.5μs | 352.9μs | 830.6μs | 1352.9μs | 2,401/s | | hash_chain_root_100_deltas | 3811.7μs | 3381.4μs | 7879.2μs | 11262.3μs | 262/s | @@ -19,7 +18,6 @@ ## Key Takeaways - **Ring computation**: Sub-microsecond — zero overhead for privilege checks -- **Sponsorship + eff_score**: Single-digit microseconds — real-time trust scoring - **Delta audit**: Microsecond-level — forensic logging adds negligible latency - **Audit log verification**: Scales linearly with delta count, remains sub-millisecond - **Full pipeline**: Session + audit + saga + terminate in < 1ms diff --git a/agent-governance-python/agent-hypervisor/benchmarks/results/benchmarks.json b/agent-governance-python/agent-hypervisor/benchmarks/results/benchmarks.json index 513c937dd..b8af60197 100644 --- 
a/agent-governance-python/agent-hypervisor/benchmarks/results/benchmarks.json +++ b/agent-governance-python/agent-hypervisor/benchmarks/results/benchmarks.json @@ -11,17 +11,6 @@ "max_us": 569.4, "ops_per_sec": 3750319 }, - { - "name": "vouching_sigma_eff", - "iterations": 10000, - "mean_us": 1448.91, - "median_us": 666.2, - "p95_us": 3968.9, - "p99_us": 5195.1, - "min_us": 16.4, - "max_us": 18213.2, - "ops_per_sec": 690 - }, { "name": "delta_capture", "iterations": 50000, @@ -100,4 +89,4 @@ "ops_per_sec": 2983 } ] -} \ No newline at end of file +} diff --git a/agent-governance-python/agent-hypervisor/docs/api-reference.md b/agent-governance-python/agent-hypervisor/docs/api-reference.md index 0201dd371..437691615 100644 --- a/agent-governance-python/agent-hypervisor/docs/api-reference.md +++ b/agent-governance-python/agent-hypervisor/docs/api-reference.md @@ -1,4 +1,4 @@ -# Agent Hypervisor — API Reference +# Agent Hypervisor API Reference > Complete reference for the REST API and Python SDK. > Run the server with `uvicorn hypervisor.api.server:app`. 
@@ -14,18 +14,15 @@ - [Sessions](#sessions) - [Rings](#rings) - [Sagas](#sagas) - - [Liability](#liability) - [Events](#events) - - [Audit](#audit) - [Verification](#verification) - [Python SDK](#python-sdk) - - [Agent Lifecycle](#agent-lifecycle) — Hypervisor, ExecutionRing, AgentConfig - - [Saga Engine](#saga-engine) — SagaOrchestrator, SagaDSLParser, CheckpointManager - - [Kill Switch](#kill-switch) — KillSwitch, BreachDetector - - [Rate Limiter](#rate-limiter) — AgentRateLimiter, TokenBucket - - [Audit & Observability](#audit--observability) — HypervisorEventBus, CausalTraceId - - [Joint Liability](#joint-liability) — VouchingEngine, SlashingEngine, LiabilityLedger - - [Classification](#classification) — ActionClassifier, RingEnforcer + - [Agent Lifecycle](#agent-lifecycle) Hypervisor, ExecutionRing, AgentConfig + - [Saga Engine](#saga-engine) SagaOrchestrator + - [Kill Switch](#kill-switch) KillSwitch, BreachDetector + - [Rate Limiter](#rate-limiter) AgentRateLimiter, TokenBucket + - [Audit & Observability](#audit--observability) HypervisorEventBus, CausalTraceId + - [Classification](#classification) ActionClassifier, RingEnforcer --- @@ -69,7 +66,6 @@ curl http://localhost:8000/api/v1/stats "active_sessions": 1, "total_participants": 7, "active_sagas": 2, - "total_vouches": 4, "event_count": 42 } ``` @@ -91,8 +87,7 @@ curl -X POST http://localhost:8000/api/v1/sessions \ "max_participants": 5, "max_duration_seconds": 3600, "min_eff_score": 0.60, - "enable_audit": true, - "enable_blockchain_commitment": false + "enable_audit": true }' ``` @@ -106,7 +101,6 @@ curl -X POST http://localhost:8000/api/v1/sessions \ | `max_duration_seconds` | int | `3600` | Session timeout in seconds | | `min_eff_score` | float | `0.60` | Minimum effective reputation score | | `enable_audit` | bool | `true` | Enable hash-chained audit trail | -| `enable_blockchain_commitment` | bool | `false` | Reserved. 
Blockchain anchoring is not implemented, so this has no effect (the commitment is stored in memory only). | **Response** `201 Created` @@ -203,7 +197,7 @@ curl http://localhost:8000/api/v1/sessions/ss-a1b2c3d4 #### `POST /api/v1/sessions/{session_id}/join` Join an agent to a session. The agent is assigned an Execution Ring based on its -trust score (`sigma_raw`) and any sponsorship bonds. +trust score (`sigma_raw`). ```bash curl -X POST http://localhost:8000/api/v1/sessions/ss-a1b2c3d4/join \ @@ -262,7 +256,7 @@ curl -X POST http://localhost:8000/api/v1/sessions/ss-a1b2c3d4/activate #### `POST /api/v1/sessions/{session_id}/terminate` -Terminate a session, commit the audit trail, and release bonds. +Terminate a session and commit the audit trail. ```bash curl -X POST http://localhost:8000/api/v1/sessions/ss-a1b2c3d4/terminate @@ -535,107 +529,6 @@ curl -X POST http://localhost:8000/api/v1/sagas/saga-e5f6a7b8/steps/step-001/exe --- -### Liability - -#### `POST /api/v1/sessions/{session_id}/sponsor` - -Create a sponsorship bond between two agents. The voucher stakes a percentage -of their reputation to back the vouchee. 
- -```bash -curl -X POST http://localhost:8000/api/v1/sessions/ss-a1b2c3d4/sponsor \ - -H "Content-Type: application/json" \ - -d '{ - "voucher_did": "did:example:alice", - "vouchee_did": "did:example:bob", - "voucher_sigma": 0.92, - "bond_pct": 0.10 - }' -``` - -**Request Body** - -| Field | Type | Default | Description | -|-------|------|---------|-------------| -| `voucher_did` | string | *required* | DID of the sponsoring agent | -| `vouchee_did` | string | *required* | DID of the agent being sponsored | -| `voucher_sigma` | float | *required* | Sponsor's raw reputation score | -| `bond_pct` | float | `null` | Fraction of reputation to bond (0–1) | -| `expiry` | string | `null` | ISO 8601 expiry timestamp | - -**Response** `201 Created` - -```json -{ - "vouch_id": "vouch-x9y8z7", - "voucher_did": "did:example:alice", - "vouchee_did": "did:example:bob", - "session_id": "ss-a1b2c3d4", - "bonded_amount": 0.092, - "bonded_sigma_pct": 0.10, - "is_active": true -} -``` - ---- - -#### `GET /api/v1/sessions/{session_id}/sponsors` - -List all sponsorship bonds in a session. - -```bash -curl http://localhost:8000/api/v1/sessions/ss-a1b2c3d4/sponsors -``` - -**Response** `200 OK` - -```json -[ - { - "vouch_id": "vouch-x9y8z7", - "voucher_did": "did:example:alice", - "vouchee_did": "did:example:bob", - "session_id": "ss-a1b2c3d4", - "bonded_amount": 0.092, - "bonded_sigma_pct": 0.10, - "is_active": true - } -] -``` - ---- - -#### `GET /api/v1/agents/{agent_did}/liability` - -Get an agent's total liability exposure across all sessions. 
- -```bash -curl http://localhost:8000/api/v1/agents/did:example:alice/liability -``` - -**Response** `200 OK` - -```json -{ - "agent_did": "did:example:alice", - "vouches_given": [ - { - "vouch_id": "vouch-x9y8z7", - "voucher_did": "did:example:alice", - "vouchee_did": "did:example:bob", - "session_id": "ss-a1b2c3d4", - "bonded_amount": 0.092, - "bonded_sigma_pct": 0.10, - "is_active": true - } - ], - "vouches_received": [], - "total_exposure": 0.092 -} -``` - ---- - ### Events #### `GET /api/v1/events` @@ -681,18 +574,15 @@ curl "http://localhost:8000/api/v1/events?event_type=session.created&limit=10" | `session.terminated` | Session terminated | | `ring.assigned` | Ring assigned to agent | | `ring.demoted` | Agent demoted to lower ring | -| `ring.breach` | Ring breach detected | -| `liability.vouch_created` | Sponsorship bond created | -| `liability.slash` | Reputation slashed | -| `liability.quarantine` | Agent quarantined | +| `ring.breach_detected` | Ring breach detected | | `saga.created` | Saga created | | `saga.step_committed` | Saga step committed | | `saga.step_failed` | Saga step failed | | `saga.compensating` | Saga compensation started | | `saga.completed` | Saga completed | -| `security.kill` | Agent killed via kill switch | +| `security.agent_killed` | Agent killed via kill switch | | `security.rate_limited` | Agent rate-limited | -| `audit.commitment` | Audit trail committed | +| `audit.committed` | Audit trail committed | --- @@ -713,93 +603,13 @@ curl http://localhost:8000/api/v1/events/stats "session.created": 3, "session.joined": 7, "ring.assigned": 7, - "saga.step_committed": 12, - "liability.vouch_created": 4 + "saga.step_committed": 12 } } ``` --- -### Audit - -#### `GET /api/v1/audit/commitments` - -List all session audit-trail commitments. 
- -```bash -curl http://localhost:8000/api/v1/audit/commitments -``` - -**Response** `200 OK` - -```json -[ - { - "session_id": "ss-a1b2c3d4", - "hash_chain_root": "sha256:9f86d081884c...", - "participant_dids": ["did:example:alice", "did:example:bob"], - "delta_count": 15, - "committed_at": "2025-01-15T11:00:00+00:00", - "committed_to": "local", - "blockchain_tx_id": null - } -] -``` - ---- - -#### `GET /api/v1/audit/commitments/{session_id}` - -Get the audit commitment for a specific session. - -```bash -curl http://localhost:8000/api/v1/audit/commitments/ss-a1b2c3d4 -``` - -**Response** `200 OK` - -```json -{ - "session_id": "ss-a1b2c3d4", - "hash_chain_root": "sha256:9f86d081884c...", - "participant_dids": ["did:example:alice", "did:example:bob"], - "delta_count": 15, - "committed_at": "2025-01-15T11:00:00+00:00", - "committed_to": "local", - "blockchain_tx_id": null -} -``` - ---- - -#### `POST /api/v1/audit/verify/{session_id}` - -Verify a session's audit-log root hash matches its commitment. - -```bash -curl -X POST "http://localhost:8000/api/v1/audit/verify/ss-a1b2c3d4?expected_root=sha256:9f86d081884c..." -``` - -**Query Parameters** - -| Parameter | Type | Description | -|-----------|------|-------------| -| `expected_root` | string | *required* — The expected hash-chain root to verify against | - -**Response** `200 OK` - -```json -{ - "session_id": "ss-a1b2c3d4", - "valid": true, - "committed_root": "sha256:9f86d081884c...", - "expected_root": "sha256:9f86d081884c..." -} -``` - ---- - ### Verification #### `POST /api/v1/verify/history` @@ -867,9 +677,7 @@ sub-systems. 
from hypervisor import Hypervisor from hypervisor.models import SessionConfig, ConsistencyMode -hv = Hypervisor( - max_exposure=5.0, # cap total liability per voucher -) +hv = Hypervisor() # Create a session config = SessionConfig( @@ -899,8 +707,6 @@ hash_root = await hv.terminate_session(session_id) | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `retention_policy` | `RetentionPolicy` | `None` | Delta retention settings | -| `max_exposure` | `float` | `None` | Maximum liability exposure per voucher | | `nexus` | `Any` | `None` | Nexus trust-scoring adapter | | `policy_check` | `Any` | `None` | External policy check hook | | `iatp` | `Any` | `None` | IATP manifest adapter | @@ -951,7 +757,6 @@ config = SessionConfig( max_duration_seconds=7200, min_eff_score=0.50, enable_audit=True, - enable_blockchain_commitment=False, ) ``` @@ -989,7 +794,7 @@ print(action.required_ring) # Minimum ring needed Manages multi-step transactions with automatic compensation on failure. ```python -from hypervisor.saga import SagaOrchestrator +from hypervisor import SagaOrchestrator orch = SagaOrchestrator() @@ -1040,123 +845,16 @@ compensated = await orch.compensate(saga.saga_id, compensator) --- -#### `SagaDSLParser` - -Define sagas declaratively with a JSON/dict DSL. 
- -```python -from hypervisor.saga import SagaDSLParser - -parser = SagaDSLParser() - -definition = { - "name": "deploy-pipeline", - "session_id": "ss-a1b2c3d4", - "steps": [ - { - "id": "provision", - "action_id": "provision-vm", - "agent": "did:example:alice", - "execute_api": "/infra/provision", - "undo_api": "/infra/deprovision", - "timeout": 120, - "retries": 2, - }, - { - "id": "deploy", - "action_id": "deploy-app", - "agent": "did:example:bob", - "execute_api": "/app/deploy", - "undo_api": "/app/undeploy", - }, - ], -} - -# Validate before parsing -errors = parser.validate(definition) -assert errors == [] - -# Parse into a SagaDefinition -saga_def = parser.parse(definition) -steps = parser.to_saga_steps(saga_def) -``` - ---- - -#### `CheckpointManager` - -Semantic checkpoints for saga progress tracking. - -```python -from hypervisor.saga import CheckpointManager - -mgr = CheckpointManager() - -# Save a checkpoint after a step succeeds -cp = mgr.save( - saga_id="saga-001", - step_id="step-001", - goal_description="VM provisioned successfully", - state_snapshot={"vm_id": "vm-123"}, -) - -# Get all checkpoints for a saga -checkpoints = mgr.get_saga_checkpoints("saga-001") - -# Build a replay plan (skip already-achieved goals) -replay = mgr.get_replay_plan("saga-001", steps=["step-001", "step-002"]) -``` - ---- - -#### `FanOutOrchestrator` - -Execute saga branches in parallel with configurable completion policies. 
- -```python -from hypervisor.saga import FanOutOrchestrator, FanOutPolicy - -fan = FanOutOrchestrator() - -# Create a fan-out group -group = fan.create_group( - saga_id="saga-001", - policy=FanOutPolicy.MAJORITY_MUST_SUCCEED, -) - -# Add parallel branches -fan.add_branch(group.group_id, step_a) -fan.add_branch(group.group_id, step_b) -fan.add_branch(group.group_id, step_c) - -# Execute all branches -result = await fan.execute( - group.group_id, - executors={"step-a": exec_a, "step-b": exec_b, "step-c": exec_c}, - timeout_seconds=300, -) -print(result.policy_satisfied) # True if majority succeeded -``` - -**Fan-Out Policies** - -| Policy | Description | -|--------|-------------| -| `ALL_MUST_SUCCEED` | Every branch must complete successfully | -| `MAJORITY_MUST_SUCCEED` | More than half must succeed | -| `ANY_MUST_SUCCEED` | At least one branch must succeed | - ---- - ### Kill Switch #### `KillSwitch` -Gracefully terminate an agent — hands off in-flight work to substitutes and +Gracefully terminate an agent. Hands off in-flight work to substitutes and triggers saga compensation. ```python -from hypervisor.security import KillSwitch, KillReason +from hypervisor import KillSwitch +from hypervisor.security.kill_switch import KillReason ks = KillSwitch() @@ -1354,134 +1052,6 @@ restored = CausalTraceId.from_string(s) --- -### Joint Liability - -#### `VouchingEngine` - -Manages sponsorship bonds where trusted agents vouch for newcomers by staking -a fraction of their reputation. 
- -```python -from hypervisor.liability import VouchingEngine - -vouching = VouchingEngine(max_exposure=5.0) - -# Create a sponsorship bond -record = vouching.vouch( - voucher_did="did:example:alice", - vouchee_did="did:example:bob", - session_id="ss-a1b2c3d4", - voucher_sigma=0.92, - bond_pct=0.10, -) -print(record.bonded_amount) # 0.092 (10% of 0.92) - -# Compute the vouchee's effective reputation -eff = vouching.compute_eff_score( - vouchee_did="did:example:bob", - session_id="ss-a1b2c3d4", - vouchee_sigma=0.40, - risk_weight=0.5, -) -print(eff) # Boosted score thanks to sponsorship - -# Check exposure -exposure = vouching.get_total_exposure("did:example:alice", "ss-a1b2c3d4") - -# Release bonds after clean session -released = vouching.release_session_bonds("ss-a1b2c3d4") -``` - ---- - -#### `SlashingEngine` - -Penalizes misbehaving agents and cascades penalties to their vouchers. - -```python -from hypervisor.liability import SlashingEngine - -slashing = SlashingEngine(vouching_engine=vouching) - -result = slashing.slash( - vouchee_did="did:example:bad-actor", - session_id="ss-a1b2c3d4", - vouchee_sigma=0.60, - risk_weight=0.8, - reason="Behavioral drift detected", - agent_scores={"did:example:bad-actor": 0.60}, - cascade_depth=1, -) - -print(result.vouchee_penalty) -for clip in result.voucher_clips: - print(f" Voucher {clip.voucher_did} lost {clip.clipped_amount}") -``` - ---- - -#### `LiabilityLedger` - -Append-only ledger tracking vouches, slashes, quarantines, and clean sessions -per agent. Used for admission control and risk profiling. 
- -```python -from hypervisor.liability import LiabilityLedger, LedgerEntryType - -ledger = LiabilityLedger() - -# Record events -ledger.record( - agent_did="did:example:bob", - entry_type=LedgerEntryType.SLASH_RECEIVED, - session_id="ss-a1b2c3d4", - severity=0.8, - details="Behavioral drift", -) - -# Compute risk profile -profile = ledger.compute_risk_profile("did:example:bob") -print(profile.total_slashes) -print(profile.risk_score) - -# Admission check -admitted, reason = ledger.should_admit("did:example:bob") -if not admitted: - print(f"Denied: {reason}") -``` - ---- - -#### `QuarantineManager` - -Isolates misbehaving agents with optional auto-release timers. - -```python -from hypervisor.liability import QuarantineManager, QuarantineReason - -qm = QuarantineManager() - -# Quarantine an agent -record = qm.quarantine( - agent_did="did:example:bad-actor", - session_id="ss-a1b2c3d4", - reason=QuarantineReason.BEHAVIORAL_DRIFT, - details="Embedding drift exceeded threshold", - duration_seconds=300, -) - -# Check status -print(qm.is_quarantined("did:example:bad-actor", "ss-a1b2c3d4")) # True - -# Tick to auto-release expired quarantines -expired = qm.tick() - -# Manual release -qm.release("did:example:bad-actor", "ss-a1b2c3d4") -``` - ---- - ### Classification #### `ActionClassifier` @@ -1489,7 +1059,7 @@ qm.release("did:example:bad-actor", "ss-a1b2c3d4") Classifies actions into ring levels and risk weights based on their properties. ```python -from hypervisor.rings import ActionClassifier +from hypervisor import ActionClassifier classifier = ActionClassifier() diff --git a/agent-governance-python/agent-hypervisor/docs/joint-liability-guide.md b/agent-governance-python/agent-hypervisor/docs/joint-liability-guide.md deleted file mode 100644 index 8b3a15331..000000000 --- a/agent-governance-python/agent-hypervisor/docs/joint-liability-guide.md +++ /dev/null @@ -1,393 +0,0 @@ -# Understanding Joint Liability for AI Agents - -> **Edition:** Public Preview APIs only. 
-> Module path: `src/hypervisor/liability/` - -## Table of Contents - -- [What Is Joint Liability?](#what-is-joint-liability) -- [Why AI Agents Need Joint Liability](#why-ai-agents-need-joint-liability) -- [Vouching: Staking Reputation for Another Agent](#vouching-staking-reputation-for-another-agent) -- [The Effective Score Formula](#the-effective-score-formula) -- [Slashing: What Happens When an Agent Misbehaves](#slashing-what-happens-when-an-agent-misbehaves) -- [Cascade Effects](#cascade-effects) -- [Real-World Analogy: Co-Signing a Loan](#real-world-analogy-co-signing-a-loan) -- [Code Examples](#code-examples) -- [Key Classes Reference](#key-classes-reference) - ---- - -## What Is Joint Liability? - -Joint liability is a mechanism where one agent (the **voucher**) stakes a portion -of its own reputation to sponsor another agent (the **vouchee**) into a shared -session. If the vouchee misbehaves, both agents face consequences — the vouchee -is penalized directly and the voucher's bonded reputation is clipped. - -This creates a web of accountability: agents don't operate in isolation but are -connected through a **liability graph** of sponsor → sponsored relationships. - -## Why AI Agents Need Joint Liability - -In a multi-agent system, agents from different providers collaborate inside -shared sessions. Without accountability: - -- A rogue agent can disrupt an entire session with no consequence. -- There is no incentive for agents to vet who they collaborate with. -- Trust decisions fall entirely on the hypervisor, creating a bottleneck. - -Joint liability solves this by making agents **co-responsible**. An agent that -vouches for a bad actor shares in the penalty, creating a decentralized trust -network where agents self-police. - -## Vouching: Staking Reputation for Another Agent - -Vouching is the act of a trusted agent sponsoring a less-established agent. -The voucher bonds a percentage of its reputation score (σ) as collateral. 
- -Key concepts: - -| Term | Description | -|------|-------------| -| **Voucher** | The sponsoring agent (`voucher_did`) | -| **Vouchee** | The sponsored agent (`vouchee_did`) | -| **Bond** | The amount of σ the voucher stakes (`bonded_amount`) | -| **Bond %** | Percentage of voucher's σ that is bonded (`bonded_sigma_pct`) | -| **Vouch ID** | Unique identifier for the sponsorship record | - -The `VouchingEngine` manages these relationships: - -```python -from hypervisor.liability.vouching import VouchingEngine - -engine = VouchingEngine() - -# Agent A vouches for Agent B in a session -record = engine.vouch( - voucher_did="did:mesh:agent-a", - vouchee_did="did:mesh:agent-b", - session_id="session-123", - voucher_sigma=0.85, # Voucher's current reputation score - bond_pct=0.20, # Bond 20% of reputation (optional) -) - -print(record.vouch_id) # "sponsor:" -print(record.is_active) # True -``` - -### Constraints - -The `VouchingEngine` enforces several safeguards (configurable via class constants): - -- **Minimum voucher score** (`MIN_VOUCHER_SCORE = 0.50`): Only agents with σ ≥ 0.50 may vouch. -- **Default bond percentage** (`DEFAULT_BOND_PCT = 0.20`): 20% of the voucher's σ is bonded by default. -- **Maximum exposure** (`DEFAULT_MAX_EXPOSURE = 0.80`): A voucher cannot bond more than 80% of its σ across all active vouches. - -> **Public Preview note:** The Public Preview approves all vouch requests -> and does not enforce bonding. The API surface is identical — constraints are -> enforced in the full edition. 
- -## The Effective Score Formula - -When an agent is vouched for, its **effective reputation score** (σ\_eff) -combines its own score with the voucher's backing: - -``` -σ_eff = σ_L + (ω × σ_H) -``` - -Where: - -| Symbol | Meaning | -|--------|---------| -| **σ\_L** | The vouchee's own reputation score (low-trust agent) | -| **ω** | Risk weight — how much of the voucher's bond translates to trust (0.0–1.0) | -| **σ\_H** | The voucher's bonded reputation amount (high-trust agent's stake) | - -This formula lets a new agent with low reputation (σ\_L = 0.30) participate -meaningfully when backed by a trusted agent (σ\_H = 0.85, ω = 0.5): - -``` -σ_eff = 0.30 + (0.5 × 0.85) = 0.725 -``` - -The `compute_eff_score` method computes this: - -```python -eff = engine.compute_eff_score( - vouchee_did="did:mesh:agent-b", - session_id="session-123", - vouchee_sigma=0.30, # σ_L - risk_weight=0.5, # ω -) -``` - -> **Public Preview note:** `compute_eff_score` returns the vouchee's own -> score (`vouchee_sigma`) without the voucher boost. The formula above is -> applied in the full edition. - -## Slashing: What Happens When an Agent Misbehaves - -When a vouchee violates policies — behavioral drift, ring breach, rate limit -abuse — the **SlashingEngine** penalizes both the offender and its vouchers. 
- -A slash operation produces a `SlashResult` containing: - -- The vouchee's σ before and after the penalty -- A list of `VoucherClip` records — each voucher that had collateral clipped -- The reason for the penalty -- The cascade depth (how far the penalty propagated) - -```python -from hypervisor.liability.slashing import SlashingEngine - -slasher = SlashingEngine(vouching_engine=engine) - -result = slasher.slash( - vouchee_did="did:mesh:agent-b", - session_id="session-123", - vouchee_sigma=0.72, - risk_weight=0.5, - reason="behavioral_drift", - agent_scores={"did:mesh:agent-a": 0.85, "did:mesh:agent-b": 0.72}, -) - -print(result.slash_id) # "penalize:" -print(result.vouchee_sigma_before) # 0.72 -print(result.vouchee_sigma_after) # Reduced (full edition) -print(result.voucher_clips) # List of VoucherClip records -print(result.reason) # "behavioral_drift" -``` - -### Slashing Constants - -| Constant | Value | Description | -|----------|-------|-------------| -| `MAX_CASCADE_DEPTH` | 2 | Maximum depth for cascade penalties | -| `SIGMA_FLOOR` | 0.05 | Minimum σ — an agent is never penalized below this | - -> **Public Preview note:** `slash` logs the penalty event but does not -> reduce any scores. `vouchee_sigma_after` equals `vouchee_sigma_before` and -> `voucher_clips` is empty. - -## Cascade Effects - -Penalties don't stop at the direct offender. If Agent C misbehaves and Agent B -vouched for C, Agent B's score is clipped. If Agent A vouched for Agent B, -Agent A may also be affected — up to `MAX_CASCADE_DEPTH = 2` levels. 
- -The `LiabilityMatrix` models these cascades: - -```python -from hypervisor.liability import LiabilityMatrix - -matrix = LiabilityMatrix(session_id="session-123") - -# Build the liability graph -matrix.add_edge("did:mesh:agent-a", "did:mesh:agent-b", bonded_amount=0.17, vouch_id="v1") -matrix.add_edge("did:mesh:agent-b", "did:mesh:agent-c", bonded_amount=0.10, vouch_id="v2") - -# Find cascade paths starting from Agent C -paths = matrix.cascade_path("did:mesh:agent-c", max_depth=2) -# paths might be: [["did:mesh:agent-c"]] — C is a leaf -# Reverse direction: query who vouches for C -vouchers = matrix.who_vouches_for("did:mesh:agent-c") -# Returns edges where vouchee_did == agent-c - -# Check total exposure for Agent A -exposure = matrix.total_exposure("did:mesh:agent-a") -print(f"Agent A has {exposure}σ bonded across all vouchees") -``` - -### Cycle Detection - -The liability graph must be a **directed acyclic graph (DAG)**. Cycles would -create infinite cascade loops. The `has_cycle()` method detects this: - -```python -matrix.add_edge("did:mesh:agent-c", "did:mesh:agent-a", bonded_amount=0.05, vouch_id="v3") -print(matrix.has_cycle()) # True — A→B→C→A forms a cycle -``` - -### The Liability Ledger - -All vouching, slashing, and quarantine events are recorded in the -`LiabilityLedger` — an append-only audit log: - -```python -from hypervisor.liability.ledger import LiabilityLedger, LedgerEntryType - -ledger = LiabilityLedger() - -# Record a vouch event -ledger.record( - agent_did="did:mesh:agent-a", - entry_type=LedgerEntryType.VOUCH_GIVEN, - session_id="session-123", - details="Vouched for did:mesh:agent-b", - related_agent="did:mesh:agent-b", -) - -# Record a slash event -ledger.record( - agent_did="did:mesh:agent-b", - entry_type=LedgerEntryType.SLASH_RECEIVED, - session_id="session-123", - severity=0.4, - details="behavioral_drift detected", -) - -# Query history -history = ledger.get_agent_history("did:mesh:agent-b") -profile = 
ledger.compute_risk_profile("did:mesh:agent-b") -print(profile.recommendation) # "admit" (Public Preview always admits) -``` - -Ledger entry types include: - -| Entry Type | Description | -|------------|-------------| -| `VOUCH_GIVEN` | Agent vouched for another | -| `VOUCH_RECEIVED` | Agent was vouched for | -| `VOUCH_RELEASED` | Vouch bond was released | -| `SLASH_RECEIVED` | Agent was directly penalized | -| `SLASH_CASCADED` | Agent was penalized via cascade | -| `QUARANTINE_ENTERED` | Agent entered quarantine | -| `QUARANTINE_RELEASED` | Agent released from quarantine | -| `FAULT_ATTRIBUTED` | Fault was attributed to agent | -| `CLEAN_SESSION` | Agent completed a session cleanly | - -## Real-World Analogy: Co-Signing a Loan - -Joint liability for AI agents works like **co-signing a loan**: - -| Loan Co-Signing | Agent Joint Liability | -|-----------------|----------------------| -| You (co-signer) trust a friend to repay a loan | Agent A (voucher) trusts Agent B to behave properly | -| You pledge your credit score as collateral | Agent A bonds a portion of its σ score | -| If your friend defaults, the bank comes after you too | If Agent B misbehaves, Agent A's bonded σ is clipped | -| Your credit score drops | Agent A's reputation score decreases | -| You're less likely to co-sign for strangers | Agents become selective about who they vouch for | -| The bank won't let you co-sign too many loans | `max_exposure` limits how much σ an agent can bond | - -Just as reckless co-signing destroys your credit, recklessly vouching for -unreliable agents erodes an agent's own reputation — creating a natural -incentive for agents to be diligent about who they sponsor. 
- -## Code Examples - -### End-to-End: Vouching, Faulting, and Slashing - -```python -from hypervisor.liability.vouching import VouchingEngine -from hypervisor.liability.slashing import SlashingEngine -from hypervisor.liability import LiabilityMatrix -from hypervisor.liability.ledger import LiabilityLedger, LedgerEntryType -from hypervisor.liability.attribution import CausalAttributor - -# --- Setup --- -vouching = VouchingEngine(max_exposure=0.80) -slashing = SlashingEngine(vouching_engine=vouching) -matrix = LiabilityMatrix(session_id="session-42") -ledger = LiabilityLedger() -attributor = CausalAttributor() - -SESSION = "session-42" -AGENT_A = "did:mesh:senior-agent" -AGENT_B = "did:mesh:junior-agent" - -# --- Step 1: Agent A vouches for Agent B --- -vouch = vouching.vouch( - voucher_did=AGENT_A, - vouchee_did=AGENT_B, - session_id=SESSION, - voucher_sigma=0.90, -) -matrix.add_edge(AGENT_A, AGENT_B, bonded_amount=0.18, vouch_id=vouch.vouch_id) -ledger.record(AGENT_A, LedgerEntryType.VOUCH_GIVEN, SESSION, details=f"Vouched for {AGENT_B}") -ledger.record(AGENT_B, LedgerEntryType.VOUCH_RECEIVED, SESSION, related_agent=AGENT_A) - -# --- Step 2: Agent B does work; something goes wrong --- -attribution = attributor.attribute( - saga_id="saga-7", - session_id=SESSION, - agent_actions={AGENT_A: [{"step": "review"}], AGENT_B: [{"step": "execute"}]}, - failure_step_id="execute", - failure_agent_did=AGENT_B, -) -print(f"Root cause: {attribution.root_cause_agent}") -print(f"Agent B liability: {attribution.get_liability(AGENT_B)}") # 1.0 - -# --- Step 3: Slash Agent B --- -slash = slashing.slash( - vouchee_did=AGENT_B, - session_id=SESSION, - vouchee_sigma=0.45, - risk_weight=0.5, - reason="saga failure in saga-7", - agent_scores={AGENT_A: 0.90, AGENT_B: 0.45}, -) -ledger.record(AGENT_B, LedgerEntryType.SLASH_RECEIVED, SESSION, severity=0.4) - -# --- Step 4: Check exposure --- -exposure = matrix.total_exposure(AGENT_A) -print(f"Agent A total exposure: {exposure}σ") - -# --- 
Step 5: Release bonds at session end --- -released = vouching.release_session_bonds(SESSION) -matrix.clear() -print(f"Released {released} bond(s)") -``` - -### Querying the Liability Graph - -```python -from hypervisor.liability import LiabilityMatrix - -matrix = LiabilityMatrix(session_id="session-99") - -# Three-agent chain: A → B → C -matrix.add_edge("did:mesh:a", "did:mesh:b", 0.15, "v1") -matrix.add_edge("did:mesh:b", "did:mesh:c", 0.10, "v2") - -# Who vouches for C? -for edge in matrix.who_vouches_for("did:mesh:c"): - print(f"{edge.voucher_did} vouches for C with {edge.bonded_amount}σ bonded") - -# Who does A vouch for? -for edge in matrix.who_is_vouched_by("did:mesh:a"): - print(f"A vouches for {edge.vouchee_did}") - -# Total exposure for B -print(f"B's total exposure: {matrix.total_exposure('did:mesh:b')}σ") - -# Cascade paths from B (B vouches for C, so slashing B cascades to C) -for path in matrix.cascade_path("did:mesh:b"): - print(f"Cascade path: {' → '.join(path)}") - -# Cycle check -print(f"Has cycle: {matrix.has_cycle()}") # False -``` - -## Key Classes Reference - -| Class | Module | Purpose | -|-------|--------|---------| -| `VouchingEngine` | `liability.vouching` | Create, query, and release vouch bonds | -| `VouchRecord` | `liability.vouching` | Data class for a single vouch relationship | -| `SlashingEngine` | `liability.slashing` | Penalize misbehaving agents and their vouchers | -| `SlashResult` | `liability.slashing` | Result of a slash operation | -| `VoucherClip` | `liability.slashing` | Collateral clip applied to a single voucher | -| `LiabilityMatrix` | `liability` | Directed graph of sponsor → sponsored bonds | -| `LiabilityEdge` | `liability` | A single edge in the liability graph | -| `LiabilityLedger` | `liability.ledger` | Append-only audit log of all liability events | -| `LedgerEntryType` | `liability.ledger` | Enum of event types recorded in the ledger | -| `AgentRiskProfile` | `liability.ledger` | Risk profile computed from an 
agent's history | -| `CausalAttributor` | `liability.attribution` | Assigns fault to the direct-cause agent | -| `QuarantineManager` | `liability.quarantine` | Manages agent quarantine (no-op in community) | - ---- - -> **Further reading:** See the [tutorials/](../tutorials/) directory for -> hands-on notebooks, and the [README](../README.md) for the full feature -> overview. diff --git a/agent-governance-python/agent-hypervisor/examples/dashboard/README.md b/agent-governance-python/agent-hypervisor/examples/dashboard/README.md index fc30e058c..39eaf1997 100644 --- a/agent-governance-python/agent-hypervisor/examples/dashboard/README.md +++ b/agent-governance-python/agent-hypervisor/examples/dashboard/README.md @@ -15,7 +15,7 @@ streamlit run app.py - **Session Overview** — Active sessions, participant counts, ring distribution - **Execution Rings** — Ring hierarchy, trust-based assignment, transition history - **Saga Orchestration** — Timeline/Gantt charts, step states, compensation chains -- **Liability & Trust** — Sponsor networks, penalty cascades, trust leaderboard +- **Audit & Verification** — Trust score leaderboard, audit events, verification signals - **Event Stream** — Real-time event log, type heatmaps, causal traces ## Screenshots diff --git a/agent-governance-python/agent-hypervisor/examples/dashboard/app.py b/agent-governance-python/agent-hypervisor/examples/dashboard/app.py index ce7a3e9af..93ad2d30c 100644 --- a/agent-governance-python/agent-hypervisor/examples/dashboard/app.py +++ b/agent-governance-python/agent-hypervisor/examples/dashboard/app.py @@ -75,7 +75,6 @@ EVENT_CATEGORIES = { "session": "#0074D9", "ring": "#FF851B", - "liability": "#FF4136", "saga": "#2ECC40", "vfs": "#B10DC9", "security": "#FFDC00", @@ -106,10 +105,6 @@ "ring.elevated", "ring.demoted", "ring.breach_detected", - "liability.vouch_created", - "liability.vouch_released", - "liability.slash_executed", - "liability.fault_attributed", "saga.created", "saga.step_started", 
"saga.step_committed", @@ -117,7 +112,6 @@ "saga.compensating", "saga.completed", "saga.escalated", - "saga.checkpoint_saved", "vfs.write", "vfs.delete", "vfs.snapshot", @@ -275,70 +269,6 @@ def generate_sagas(sessions: list[str]) -> tuple[pd.DataFrame, pd.DataFrame]: return pd.DataFrame(saga_rows), pd.DataFrame(step_rows) -@st.cache_data(ttl=300) -def generate_vouches(agents_df: pd.DataFrame) -> pd.DataFrame: - rng = random.Random(_seed() + 4) - rows = [] - grouped = agents_df.groupby("session_id") - for sid, group in grouped: - agents = group.to_dict("records") - for _ in range(min(len(agents), rng.randint(2, 5))): - voucher = rng.choice([a for a in agents if a["sigma_raw"] >= 0.5] or agents) - vouchee = rng.choice( - [a for a in agents if a["agent_did"] != voucher["agent_did"]] or agents - ) - bond_pct = round(rng.uniform(0.10, 0.35), 2) - rows.append( - dict( - vouch_id=f"v-{uuid.UUID(int=rng.getrandbits(128)).hex[:6]}", - voucher_did=voucher["agent_did"], - vouchee_did=vouchee["agent_did"], - session_id=sid, - bonded_sigma_pct=bond_pct, - bonded_amount=round(voucher["sigma_raw"] * bond_pct, 3), - created_at=datetime.now(UTC) - timedelta(minutes=rng.randint(5, 60)), - is_active=rng.random() > 0.2, - ) - ) - return pd.DataFrame(rows) - - -@st.cache_data(ttl=300) -def generate_slashes(vouches_df: pd.DataFrame) -> pd.DataFrame: - rng = random.Random(_seed() + 5) - rows = [] - if vouches_df.empty: - return pd.DataFrame( - columns=[ - "slash_id", - "vouchee_did", - "sigma_before", - "sigma_after", - "reason", - "session_id", - "timestamp", - "cascade_depth", - "vouchers_clipped", - ] - ) - for _, v in vouches_df.iterrows(): - if rng.random() < 0.25: - rows.append( - dict( - slash_id=f"sl-{uuid.UUID(int=rng.getrandbits(128)).hex[:6]}", - vouchee_did=v["vouchee_did"], - sigma_before=round(rng.uniform(0.3, 0.8), 3), - sigma_after=0.0, - reason=rng.choice(["behavioral_drift", "policy_violation", "timeout_exceeded"]), - session_id=v["session_id"], - 
timestamp=datetime.now(UTC) - timedelta(minutes=rng.randint(1, 30)), - cascade_depth=rng.randint(0, 2), - vouchers_clipped=rng.randint(1, 3), - ) - ) - return pd.DataFrame(rows) - - @st.cache_data(ttl=300) def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: rng = random.Random(_seed() + 6) @@ -427,8 +357,6 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: agents_df = generate_agents(session_ids) transitions_df = generate_ring_transitions(agents_df) sagas_df, steps_df = generate_sagas(session_ids) -vouches_df = generate_vouches(agents_df) -slashes_df = generate_slashes(vouches_df) events_df = generate_events(session_ids) if selected_session != "All Sessions": @@ -440,8 +368,6 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: ) sagas_df = sagas_df[sagas_df["session_id"] == selected_session] steps_df = steps_df[steps_df["saga_id"].isin(sagas_df["saga_id"])] - vouches_df = vouches_df[vouches_df["session_id"] == selected_session] - slashes_df = slashes_df[slashes_df["session_id"] == selected_session] events_df = events_df[events_df["session_id"] == selected_session] # --------------------------------------------------------------------------- @@ -453,17 +379,17 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: m2.metric("Agents", len(agents_df)) m3.metric("Active Sagas", int((sagas_df["state"] == "RUNNING").sum()) if not sagas_df.empty else 0) m4.metric("Events", len(events_df)) -m5.metric("Sponsors", len(vouches_df)) +m5.metric("Ring Changes", len(transitions_df)) # --------------------------------------------------------------------------- # Tabs # --------------------------------------------------------------------------- -tab_overview, tab_rings, tab_sagas, tab_liability, tab_events = st.tabs( +tab_overview, tab_rings, tab_sagas, tab_audit, tab_events = st.tabs( [ "📊 Session Overview", "🔒 Execution Rings", "⚙️ Saga Orchestration", - "💰 Liability & Trust", + "🧾 Audit & 
Verification", "📡 Event Stream", ] ) @@ -762,118 +688,13 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: c2.metric("Success Rate", f"{completed / max(1, total_sagas) * 100:.0f}%") c3.metric("Failure Rate", f"{failed / max(1, total_sagas) * 100:.0f}%") -# ===== TAB 4: Liability & Trust ============================================== -with tab_liability: - st.subheader("Liability & Trust") +# ===== TAB 4: Audit & Verification ========================================== +with tab_audit: + st.subheader("Audit & Verification") c1, c2 = st.columns(2) with c1: - # Sponsor network graph - st.markdown("#### Sponsor Network") - if not vouches_df.empty: - G = nx.DiGraph() - for _, v in vouches_df.iterrows(): - short_voucher = v["voucher_did"].split(":")[-1] - short_vouchee = v["vouchee_did"].split(":")[-1] - G.add_edge(short_voucher, short_vouchee, weight=v["bonded_amount"]) - - pos = nx.spring_layout(G, seed=42, k=2.0) - - edge_x, edge_y = [], [] - annotations = [] - for u, v, data in G.edges(data=True): - x0, y0 = pos[u] - x1, y1 = pos[v] - edge_x.extend([x0, x1, None]) - edge_y.extend([y0, y1, None]) - mid_x, mid_y = (x0 + x1) / 2, (y0 + y1) / 2 - annotations.append( - dict( - x=mid_x, - y=mid_y, - text=f"{data['weight']:.2f}σ", - showarrow=False, - font=dict(size=9, color="#FFDC00"), - ) - ) - - node_x = [pos[n][0] for n in G.nodes()] - node_y = [pos[n][1] for n in G.nodes()] - node_text = list(G.nodes()) - - fig = go.Figure() - fig.add_trace( - go.Scatter( - x=edge_x, - y=edge_y, - mode="lines", - line=dict(width=1.5, color="#555"), - hoverinfo="none", - ) - ) - fig.add_trace( - go.Scatter( - x=node_x, - y=node_y, - mode="markers+text", - marker=dict(size=20, color="#0074D9", line=dict(width=2, color="#E0E0E0")), - text=node_text, - textposition="top center", - textfont=dict(size=10, color="#E0E0E0"), - hoverinfo="text", - ) - ) - fig.update_layout( - title="Sponsor Network (bond amounts)", - showlegend=False, - xaxis=dict(visible=False), - 
yaxis=dict(visible=False), - annotations=annotations, - **PLOTLY_LAYOUT, - ) - st.plotly_chart(fig, use_container_width=True) - else: - st.info("No sponsors recorded.") - - with c2: - # Penalty cascade visualization - st.markdown("#### Penalty Cascades") - if not slashes_df.empty: - fig = go.Figure() - for _, sl in slashes_df.iterrows(): - fig.add_trace( - go.Scatter( - x=[0, sl["cascade_depth"]], - y=[sl["sigma_before"], sl["sigma_after"]], - mode="lines+markers", - name=sl["vouchee_did"].split(":")[-1], - marker=dict(size=10), - line=dict(width=2), - hovertemplate=( - f"Agent: {sl['vouchee_did']}
" - f"Reason: {sl['reason']}
" - f"σ: {sl['sigma_before']:.3f} → {sl['sigma_after']:.3f}
" - f"Cascade depth: {sl['cascade_depth']}
" - f"Sponsors clipped: {sl['vouchers_clipped']}" - ), - ) - ) - fig.update_layout( - title="Penalty Impact (σ drop vs cascade depth)", - xaxis_title="Cascade Depth", - yaxis_title="σ Score", - **PLOTLY_LAYOUT, - ) - st.plotly_chart(fig, use_container_width=True) - else: - st.info("No penalty events recorded.") - - st.markdown("---") - c3, c4 = st.columns(2) - - with c3: - # Trust leaderboard - st.markdown("#### 🏆 Trust Score Leaderboard") + st.markdown("#### Trust Score Leaderboard") if not agents_df.empty: leaderboard = ( agents_df.groupby("agent_did") @@ -885,9 +706,7 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: .sort_values("eff_score", ascending=False) .reset_index() ) - leaderboard["rank"] = range(1, len(leaderboard) + 1) leaderboard["agent"] = leaderboard["agent_did"].str.split(":").str[-1] - fig = go.Figure( go.Bar( x=leaderboard["eff_score"], @@ -905,40 +724,46 @@ def generate_events(sessions: list[str], n: int = 200) -> pd.DataFrame: **PLOTLY_LAYOUT, ) st.plotly_chart(fig, use_container_width=True) + else: + st.info("No agents in the selected session.") - with c4: - # Liability exposure heatmap - st.markdown("#### Liability Exposure Heatmap") - if not vouches_df.empty: - exposure = ( - vouches_df.groupby(["voucher_did", "session_id"])["bonded_amount"] - .sum() - .reset_index() - ) - exposure["sponsor"] = exposure["voucher_did"].str.split(":").str[-1] - exposure["session"] = exposure["session_id"].str[:12] - pivot = exposure.pivot_table( - index="sponsor", columns="session", values="bonded_amount", fill_value=0 - ) - + with c2: + st.markdown("#### Audit and Verification Events") + signal_events = events_df[events_df["category"].isin(["audit", "verification", "security"])] + if not signal_events.empty: + counts = signal_events["event_type"].value_counts() fig = go.Figure( - go.Heatmap( - z=pivot.values, - x=pivot.columns.tolist(), - y=pivot.index.tolist(), - colorscale="YlOrRd", - text=np.round(pivot.values, 3), - 
texttemplate="%{text}", - hovertemplate="Sponsor: %{y}
Session: %{x}
Bonded σ: %{z:.3f}", + go.Bar( + x=counts.index.tolist(), + y=counts.values.tolist(), + marker_color=[EVENT_CATEGORIES.get(x.split(".")[0], "#AAA") for x in counts.index], + text=counts.values.tolist(), + textposition="auto", ) ) - fig.update_layout( - title="Total Bonded σ per Agent × Session", - **PLOTLY_LAYOUT, - ) + fig.update_layout(title="Safety Signal Event Counts", **PLOTLY_LAYOUT) st.plotly_chart(fig, use_container_width=True) else: - st.info("No exposure data available.") + st.info("No audit, verification, or security events recorded.") + + st.markdown("---") + c3, c4, c5 = st.columns(3) + c3.metric("Audits", int((events_df["category"] == "audit").sum())) + c4.metric("Verifications", int((events_df["category"] == "verification").sum())) + c5.metric("Security Events", int((events_df["category"] == "security").sum())) + + st.markdown("#### Recent Safety Signals") + recent = events_df[events_df["category"].isin(["audit", "verification", "security"])].tail(25) + if not recent.empty: + display_recent = recent.copy() + display_recent["time"] = display_recent["timestamp"].dt.strftime("%H:%M:%S") + st.dataframe( + display_recent[["time", "event_type", "session_id", "agent_did", "causal_trace_id"]], + use_container_width=True, + hide_index=True, + ) + else: + st.info("No safety signals match the current filters.") # ===== TAB 5: Event Stream =================================================== with tab_events: diff --git a/agent-governance-python/agent-hypervisor/examples/demo.py b/agent-governance-python/agent-hypervisor/examples/demo.py index 66732ad3c..89a5236d1 100644 --- a/agent-governance-python/agent-hypervisor/examples/demo.py +++ b/agent-governance-python/agent-hypervisor/examples/demo.py @@ -8,8 +8,8 @@ 1. Session creation with configurable governance 2. Agent admission via trust scoring (eff_score -> ring assignment) 3. Saga orchestration with reversibility tracking - 4. Liability enforcement (sponsorship, bonding, penalty) - 5. 
Audit trail with hash commitment + 4. Audit trail with a hash-chained delta log + 5. Optional integration adapters Run: cd modules/hypervisor @@ -115,7 +115,7 @@ async def demo_session_lifecycle() -> None: ) step("Delta captured: /workspace/report.md") - # Terminate and get hash commitment + # Terminate and get the audit hash root hash_chain_root = await hv.terminate_session(session.sso.session_id) step(f"Session terminated -- audit log root: {hash_chain_root[:16]}...") @@ -185,57 +185,11 @@ async def fail_exec(): print(" All-or-nothing semantics for multi-agent workflows") -# ── Demo 3: Liability & Penalty ──────────────────────────────────── - - -async def demo_liability() -> None: - banner("Demo 3: Liability -- Sponsorship, Bonding, and Penalty") - - hv = Hypervisor(max_exposure=10.0) - - # Sponsor posts a bond for an agent - record = hv.vouching.vouch( - voucher_did="did:mesh:sponsor", - vouchee_did="did:mesh:new-agent", - session_id="session-001", - voucher_sigma=0.9, - ) - step( - f"Bond posted: {record.voucher_did} -> {record.vouchee_did} ({record.bonded_amount:.2f} tokens)" - ) - - exposure = hv.vouching.get_total_exposure("did:mesh:sponsor", "session-001") - step(f"Sponsor exposure: {exposure:.2f}/10.0 max") - - # Agent misbehaves -- penalize! - slash_result = hv.slashing.slash( - vouchee_did="did:mesh:new-agent", - session_id="session-001", - vouchee_sigma=0.30, - risk_weight=0.8, - reason="policy_violation", - agent_scores={"did:mesh:new-agent": 0.30}, - ) - fail( - f"Agent penalized! 
sigma: {slash_result.vouchee_sigma_before} -> {slash_result.vouchee_sigma_after}" - ) - step(f"Penalty reason: {slash_result.reason}") - for clip in slash_result.voucher_clips: - warn( - f"Sponsor {clip.voucher_did.split(':')[-1]} clipped: sigma {clip.sigma_before} -> {clip.sigma_after}" - ) - - print(f"\n {BOLD}Liability model:{RESET}") - print(" Sponsors sponsor for agents with token bonds") - print(" Misbehavior triggers proportional penalty") - print(" Maximum exposure limits protect sponsors") - - -# ── Demo 4: Audit Trail ──────────────────────────────────────────── +# ── Demo 3: Audit Trail ──────────────────────────────────────────── async def demo_audit() -> None: - banner("Demo 4: Audit Trail -- Hash-Committed Delta Chain") + banner("Demo 3: Audit Trail -- Hash-Chained Delta Log") hv = Hypervisor() config = SessionConfig(enable_audit=True) @@ -271,25 +225,17 @@ async def demo_audit() -> None: audit_log = await hv.terminate_session(session.sso.session_id) step(f"audit log root: {audit_log}") - # Verify commitment - commitment = hv.commitment.get_commitment(session.sso.session_id) - if commitment: - step("Commitment stored for session") - step(f" Participants: {len(commitment.participant_dids)}") - step(f" Deltas: {commitment.delta_count}") - step(f" Verified: {hv.commitment.verify(session.sso.session_id, audit_log)}") - print(f"\n {BOLD}Audit guarantees:{RESET}") print(" Every agent action is delta-captured") - print(" hash tree provides tamper-evident commitment") - print(" Commitments are immutable and verifiable") + print(" The delta hash chain provides a tamper-evident root") + print(" The returned root can be stored by the caller") -# ── Demo 5: Integration Adapters ──────────────────────────────────── +# ── Demo 4: Integration Adapters ──────────────────────────────────── async def demo_integrations() -> None: - banner("Demo 5: Integration Adapters -- Nexus + Verification + IATP") + banner("Demo 4: Integration Adapters -- Nexus + Verification + 
IATP") from hypervisor.integrations.iatp_adapter import IATPAdapter from hypervisor.integrations.nexus_adapter import NexusAdapter @@ -318,11 +264,6 @@ def calculate_trust_score( return scores[650]() return scores[200]() - def slash_reputation( - self, agent_did, reason, severity, evidence_hash=None, trace_id=None, broadcast=True - ): - pass - def record_task_outcome(self, agent_did, outcome): pass @@ -396,7 +337,6 @@ async def main() -> None: await demo_session_lifecycle() await demo_saga() - await demo_liability() await demo_audit() await demo_integrations() @@ -404,10 +344,9 @@ async def main() -> None: print(" The Agent Hypervisor provides:") print(" - Ring-based execution isolation (4 trust tiers)") print(" - Saga orchestration with automatic compensation") - print(" - Economic liability (sponsorship + penalty)") - print(" - hash-committed audit trails") + print(" - Hash-chained audit trails") print(" - Pluggable integrations (Nexus, Verification, IATP)") - print(f"\n {BOLD}184 tests passing | 268us full pipeline | Zero dependencies{RESET}") + print(f"\n {BOLD}644 tests passing | 268us full pipeline | Zero dependencies{RESET}") print("\n Learn more: https://github.com/microsoft/agent-governance-toolkit") print() diff --git a/agent-governance-python/agent-hypervisor/notebooks/README.md b/agent-governance-python/agent-hypervisor/notebooks/README.md index 04c94d256..8809a962a 100644 --- a/agent-governance-python/agent-hypervisor/notebooks/README.md +++ b/agent-governance-python/agent-hypervisor/notebooks/README.md @@ -6,7 +6,7 @@ Interactive Jupyter notebooks for exploring the **agent-hypervisor** runtime. 
| Notebook | Description | |----------|-------------| -| [`hypervisor-exploration.ipynb`](hypervisor-exploration.ipynb) | End-to-end tour of execution rings, sagas, kill switch, rate limiting, audit trails, and joint liability | +| [`hypervisor-exploration.ipynb`](hypervisor-exploration.ipynb) | End-to-end tour of execution rings, sagas, kill switch, rate limiting, audit trails, and visualization | ## Quick Start @@ -25,8 +25,7 @@ jupyter notebook notebooks/ 4. **Kill Switch** — Terminate misbehaving agents and hand off in-flight work 5. **Resource Limits** — Per-ring rate limiting demonstration 6. **Audit Trail** — Hash-chained, tamper-evident delta log with chain verification -7. **Joint Liability** — Vouching bonds and slashing cascades -8. **Visualization** — Plotly charts showing ring distribution, trust mapping, and audit timeline +7. **Visualization** — Plotly charts showing ring distribution, trust mapping, and audit timeline ## Requirements diff --git a/agent-governance-python/agent-hypervisor/notebooks/hypervisor-exploration.ipynb b/agent-governance-python/agent-hypervisor/notebooks/hypervisor-exploration.ipynb index 1cc7740ac..77adc539e 100644 --- a/agent-governance-python/agent-hypervisor/notebooks/hypervisor-exploration.ipynb +++ b/agent-governance-python/agent-hypervisor/notebooks/hypervisor-exploration.ipynb @@ -16,8 +16,7 @@ "| 4 | Kill switch activation |\n", "| 5 | Resource / rate limiting |\n", "| 6 | Hash-chained audit trail |\n", - "| 7 | Joint liability — vouching & slashing |\n", - "| 8 | Visualization |\n", + "| 7 | Visualization |\n", "\n", "> **Prerequisite:** install the package in editable mode: \n", "> ```bash\n", @@ -55,9 +54,7 @@ " ReversibilityLevel,\n", " SessionConfig,\n", ")\n", - "from hypervisor.audit import DeltaEngine, VFSChange\n", - "from hypervisor.liability.slashing import Slashing\n", - "from hypervisor.liability.vouching import VouchingEngine\n", + "from hypervisor.audit.delta import DeltaEngine, VFSChange\n", "from 
hypervisor.models import ActionDescriptor\n", "from hypervisor.rings import RingEnforcer\n", "from hypervisor.security.kill_switch import KillReason, KillSwitch\n", @@ -515,76 +512,7 @@ "metadata": {}, "source": [ "---\n", - "## 7 — Joint Liability\n", - "\n", - "Agents can **vouch** for each other by bonding part of their trust score. \n", - "If the vouchee misbehaves, the voucher is **slashed** proportionally." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "vouching = VouchingEngine()\n", - "slashing = Slashing(vouching_engine=vouching)\n", - "\n", - "# agent-alpha vouches for agent-gamma (bonds 20% of its sigma)\n", - "vouch = vouching.vouch(\n", - " voucher_did=\"agent-alpha\",\n", - " vouchee_did=\"agent-gamma\",\n", - " session_id=SESSION_ID,\n", - " voucher_sigma=0.95,\n", - " bond_pct=0.20,\n", - ")\n", - "\n", - "print(f\"Vouch ID : {vouch.vouch_id}\")\n", - "print(f\"Voucher : {vouch.voucher_did}\")\n", - "print(f\"Vouchee : {vouch.vouchee_did}\")\n", - "print(f\"Bonded σ % : {vouch.bonded_sigma_pct:.0%}\")\n", - "print(f\"Bonded amount: {vouch.bonded_amount:.4f}\")\n", - "\n", - "# Compute effective score with the vouch backing\n", - "eff = vouching.compute_eff_score(\n", - " vouchee_did=\"agent-gamma\",\n", - " session_id=SESSION_ID,\n", - " vouchee_sigma=0.55,\n", - " risk_weight=0.5,\n", - ")\n", - "print(f\"\\nEffective score for agent-gamma (with vouch): {eff:.4f}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Now slash agent-gamma — voucher alpha gets clipped too\n", - "slash_result = slashing.slash(\n", - " vouchee_did=\"agent-gamma\",\n", - " session_id=SESSION_ID,\n", - " vouchee_sigma=0.55,\n", - " risk_weight=0.8,\n", - " reason=\"Wrote to restricted path without permission\",\n", - ")\n", - "\n", - "print(f\"Slash ID : {slash_result.slash_id}\")\n", - "print(f\"Vouchee σ before : 
{slash_result.vouchee_sigma_before:.4f}\")\n", - "print(f\"Vouchee σ after : {slash_result.vouchee_sigma_after:.4f}\")\n", - "print(f\"Reason : {slash_result.reason}\")\n", - "print(\"\\nVoucher clips:\")\n", - "for clip in slash_result.voucher_clips:\n", - " print(f\" {clip}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "---\n", - "## 8 — Visualization\n", + "## 7 — Visualization\n", "\n", "Charts summarising ring distribution, resource limits, and audit chain." ] diff --git a/agent-governance-python/agent-hypervisor/pyproject.toml b/agent-governance-python/agent-hypervisor/pyproject.toml index 89f98fd61..b7ffc30af 100644 --- a/agent-governance-python/agent-hypervisor/pyproject.toml +++ b/agent-governance-python/agent-hypervisor/pyproject.toml @@ -53,9 +53,8 @@ packages = ["src/hypervisor"] # The published wheel is a dep-only deprecation stub (see [project] above), but # the hypervisor source tree under src/ is still linted in CI. These dev-tooling # sections were dropped during the package consolidation in #2794; without the -# per-file-ignores, ruff reports F401 on re-export __init__.py files such as -# hypervisor/liability/__init__.py and hypervisor/session/__init__.py. Restoring -# them keeps CI green. +# per-file-ignores, ruff reports F401 on package re-export __init__.py files. +# Restoring them keeps CI green. 
# ============================================================================ [tool.ruff] target-version = "py311" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/__init__.py b/agent-governance-python/agent-hypervisor/src/hypervisor/__init__.py index 47f424374..fa30a9bdd 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/__init__.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/__init__.py @@ -25,21 +25,11 @@ from hypervisor import constants # noqa: F401,E402 # Audit -from hypervisor.audit.commitment import CommitmentEngine # noqa: E402 from hypervisor.audit.delta import DeltaEngine # noqa: E402 -from hypervisor.audit.gc import EphemeralGC # noqa: E402 # Top-level orchestrator from hypervisor.core import Hypervisor # noqa: E402 -# Liability -from hypervisor.liability import LiabilityMatrix # noqa: E402 -from hypervisor.liability.attribution import AttributionResult, CausalAttributor # noqa: E402 -from hypervisor.liability.ledger import LedgerEntryType, LiabilityLedger # noqa: E402 -from hypervisor.liability.quarantine import QuarantineManager, QuarantineReason # noqa: E402 -from hypervisor.liability.slashing import SlashingEngine # noqa: E402 -from hypervisor.liability.vouching import VouchingEngine, VouchRecord # noqa: E402 - # Core models from hypervisor.models import ( # noqa: E402 ConsistencyMode, @@ -71,9 +61,6 @@ from hypervisor.rings.enforcer import RingEnforcer # noqa: E402 # Saga -from hypervisor.saga.checkpoint import CheckpointManager, SemanticCheckpoint # noqa: E402 -from hypervisor.saga.dsl import SagaDefinition, SagaDSLParser # noqa: E402 -from hypervisor.saga.fan_out import FanOutOrchestrator, FanOutPolicy # noqa: E402 from hypervisor.saga.orchestrator import SagaOrchestrator, SagaTimeoutError # noqa: E402 from hypervisor.saga.state_machine import SagaState, StepState # noqa: E402 @@ -83,18 +70,11 @@ # Session management from hypervisor.session import SharedSessionObject # noqa: E402 -from 
hypervisor.session.intent_locks import ( # noqa: E402 - DeadlockError, - IntentLockManager, - LockContentionError, - LockIntent, -) from hypervisor.session.isolation import IsolationLevel # noqa: E402 from hypervisor.session.sso import SessionVFS, VFSEdit, VFSPermissionError # noqa: E402 from hypervisor.session.vector_clock import ( # noqa: E402 CausalViolationError, VectorClock, - VectorClockManager, ) # Verification @@ -117,24 +97,8 @@ "VFSEdit", "VFSPermissionError", "VectorClock", - "VectorClockManager", "CausalViolationError", - "IntentLockManager", - "LockIntent", - "LockContentionError", - "DeadlockError", "IsolationLevel", - # Liability - "VouchRecord", - "VouchingEngine", - "SlashingEngine", - "LiabilityMatrix", - "CausalAttributor", - "AttributionResult", - "QuarantineManager", - "QuarantineReason", - "LiabilityLedger", - "LedgerEntryType", # Rings "RingEnforcer", "ActionClassifier", @@ -150,16 +114,8 @@ "SagaTimeoutError", "SagaState", "StepState", - "FanOutOrchestrator", - "FanOutPolicy", - "CheckpointManager", - "SemanticCheckpoint", - "SagaDSLParser", - "SagaDefinition", # Audit "DeltaEngine", - "CommitmentEngine", - "EphemeralGC", # Verification "TransactionHistoryVerifier", # Observability diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/api/models.py b/agent-governance-python/agent-hypervisor/src/hypervisor/api/models.py index 943628021..4020a86f9 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/api/models.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/api/models.py @@ -23,7 +23,6 @@ class CreateSessionRequest(BaseModel): max_duration_seconds: int = 3600 min_eff_score: float = 0.60 enable_audit: bool = True - enable_blockchain_commitment: bool = False class ParticipantInfo(BaseModel): @@ -181,40 +180,6 @@ class ExecuteStepResponse(BaseModel): error: str | None = None -# ── Liability models ──────────────────────────────────────────────────────── - - -class CreateVouchRequest(BaseModel): - 
"""Request body for creating a sponsor.""" - - voucher_did: str = Field(..., description="DID of the sponsorship agent") - vouchee_did: str = Field(..., description="DID of the agent being vouched for") - voucher_sigma: float = Field(..., description="Sponsor's raw reputation score") - bond_pct: float | None = None - expiry: str | None = None - - -class VouchResponse(BaseModel): - """Response after creating a sponsor.""" - - vouch_id: str - voucher_did: str - vouchee_did: str - session_id: str - bonded_amount: float - bonded_sigma_pct: float - is_active: bool - - -class LiabilityExposureResponse(BaseModel): - """Agent's liability exposure across sessions.""" - - agent_did: str - vouches_given: list[VouchResponse] - vouches_received: list[VouchResponse] - total_exposure: float - - # ── Event models ──────────────────────────────────────────────────────────── @@ -248,30 +213,9 @@ class StatsResponse(BaseModel): active_sessions: int total_participants: int active_sagas: int - total_vouches: int event_count: int -# ── Audit models ──────────────────────────────────────────────────────────── - - -class CommitmentResponse(BaseModel): - session_id: str - hash_chain_root: str - participant_dids: list[str] - delta_count: int - committed_at: str - committed_to: str = "local" - blockchain_tx_id: str | None = None - - -class VerifyCommitmentResponse(BaseModel): - session_id: str - valid: bool - committed_root: str - expected_root: str - - # ── Verification models ───────────────────────────────────────────────────── diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/api/server.py b/agent-governance-python/agent-hypervisor/src/hypervisor/api/server.py index 17cd37a19..07e6b2abd 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/api/server.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/api/server.py @@ -4,7 +4,7 @@ FastAPI REST API server for the Agent Hypervisor. 
Exposes the hypervisor's core capabilities — sessions, rings, sagas, -liability, events, and health — as a RESTful API with OpenAPI docs. +events, and health — as a RESTful API with OpenAPI docs. Run with: uvicorn hypervisor.api.server:app """ @@ -24,17 +24,14 @@ AddStepRequest, AddStepResponse, AgentRingResponse, - CommitmentResponse, CreateSagaResponse, CreateSessionRequest, CreateSessionResponse, - CreateVouchRequest, EventResponse, EventStatsResponse, ExecuteStepResponse, JoinSessionRequest, JoinSessionResponse, - LiabilityExposureResponse, ParticipantInfo, RingCheckRequest, RingCheckResponse, @@ -43,10 +40,8 @@ SessionDetailResponse, SessionListItem, StatsResponse, - VerifyCommitmentResponse, VerifyHistoryRequest, VerifyHistoryResponse, - VouchResponse, ) from hypervisor.core import Hypervisor, ManagedSession from hypervisor.models import ( @@ -148,8 +143,8 @@ def create_app() -> FastAPI: title="Agent Hypervisor API", description=( "REST API for the Agent Hypervisor — runtime supervisor for " - "multi-agent Shared Sessions with Execution Rings, Joint Liability, " - "Saga Orchestration, and Audit log audit trails." + "multi-agent Shared Sessions with Execution Rings, Saga Orchestration, " + "and audit trails." 
), version=__version__, lifespan=lifespan, @@ -194,7 +189,6 @@ async def get_stats() -> StatsResponse: active_sessions=len(hv.active_sessions), total_participants=total_participants, active_sagas=active_sagas, - total_vouches=hv.vouching.vouch_count, event_count=bus.event_count, ) @@ -216,7 +210,6 @@ async def create_session(req: CreateSessionRequest) -> CreateSessionResponse: max_duration_seconds=req.max_duration_seconds, min_eff_score=req.min_eff_score, enable_audit=req.enable_audit, - enable_blockchain_commitment=req.enable_blockchain_commitment, ) managed = await _hv().create_session(config=config, creator_did=req.creator_did) return CreateSessionResponse( @@ -553,102 +546,6 @@ async def _noop_executor() -> dict[str, str]: raise HTTPException(status_code=404, detail=f"Saga {saga_id} or step {step_id} not found") -# ── Liability ─────────────────────────────────────────────────────────────── - - -@app.post( - "/api/v1/sessions/{session_id}/sponsor", - response_model=VouchResponse, - status_code=201, - tags=["Liability"], -) -async def create_vouch(session_id: str, req: CreateVouchRequest) -> VouchResponse: - """Create a sponsorship bond between agents in a session.""" - hv = _hv() - _get_managed(session_id) # verify session exists - try: - record = hv.vouching.vouch( - voucher_did=req.voucher_did, - vouchee_did=req.vouchee_did, - session_id=session_id, - voucher_sigma=req.voucher_sigma, - bond_pct=req.bond_pct, - ) - except Exception as e: - logger.debug("create_vouch failed for session %s: %s", session_id, e, exc_info=True) - raise HTTPException(status_code=400, detail=str(e)) - return VouchResponse( - vouch_id=record.vouch_id, - voucher_did=record.voucher_did, - vouchee_did=record.vouchee_did, - session_id=record.session_id, - bonded_amount=record.bonded_amount, - bonded_sigma_pct=record.bonded_sigma_pct, - is_active=record.is_active, - ) - - -@app.get( - "/api/v1/sessions/{session_id}/sponsors", - response_model=list[VouchResponse], - tags=["Liability"], -) 
-async def list_vouches(session_id: str) -> list[VouchResponse]: - """List all sponsors in a session.""" - _get_managed(session_id) - hv = _hv() - return [ - VouchResponse( - vouch_id=v.vouch_id, - voucher_did=v.voucher_did, - vouchee_did=v.vouchee_did, - session_id=v.session_id, - bonded_amount=v.bonded_amount, - bonded_sigma_pct=v.bonded_sigma_pct, - is_active=v.is_active, - ) - for v in hv.vouching._vouches.values() - if v.session_id == session_id - ] - - -@app.get( - "/api/v1/agents/{agent_did}/liability", - response_model=LiabilityExposureResponse, - tags=["Liability"], -) -async def get_agent_liability(agent_did: str) -> LiabilityExposureResponse: - """Get an agent's liability exposure across all sessions.""" - hv = _hv() - vouches_given = [] - vouches_received = [] - total_exposure = 0.0 - - for v in hv.vouching._vouches.values(): - vr = VouchResponse( - vouch_id=v.vouch_id, - voucher_did=v.voucher_did, - vouchee_did=v.vouchee_did, - session_id=v.session_id, - bonded_amount=v.bonded_amount, - bonded_sigma_pct=v.bonded_sigma_pct, - is_active=v.is_active, - ) - if v.voucher_did == agent_did: - vouches_given.append(vr) - if v.is_active and not v.is_expired: - total_exposure += v.bonded_amount - if v.vouchee_did == agent_did: - vouches_received.append(vr) - - return LiabilityExposureResponse( - agent_did=agent_did, - vouches_given=vouches_given, - vouches_received=vouches_received, - total_exposure=total_exposure, - ) - - # ── Events ────────────────────────────────────────────────────────────────── @@ -696,65 +593,6 @@ async def get_event_stats() -> EventStatsResponse: ) -# ── Audit endpoints ───────────────────────────────────────────────────────── - - -@app.get("/api/v1/audit/commitments", response_model=list[CommitmentResponse], tags=["Audit"]) -async def list_commitments(): - """List all session commitments.""" - engine = _hv().commitment_engine - return [ - CommitmentResponse( - session_id=r.session_id, - hash_chain_root=r.hash_chain_root, - 
participant_dids=r.participant_dids, - delta_count=r.delta_count, - committed_at=r.committed_at.isoformat(), - committed_to=r.committed_to, - blockchain_tx_id=r.blockchain_tx_id, - ) - for r in engine._commitments.values() - ] - - -@app.get( - "/api/v1/audit/commitments/{session_id}", response_model=CommitmentResponse, tags=["Audit"] -) -async def get_commitment(session_id: str): - """Get commitment for a specific session.""" - engine = _hv().commitment_engine - record = engine.get_commitment(session_id) - if not record: - raise HTTPException(status_code=404, detail="Commitment not found") - return CommitmentResponse( - session_id=record.session_id, - hash_chain_root=record.hash_chain_root, - participant_dids=record.participant_dids, - delta_count=record.delta_count, - committed_at=record.committed_at.isoformat(), - committed_to=record.committed_to, - blockchain_tx_id=record.blockchain_tx_id, - ) - - -@app.post( - "/api/v1/audit/verify/{session_id}", response_model=VerifyCommitmentResponse, tags=["Audit"] -) -async def verify_commitment(session_id: str, expected_root: str = Query(...)): - """Verify a session's audit log root matches its commitment.""" - engine = _hv().commitment_engine - record = engine.get_commitment(session_id) - if not record: - raise HTTPException(status_code=404, detail="Commitment not found") - valid = engine.verify(session_id, expected_root) - return VerifyCommitmentResponse( - session_id=session_id, - valid=valid, - committed_root=record.hash_chain_root, - expected_root=expected_root, - ) - - # ── Verification endpoints ────────────────────────────────────────────────── diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/__init__.py b/agent-governance-python/agent-hypervisor/src/hypervisor/audit/__init__.py index 2c1670e9e..9999d2843 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/__init__.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/audit/__init__.py @@ -1,4 +1,4 @@ # 
Copyright (c) Microsoft Corporation. # Licensed under the MIT License. # Public Preview — basic implementation -"""Audit subpackage — delta engine, commitment, and GC.""" +"""Audit subpackage — delta engine and hash-chain audit.""" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/commitment.py b/agent-governance-python/agent-hypervisor/src/hypervisor/audit/commitment.py deleted file mode 100644 index c3d6401d2..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/commitment.py +++ /dev/null @@ -1,76 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Hash Commitment — stub implementation. - -Public Preview: stores commitments in-memory only. -No blockchain anchoring. -""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from datetime import UTC, datetime - - -@dataclass -class CommitmentRecord: - """Record of a Summary Hash commitment.""" - - session_id: str - hash_chain_root: str - participant_dids: list[str] - delta_count: int - committed_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - blockchain_tx_id: str | None = None - committed_to: str = "local" - - -class CommitmentEngine: - """ - Simple in-memory commitment store. - - Public Preview: stores commitments locally, no external anchoring. 
- """ - - def __init__(self) -> None: - self._commitments: dict[str, CommitmentRecord] = {} - self._batch_queue: list[CommitmentRecord] = [] - - def commit( - self, - session_id: str, - hash_chain_root: str, - participant_dids: list[str], - delta_count: int, - ) -> CommitmentRecord: - """Commit a session's Summary Hash.""" - record = CommitmentRecord( - session_id=session_id, - hash_chain_root=hash_chain_root, - participant_dids=participant_dids, - delta_count=delta_count, - ) - self._commitments[session_id] = record - return record - - def verify(self, session_id: str, expected_root: str) -> bool: - """Verify a session's audit log root.""" - record = self._commitments.get(session_id) - if not record: - return False - return record.hash_chain_root == expected_root - - def queue_for_batch(self, record: CommitmentRecord) -> None: - """Queue a commitment (Public Preview: no-op).""" - self._batch_queue.append(record) - - def flush_batch(self) -> list[CommitmentRecord]: - """Flush the batch queue.""" - batch = list(self._batch_queue) - self._batch_queue.clear() - return batch - - def get_commitment(self, session_id: str) -> CommitmentRecord | None: - return self._commitments.get(session_id) diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/gc.py b/agent-governance-python/agent-hypervisor/src/hypervisor/audit/gc.py deleted file mode 100644 index d0ec14663..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/audit/gc.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Ephemeral Session Data Garbage Collection — stub implementation. - -Public Preview: GC is a no-op. Data is retained in-memory for -session lifetime only. 
-""" - -from __future__ import annotations - -from dataclasses import dataclass, field -from datetime import UTC, datetime -from typing import Any - - -@dataclass -class GCResult: - """Result of a garbage collection run.""" - - session_id: str - retained_deltas: int - retained_hash: bool - purged_vfs_files: int - purged_caches: int - storage_before_bytes: int - storage_after_bytes: int - gc_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - - @property - def storage_saved_bytes(self) -> int: - return self.storage_before_bytes - self.storage_after_bytes - - @property - def savings_pct(self) -> float: - if self.storage_before_bytes == 0: - return 0.0 - return (self.storage_saved_bytes / self.storage_before_bytes) * 100 - - -@dataclass -class RetentionPolicy: - """Configuration for what to retain after GC.""" - - delta_retention_days: int = 180 - hash_retention: str = "permanent" - liability_snapshot: bool = True - - -class EphemeralGC: - """ - GC stub (Public Preview: logs collection requests, no actual purge). 
- """ - - def __init__(self, policy: RetentionPolicy | None = None) -> None: - self.policy = policy or RetentionPolicy() - self._gc_history: list[GCResult] = [] - self._purged_sessions: set[str] = set() - - def collect( - self, - session_id: str, - vfs: Any = None, - delta_engine: Any = None, - vfs_file_count: int = 0, - cache_count: int = 0, - delta_count: int = 0, - estimated_vfs_bytes: int = 0, - estimated_cache_bytes: int = 0, - estimated_delta_bytes: int = 0, - ) -> GCResult: - """Log a GC request (Public Preview: no actual purge).""" - result = GCResult( - session_id=session_id, - retained_deltas=delta_count, - retained_hash=True, - purged_vfs_files=0, - purged_caches=0, - storage_before_bytes=estimated_vfs_bytes - + estimated_cache_bytes - + estimated_delta_bytes, - storage_after_bytes=estimated_vfs_bytes + estimated_cache_bytes + estimated_delta_bytes, - ) - self._gc_history.append(result) - self._purged_sessions.add(session_id) - return result - - def is_purged(self, session_id: str) -> bool: - return session_id in self._purged_sessions - - def should_expire_deltas(self, delta_timestamp: datetime) -> bool: - return False - - @property - def history(self) -> list[GCResult]: - return list(self._gc_history) - - @property - def purged_session_count(self) -> int: - return len(self._purged_sessions) diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/constants.py b/agent-governance-python/agent-hypervisor/src/hypervisor/constants.py index d8a9e4efb..696a42f97 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/constants.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/constants.py @@ -39,21 +39,6 @@ RATE_LIMIT_FALLBACK: tuple[float, float] = RATE_LIMIT_RING_2 """Fallback rate limit when a ring is not found in the limits map.""" -# --------------------------------------------------------------------------- -# Vouching / sponsorship thresholds -# 
--------------------------------------------------------------------------- -VOUCHING_SCORE_SCALE: float = 1000.0 -"""Maximum trust-score scale used by the vouching engine.""" - -VOUCHING_MIN_VOUCHER_SCORE: float = 0.50 -"""Minimum score required to sponsor another agent.""" - -VOUCHING_DEFAULT_BOND_PCT: float = 0.20 -"""Default percentage of sigma bonded when sponsoring.""" - -VOUCHING_DEFAULT_MAX_EXPOSURE: float = 0.80 -"""Maximum exposure percentage for bonding.""" - # --------------------------------------------------------------------------- # Saga orchestrator defaults # --------------------------------------------------------------------------- diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/core.py b/agent-governance-python/agent-hypervisor/src/hypervisor/core.py index fb1d0dd8a..e1de37c10 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/core.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/core.py @@ -3,7 +3,7 @@ """ Hypervisor — Top-level orchestrator for multi-agent Shared Sessions. -Composes all submodules (Session, Liability, Rings, Reversibility, +Composes all submodules (Session, Rings, Reversibility, Saga, Audit, Verification) into a unified governance runtime. 
Optionally integrates with external trust scoring and behavioral @@ -15,11 +15,7 @@ import logging from typing import Any -from hypervisor.audit.commitment import CommitmentEngine from hypervisor.audit.delta import DeltaEngine -from hypervisor.audit.gc import EphemeralGC, RetentionPolicy -from hypervisor.liability.slashing import SlashingEngine -from hypervisor.liability.vouching import VouchingEngine from hypervisor.models import ( ActionDescriptor, ConsistencyMode, @@ -71,23 +67,16 @@ class Hypervisor: def __init__( self, - retention_policy: RetentionPolicy | None = None, - max_exposure: float | None = None, nexus: Any | None = None, policy_check: Any | None = None, iatp: Any | None = None, ) -> None: # Shared engines - self.vouching = VouchingEngine(max_exposure=max_exposure) - self.slashing = SlashingEngine(self.vouching) self.ring_enforcer = RingEnforcer() self.classifier = ActionClassifier() self.verifier = TransactionHistoryVerifier() - self.commitment = CommitmentEngine() - self.gc = EphemeralGC(retention_policy) - # Aliases expected by API layer - self.commitment_engine = self.commitment + # Alias expected by API layer self.history_verifier = self.verifier # Integration adapters (optional) @@ -201,7 +190,7 @@ async def activate_session(self, session_id: str) -> None: async def terminate_session(self, session_id: str) -> str | None: """ - Terminate a session and commit audit trail. + Terminate a session and finalize the audit hash chain. 
Returns: audit log root summary hash, or None if audit disabled @@ -215,28 +204,13 @@ async def terminate_session(self, session_id: str) -> str | None: return hash_chain_root def _commit_audit(self, session_id: str, managed: ManagedSession) -> str | None: - """Commit audit trail and return hash chain root (None if audit disabled).""" + """Compute and return the audit hash chain root (None if audit disabled).""" if not managed.sso.config.enable_audit: return None - hash_chain_root = managed.delta_engine.compute_hash_chain_root() - if hash_chain_root: - self.commitment.commit( - session_id=session_id, - hash_chain_root=hash_chain_root, - participant_dids=[p.agent_did for p in managed.sso.participants], - delta_count=managed.delta_engine.turn_count, - ) - return hash_chain_root + return managed.delta_engine.compute_hash_chain_root() def _cleanup_session(self, session_id: str, managed: ManagedSession) -> None: - """Release bonds, purge VFS data, and archive session.""" - self.vouching.release_session_bonds(session_id) - self.gc.collect( - session_id=session_id, - vfs=managed.sso.vfs if hasattr(managed.sso, "vfs") else None, - delta_engine=managed.delta_engine, - delta_count=managed.delta_engine.turn_count, - ) + """Archive the session and drop it from the active index.""" managed.sso.archive() # Remove from active index after archiving self._active_ids.discard(session_id) @@ -255,8 +229,8 @@ async def verify_behavior( """ Verify agent behavior via Verification adapter. - If drift exceeds threshold, automatically slashes the agent and - reports to Nexus (if adapter is available). + If drift exceeds the threshold, reports the drift to Nexus + (if an adapter is available). Returns: DriftCheckResult if Verification adapter is configured, else None. 
@@ -273,19 +247,12 @@ async def verify_behavior( ) if result.should_slash: + # Validate the session and agent before reporting drift externally, + # matching the pre-existing guard: an unknown session or a + # non-participant agent is an error, not a silent external report. managed = self._get_session(session_id) - participant = managed.sso.get_participant(agent_did) - # Build scores dict only for the slash path (avoid on healthy agents) - agent_scores = {p.agent_did: p.eff_score for p in managed.sso.participants} - self.slashing.slash( - vouchee_did=agent_did, - session_id=session_id, - vouchee_sigma=participant.eff_score, - risk_weight=0.95, - reason=f"Verification drift: {result.drift_score:.3f} ({result.severity.value})", - agent_scores=agent_scores, - ) - # Propagate to Nexus + managed.sso.get_participant(agent_did) + # Propagate the drift signal to Nexus (external trust backend). if self.nexus: severity = "critical" if result.drift_score >= 0.75 else "high" self.nexus.report_slash( @@ -293,7 +260,7 @@ async def verify_behavior( reason=f"Behavioral drift: {result.drift_score:.3f}", severity=severity, ) - logger.warning("Agent %s penalized: drift=%.3f", agent_did, result.drift_score) + logger.warning("Agent %s flagged for drift=%.3f", agent_did, result.drift_score) return result diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/__init__.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/__init__.py deleted file mode 100644 index 7dacf2bb5..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/__init__.py +++ /dev/null @@ -1,140 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Liability Matrix — simple event log for sponsor→sponsored agent relationships. - -Public Preview: graph operations are retained for API compatibility -but sponsorship/penalty/quarantine are stubs. 
-""" - -from __future__ import annotations - -from dataclasses import dataclass -from typing import Optional - - -@dataclass -class LiabilityEdge: - """An edge in the liability graph.""" - - voucher_did: str - vouchee_did: str - bonded_amount: float - vouch_id: str - - -class LiabilityMatrix: - """ - Directed graph tracking sponsor→sponsored agent bonds within a session. - - Provides query APIs for exposure analysis and cascade detection. - """ - - def __init__(self, session_id: str) -> None: - self.session_id = session_id - self._edges: list[LiabilityEdge] = [] - - def add_edge( - self, - voucher_did: str, - vouchee_did: str, - bonded_amount: float, - vouch_id: str, - ) -> LiabilityEdge: - """Record a sponsorship relationship.""" - edge = LiabilityEdge( - voucher_did=voucher_did, - vouchee_did=vouchee_did, - bonded_amount=bonded_amount, - vouch_id=vouch_id, - ) - self._edges.append(edge) - return edge - - def remove_edge(self, vouch_id: str) -> None: - """Remove a sponsorship relationship by sponsor ID.""" - self._edges = [e for e in self._edges if e.vouch_id != vouch_id] - - def who_vouches_for(self, agent_did: str) -> list[LiabilityEdge]: - """Get all sponsors for a given agent.""" - return [e for e in self._edges if e.vouchee_did == agent_did] - - def who_is_vouched_by(self, agent_did: str) -> list[LiabilityEdge]: - """Get all sponsored agents of a given sponsor.""" - return [e for e in self._edges if e.voucher_did == agent_did] - - def total_exposure(self, voucher_did: str) -> float: - """Total σ bonded by a sponsor across all sponsored agents.""" - return sum(e.bonded_amount for e in self._edges if e.voucher_did == voucher_did) - - def cascade_path(self, agent_did: str, max_depth: int = 2) -> list[list[str]]: - """ - Find cascade paths from an agent through the liability graph. - - Returns all paths where penalty agent_did would cascade to others. 
- """ - paths: list[list[str]] = [] - self._dfs_cascade(agent_did, [agent_did], paths, max_depth) - return paths - - def has_cycle(self) -> bool: - """Check if the liability graph contains any cycles.""" - all_nodes = set() - for e in self._edges: - all_nodes.add(e.voucher_did) - all_nodes.add(e.vouchee_did) - - visited: set[str] = set() - in_stack: set[str] = set() - - for node in all_nodes: - if node not in visited: - if self._dfs_cycle(node, visited, in_stack): - return True - return False - - def clear(self) -> None: - """Release all bonds (session termination).""" - self._edges.clear() - - @property - def edges(self) -> list[LiabilityEdge]: - return list(self._edges) - - def _dfs_cascade( - self, - current: str, - path: list[str], - paths: list[list[str]], - max_depth: int, - ) -> None: - if len(path) > max_depth + 1: - return - vouchees = self.who_is_vouched_by(current) - if not vouchees and len(path) > 1: - paths.append(list(path)) - return - for edge in vouchees: - if edge.vouchee_did not in path: - path.append(edge.vouchee_did) - self._dfs_cascade(edge.vouchee_did, path, paths, max_depth) - path.pop() - if not vouchees: - return - if len(path) > 1: - paths.append(list(path)) - - def _dfs_cycle(self, node: str, visited: set[str], in_stack: set[str]) -> bool: - visited.add(node) - in_stack.add(node) - for edge in self._edges: - if edge.voucher_did == node: - neighbor = edge.vouchee_did - if neighbor in in_stack: - return True - if neighbor not in visited: - if self._dfs_cycle(neighbor, visited, in_stack): - return True - in_stack.discard(node) - return False diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/attribution.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/attribution.py deleted file mode 100644 index 8a725fb08..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/attribution.py +++ /dev/null @@ -1,90 +0,0 @@ -# Copyright (c) Microsoft Corporation. 
-# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Fault Logging — stub implementation. - -Public Preview: assigns full liability to the direct-cause agent. -No causal chain analysis. -""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime - - -@dataclass -class FaultAttribution: - """Fault attribution for an agent.""" - - agent_did: str - liability_score: float - causal_contribution: float - is_direct_cause: bool = False - reason: str = "" - - -@dataclass -class AttributionResult: - """Attribution result for a saga failure.""" - - attribution_id: str = field(default_factory=lambda: f"attr:{uuid.uuid4().hex[:8]}") - saga_id: str = "" - session_id: str = "" - timestamp: datetime = field(default_factory=lambda: datetime.now(UTC)) - attributions: list[FaultAttribution] = field(default_factory=list) - causal_chain_length: int = 0 - root_cause_agent: str | None = None - - @property - def agents_involved(self) -> list[str]: - return [a.agent_did for a in self.attributions] - - def get_liability(self, agent_did: str) -> float: - for a in self.attributions: - if a.agent_did == agent_did: - return a.liability_score - return 0.0 - - -class CausalAttributor: - """Simple fault attribution — assigns liability to the direct cause agent.""" - - def __init__(self) -> None: - self._history: list[AttributionResult] = [] - - def attribute( - self, - saga_id: str, - session_id: str, - agent_actions: dict[str, list[dict]], - failure_step_id: str, - failure_agent_did: str, - risk_weights: dict[str, float] | None = None, - ) -> AttributionResult: - """Assign full liability to the direct-cause agent.""" - attributions = [] - for agent_did in agent_actions: - attributions.append( - FaultAttribution( - agent_did=agent_did, - liability_score=1.0 if agent_did == failure_agent_did else 0.0, - causal_contribution=1.0 if agent_did == failure_agent_did else 0.0, - 
is_direct_cause=(agent_did == failure_agent_did), - reason="Direct cause" if agent_did == failure_agent_did else "", - ) - ) - result = AttributionResult( - saga_id=saga_id, - session_id=session_id, - attributions=attributions, - root_cause_agent=failure_agent_did, - ) - self._history.append(result) - return result - - @property - def attribution_history(self) -> list[AttributionResult]: - return list(self._history) diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/ledger.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/ledger.py deleted file mode 100644 index 7b2b0b04d..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/ledger.py +++ /dev/null @@ -1,121 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Liability Ledger — simple append-only fault log. - -Public Preview: records fault events as (agent, type, timestamp, details). -No risk scoring, no admission decisions. 
-""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime -from enum import Enum - - -class LedgerEntryType(str, Enum): - """Types of liability ledger entries.""" - - VOUCH_GIVEN = "vouch_given" - VOUCH_RECEIVED = "vouch_received" - VOUCH_RELEASED = "vouch_released" - SLASH_RECEIVED = "slash_received" - SLASH_CASCADED = "slash_cascaded" - QUARANTINE_ENTERED = "quarantine_entered" - QUARANTINE_RELEASED = "quarantine_released" - FAULT_ATTRIBUTED = "fault_attributed" - CLEAN_SESSION = "clean_session" - - -@dataclass -class LedgerEntry: - """A single entry in the liability ledger.""" - - entry_id: str = field(default_factory=lambda: uuid.uuid4().hex[:12]) - agent_did: str = "" - entry_type: LedgerEntryType = LedgerEntryType.CLEAN_SESSION - session_id: str = "" - timestamp: datetime = field(default_factory=lambda: datetime.now(UTC)) - severity: float = 0.0 - details: str = "" - related_agent: str | None = None - - -@dataclass -class AgentRiskProfile: - """Risk profile for an agent (Public Preview: always admits).""" - - agent_did: str - total_entries: int = 0 - slash_count: int = 0 - quarantine_count: int = 0 - clean_session_count: int = 0 - fault_score_avg: float = 0.0 - risk_score: float = 0.0 - recommendation: str = "admit" - - -class LiabilityLedger: - """ - Simple append-only fault log. - - Public Preview: records events for audit trail only. - No risk scoring or admission logic. 
- """ - - PROBATION_THRESHOLD = 0.3 - DENY_THRESHOLD = 0.6 - - def __init__(self) -> None: - self._entries: list[LedgerEntry] = [] - self._by_agent: dict[str, list[LedgerEntry]] = {} - - def record( - self, - agent_did: str, - entry_type: LedgerEntryType, - session_id: str = "", - severity: float = 0.0, - details: str = "", - related_agent: str | None = None, - ) -> LedgerEntry: - """Record a liability event.""" - entry = LedgerEntry( - agent_did=agent_did, - entry_type=entry_type, - session_id=session_id, - severity=severity, - details=details, - related_agent=related_agent, - ) - self._entries.append(entry) - self._by_agent.setdefault(agent_did, []).append(entry) - return entry - - def get_agent_history(self, agent_did: str) -> list[LedgerEntry]: - """Get all ledger entries for an agent.""" - return list(self._by_agent.get(agent_did, [])) - - def compute_risk_profile(self, agent_did: str) -> AgentRiskProfile: - """Return a basic risk profile (Public Preview: always admits).""" - entries = self.get_agent_history(agent_did) - return AgentRiskProfile( - agent_did=agent_did, - total_entries=len(entries), - recommendation="admit", - ) - - def should_admit(self, agent_did: str) -> tuple[bool, str]: - """Always admits in Public Preview.""" - return True, "admit" - - @property - def total_entries(self) -> int: - return len(self._entries) - - @property - def tracked_agents(self) -> list[str]: - return list(self._by_agent.keys()) diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/quarantine.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/quarantine.py deleted file mode 100644 index 5bdbb3597..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/quarantine.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Quarantine Manager — stub implementation. 
- -Public Preview: quarantine is not enforced. Calls return safe defaults. -""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime -from enum import Enum - - -class QuarantineReason(str, Enum): - """Why an agent was quarantined.""" - - BEHAVIORAL_DRIFT = "behavioral_drift" - LIABILITY_VIOLATION = "liability_violation" - RING_BREACH = "ring_breach" - RATE_LIMIT_EXCEEDED = "rate_limit_exceeded" - MANUAL = "manual" - CASCADE_SLASH = "cascade_slash" - - -@dataclass -class QuarantineRecord: - """Record of an agent in quarantine.""" - - quarantine_id: str = field(default_factory=lambda: f"quar:{uuid.uuid4().hex[:8]}") - agent_did: str = "" - session_id: str = "" - reason: QuarantineReason = QuarantineReason.MANUAL - details: str = "" - entered_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - expires_at: datetime | None = None - released_at: datetime | None = None - is_active: bool = True - forensic_data: dict = field(default_factory=dict) - - @property - def is_expired(self) -> bool: - if self.expires_at is None: - return False - return datetime.now(UTC) > self.expires_at - - @property - def duration_seconds(self) -> float: - end = self.released_at or datetime.now(UTC) - return (end - self.entered_at).total_seconds() - - -class QuarantineManager: - """ - Quarantine stub (Public Preview: no quarantine enforcement). 
- """ - - DEFAULT_QUARANTINE_SECONDS = 300 - - def __init__(self) -> None: - self._quarantines: dict[str, QuarantineRecord] = {} - - def quarantine( - self, - agent_did: str, - session_id: str, - reason: QuarantineReason, - details: str = "", - duration_seconds: int | None = None, - forensic_data: dict | None = None, - ) -> QuarantineRecord: - """Log a quarantine request (Public Preview: no enforcement).""" - record = QuarantineRecord( - agent_did=agent_did, - session_id=session_id, - reason=reason, - details=details, - is_active=False, - ) - self._quarantines[record.quarantine_id] = record - return record - - def release(self, agent_did: str, session_id: str) -> QuarantineRecord | None: - """No-op in Public Preview.""" - return None - - def is_quarantined(self, agent_did: str, session_id: str) -> bool: - """Always False in Public Preview.""" - return False - - def get_active_quarantine(self, agent_did: str, session_id: str) -> QuarantineRecord | None: - return None - - def tick(self) -> list[QuarantineRecord]: - return [] - - def get_history( - self, agent_did: str | None = None, session_id: str | None = None - ) -> list[QuarantineRecord]: - """Get quarantine history, optionally filtered.""" - records = list(self._quarantines.values()) - if agent_did: - records = [r for r in records if r.agent_did == agent_did] - if session_id: - records = [r for r in records if r.session_id == session_id] - return records - - @property - def active_quarantines(self) -> list[QuarantineRecord]: - return [] - - @property - def quarantine_count(self) -> int: - return 0 diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/slashing.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/slashing.py deleted file mode 100644 index 211193aa7..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/slashing.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
-# Public Preview — basic implementation -""" -Collateral Penalty Engine — stub implementation. - -Public Preview: penalty is not enforced. Penalty calls are logged only. -""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime - - -@dataclass -class SlashResult: - """Result of a penalty operation.""" - - slash_id: str - vouchee_did: str - vouchee_sigma_before: float - vouchee_sigma_after: float - voucher_clips: list[VoucherClip] - reason: str - session_id: str - timestamp: datetime = field(default_factory=lambda: datetime.now(UTC)) - cascade_depth: int = 0 - - -@dataclass -class VoucherClip: - """A collateral clip applied to a sponsor.""" - - voucher_did: str - sigma_before: float - sigma_after: float - risk_weight: float - vouch_id: str - - -class SlashingEngine: - """ - Penalty stub (Public Preview: logs penalty events, no penalties applied). - """ - - MAX_CASCADE_DEPTH = 2 - SIGMA_FLOOR = 0.05 - - def __init__(self, vouching_engine: object) -> None: - self._slash_history: list[SlashResult] = [] - - def slash( - self, - vouchee_did: str, - session_id: str, - vouchee_sigma: float, - risk_weight: float, - reason: str, - agent_scores: dict[str, float], - cascade_depth: int = 0, - ) -> SlashResult: - """Log a penalty event (Public Preview: no penalties applied).""" - result = SlashResult( - slash_id=f"penalize:{uuid.uuid4()}", - vouchee_did=vouchee_did, - vouchee_sigma_before=vouchee_sigma, - vouchee_sigma_after=vouchee_sigma, - voucher_clips=[], - reason=reason, - session_id=session_id, - cascade_depth=0, - ) - self._slash_history.append(result) - return result - - @property - def history(self) -> list[SlashResult]: - return list(self._slash_history) diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/vouching.py b/agent-governance-python/agent-hypervisor/src/hypervisor/liability/vouching.py deleted file mode 100644 index d4eb5491a..000000000 --- 
a/agent-governance-python/agent-hypervisor/src/hypervisor/liability/vouching.py +++ /dev/null @@ -1,134 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Sponsorship Protocol — stub implementation. - -Public Preview: sponsorship is not enforced. All requests are approved. -""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime - -from hypervisor.constants import ( - VOUCHING_DEFAULT_BOND_PCT, - VOUCHING_DEFAULT_MAX_EXPOSURE, - VOUCHING_MIN_VOUCHER_SCORE, - VOUCHING_SCORE_SCALE, -) - - -@dataclass -class VouchRecord: - """A record of one agent sponsorship for another within a session.""" - - vouch_id: str - voucher_did: str - vouchee_did: str - session_id: str - bonded_sigma_pct: float - bonded_amount: float - created_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - expiry: datetime | None = None - is_active: bool = True - released_at: datetime | None = None - - @property - def is_expired(self) -> bool: - if self.expiry is None: - return False - return datetime.now(UTC) > self.expiry - - -class VouchingEngine: - """ - Sponsorship stub (Public Preview: approves all, no bonding). 
- """ - - SCORE_SCALE = VOUCHING_SCORE_SCALE - MIN_VOUCHER_SCORE = VOUCHING_MIN_VOUCHER_SCORE - DEFAULT_BOND_PCT = VOUCHING_DEFAULT_BOND_PCT - DEFAULT_MAX_EXPOSURE = VOUCHING_DEFAULT_MAX_EXPOSURE - - def __init__(self, max_exposure: float | None = None) -> None: - self._vouches: dict[str, VouchRecord] = {} - self.max_exposure = max_exposure or self.DEFAULT_MAX_EXPOSURE - - @property - def vouch_count(self) -> int: - """Total number of sponsorship records (active + released).""" - return len(self._vouches) - - def vouch( - self, - voucher_did: str, - vouchee_did: str, - session_id: str, - voucher_sigma: float, - bond_pct: float | None = None, - expiry: datetime | None = None, - ) -> VouchRecord: - """Create a sponsorship record (Public Preview: always succeeds, no bonding).""" - record = VouchRecord( - vouch_id=f"sponsor:{uuid.uuid4()}", - voucher_did=voucher_did, - vouchee_did=vouchee_did, - session_id=session_id, - bonded_sigma_pct=0.0, - bonded_amount=0.0, - ) - self._vouches[record.vouch_id] = record - return record - - def compute_eff_score( - self, - vouchee_did: str, - session_id: str, - vouchee_sigma: float, - risk_weight: float, - ) -> float: - """Return sponsored agent's own score (Public Preview: no sponsor boost).""" - return vouchee_sigma - - def get_vouchers_for(self, agent_did: str, session_id: str) -> list[VouchRecord]: - """Get all sponsors for an agent in a session.""" - return [ - v - for v in self._vouches.values() - if v.vouchee_did == agent_did and v.session_id == session_id and v.is_active - ] - - def get_total_exposure(self, voucher_did: str, session_id: str) -> float: - """Always zero in Public Preview.""" - return 0.0 - - def release_bond(self, vouch_id: str) -> None: - """Release a sponsorship bond.""" - if vouch_id not in self._vouches: - raise VouchingError(f"Sponsor {vouch_id} not found") - record = self._vouches[vouch_id] - record.is_active = False - record.released_at = datetime.now(UTC) - - def release_session_bonds(self, session_id: 
str) -> int: - """Release all bonds for a session.""" - count = 0 - for v in self._vouches.values(): - if v.session_id == session_id and v.is_active: - v.is_active = False - v.released_at = datetime.now(UTC) - count += 1 - return count - - def _active_vouches_for(self, agent_did: str, session_id: str) -> list[VouchRecord]: - return self.get_vouchers_for(agent_did, session_id) - - def _creates_cycle(self, voucher_did: str, vouchee_did: str, session_id: str) -> bool: - return False - - -class VouchingError(Exception): - """Raised for sponsorship protocol violations.""" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/models.py b/agent-governance-python/agent-hypervisor/src/hypervisor/models.py index 9a6dde977..b6da81bce 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/models.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/models.py @@ -141,7 +141,6 @@ class SessionConfig: max_duration_seconds: int = 3600 min_eff_score: float = SESSION_DEFAULT_MIN_EFF_SCORE enable_audit: bool = True - enable_blockchain_commitment: bool = False def __post_init__(self) -> None: if not isinstance(self.max_participants, int): diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/observability/event_bus.py b/agent-governance-python/agent-hypervisor/src/hypervisor/observability/event_bus.py index a9143e1b2..000012991 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/observability/event_bus.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/observability/event_bus.py @@ -43,14 +43,6 @@ class EventType(str, Enum): RING_ELEVATION_EXPIRED = "ring.elevation_expired" RING_BREACH_DETECTED = "ring.breach_detected" - # Liability - VOUCH_CREATED = "liability.vouch_created" - VOUCH_RELEASED = "liability.vouch_released" - SLASH_EXECUTED = "liability.slash_executed" - FAULT_ATTRIBUTED = "liability.fault_attributed" - QUARANTINE_ENTERED = "liability.quarantine_entered" - QUARANTINE_RELEASED = 
"liability.quarantine_released" - # Saga SAGA_CREATED = "saga.created" SAGA_STEP_STARTED = "saga.step_started" @@ -59,9 +51,6 @@ class EventType(str, Enum): SAGA_COMPENSATING = "saga.compensating" SAGA_COMPLETED = "saga.completed" SAGA_ESCALATED = "saga.escalated" - SAGA_FANOUT_STARTED = "saga.fanout_started" - SAGA_FANOUT_RESOLVED = "saga.fanout_resolved" - SAGA_CHECKPOINT_SAVED = "saga.checkpoint_saved" # VFS / Session writes VFS_WRITE = "vfs.write" @@ -79,7 +68,6 @@ class EventType(str, Enum): # Audit AUDIT_DELTA_CAPTURED = "audit.delta_captured" AUDIT_COMMITTED = "audit.committed" - AUDIT_GC_COLLECTED = "audit.gc_collected" # Verification BEHAVIOR_DRIFT = "verification.behavior_drift" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/providers.py b/agent-governance-python/agent-hypervisor/src/hypervisor/providers.py index e3761c15c..6b044ab45 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/providers.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/providers.py @@ -16,7 +16,6 @@ PROVIDER_GROUPS = { "ring_engine": "hypervisor.providers.ring_engine", - "liability": "hypervisor.providers.liability", "saga_engine": "hypervisor.providers.saga_engine", "breach_detector": "hypervisor.providers.breach_detector", "session_manager": "hypervisor.providers.session_manager", @@ -64,25 +63,6 @@ def get_ring_engine(**kwargs: Any): return RingEnforcer(**kwargs) -def get_liability_engine(**kwargs: Any): - """Get the best available liability engine. - - Advanced: Shapley-value fault attribution with vouch cascades. - Community: ``LiabilityMatrix`` from ``hypervisor.liability``. - """ - provider = _discover_provider(PROVIDER_GROUPS["liability"]) - if provider is not None: - return provider(**kwargs) - - # Community fallback. The previous import targeted - # ``hypervisor.liability.engine.LiabilityEngine`` which does not - # exist in this tree; ``LiabilityMatrix`` is the real public- - # edition entry point. 
- from hypervisor.liability import LiabilityMatrix - - return LiabilityMatrix(**kwargs) - - def get_saga_engine(**kwargs: Any): """Get the best available saga orchestration engine. diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/__init__.py b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/__init__.py index a16b37e0d..bcf7b299a 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/__init__.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/__init__.py @@ -1,22 +1,3 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Saga subpackage — orchestration, fan-out, checkpoints, DSL.""" - -from hypervisor.saga.checkpoint import CheckpointManager, SemanticCheckpoint -from hypervisor.saga.dsl import SagaDefinition, SagaDSLError, SagaDSLParser -from hypervisor.saga.fan_out import FanOutGroup, FanOutOrchestrator, FanOutPolicy -from hypervisor.saga.schema import SAGA_DEFINITION_SCHEMA, SagaSchemaError, SagaSchemaValidator - -__all__ = [ - "FanOutOrchestrator", - "FanOutGroup", - "FanOutPolicy", - "CheckpointManager", - "SemanticCheckpoint", - "SagaDSLParser", - "SagaDefinition", - "SagaDSLError", - "SagaSchemaValidator", - "SagaSchemaError", - "SAGA_DEFINITION_SCHEMA", -] +"""Saga subpackage — saga orchestration and compensation.""" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/checkpoint.py b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/checkpoint.py deleted file mode 100644 index ef6967d51..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/checkpoint.py +++ /dev/null @@ -1,110 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Execution Checkpoints — stub implementation. - -Public Preview: checkpoints are recorded but replay/skip logic is removed. 
-""" - -from __future__ import annotations - -import hashlib -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime -from typing import Any - - -@dataclass -class SemanticCheckpoint: - """A checkpoint record (Public Preview: stored but not used for replay).""" - - checkpoint_id: str = field(default_factory=lambda: f"ckpt:{uuid.uuid4().hex[:8]}") - saga_id: str = "" - step_id: str = "" - goal_description: str = "" - goal_hash: str = "" - achieved_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - state_snapshot: dict[str, Any] = field(default_factory=dict) - is_valid: bool = True - invalidated_reason: str | None = None - - @staticmethod - def compute_goal_hash(goal: str, step_id: str) -> str: - """Compute deterministic hash for a goal.""" - content = f"{goal}:{step_id}" - return hashlib.sha256(content.encode()).hexdigest()[:16] - - -class CheckpointManager: - """ - Checkpoint stub (Public Preview: saves checkpoints but no replay logic). - """ - - def __init__(self) -> None: - self._checkpoints: dict[str, list[SemanticCheckpoint]] = {} - self._by_goal_hash: dict[str, SemanticCheckpoint] = {} - - def save( - self, - saga_id: str, - step_id: str, - goal_description: str, - state_snapshot: dict | None = None, - ) -> SemanticCheckpoint: - """Save a checkpoint record.""" - goal_hash = SemanticCheckpoint.compute_goal_hash(goal_description, step_id) - checkpoint = SemanticCheckpoint( - saga_id=saga_id, - step_id=step_id, - goal_description=goal_description, - goal_hash=goal_hash, - state_snapshot=state_snapshot or {}, - ) - self._checkpoints.setdefault(saga_id, []).append(checkpoint) - self._by_goal_hash[goal_hash] = checkpoint - return checkpoint - - def is_achieved( - self, - saga_id: str, - goal_description: str, - step_id: str, - ) -> bool: - """Always returns False (Public Preview: no skip-on-replay).""" - return False - - def get_checkpoint( - self, - saga_id: str, - goal_description: str, - step_id: str, - ) -> 
SemanticCheckpoint | None: - """Returns None (Public Preview: no replay support).""" - return None - - def invalidate( - self, - saga_id: str, - step_id: str, - reason: str = "", - ) -> int: - """No-op in Public Preview.""" - return 0 - - def get_saga_checkpoints(self, saga_id: str) -> list[SemanticCheckpoint]: - """Get all checkpoints for a saga.""" - return list(self._checkpoints.get(saga_id, [])) - - def get_replay_plan(self, saga_id: str, steps: list[str]) -> list[str]: - """All steps need execution (Public Preview: no skip logic).""" - return list(steps) - - @property - def total_checkpoints(self) -> int: - return sum(len(v) for v in self._checkpoints.values()) - - @property - def valid_checkpoints(self) -> int: - return self.total_checkpoints diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/dsl.py b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/dsl.py deleted file mode 100644 index d23333164..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/dsl.py +++ /dev/null @@ -1,190 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Declarative Saga DSL — stub implementation. - -Public Preview: DSL parsing is retained for basic step definitions only. -Fan-out groups in DSL are ignored (sequential execution only). 
-""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from typing import Any - -from hypervisor.saga.fan_out import FanOutPolicy -from hypervisor.saga.schema import SagaSchemaValidator -from hypervisor.saga.state_machine import SagaStep - - -@dataclass -class SagaDSLStep: - """A step parsed from the DSL definition.""" - - id: str = "" - action_id: str = "" - agent: str = "" - execute_api: str = "" - undo_api: str | None = None - timeout: int = 300 - retries: int = 0 - checkpoint_goal: str | None = None - - -@dataclass -class SagaDSLFanOut: - """A fan-out group (Public Preview: ignored during execution).""" - - policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED - branch_step_ids: list[str] = field(default_factory=list) - - -@dataclass -class SagaDefinition: - """A complete saga definition parsed from DSL.""" - - name: str = "" - session_id: str = "" - saga_id: str = field(default_factory=lambda: f"saga:{uuid.uuid4().hex[:8]}") - steps: list[SagaDSLStep] = field(default_factory=list) - fan_outs: list[SagaDSLFanOut] = field(default_factory=list) - metadata: dict[str, Any] = field(default_factory=dict) - - @property - def step_ids(self) -> list[str]: - return [s.id for s in self.steps] - - @property - def fan_out_step_ids(self) -> set[str]: - return set() - - @property - def sequential_steps(self) -> list[SagaDSLStep]: - """All steps are sequential in Public Preview.""" - return list(self.steps) - - -class SagaDSLParser: - """ - Parses saga definitions from dict. - - Public Preview: fan-out groups are parsed but ignored during execution. - """ - - def __init__(self, *, schema_validation: bool = False) -> None: - self._schema_validator = SagaSchemaValidator() if schema_validation else None - - def parse(self, definition: dict[str, Any]) -> SagaDefinition: - """Parse a saga definition dict into a SagaDefinition. - - If schema_validation was enabled at construction, validates against - the JSON schema before parsing. 
- """ - if self._schema_validator is not None: - self._schema_validator.validate_or_raise(definition) - - name = definition.get("name", "") - if not name: - raise SagaDSLError("Saga definition must have a 'name'") - - session_id = definition.get("session_id", "") - if not session_id: - raise SagaDSLError("Saga definition must have a 'session_id'") - - raw_steps = definition.get("steps", []) - if not raw_steps: - raise SagaDSLError("Saga must have at least one step") - - steps = [] - step_ids = set() - for raw in raw_steps: - step = self._parse_step(raw) - if step.id in step_ids: - raise SagaDSLError(f"Duplicate step ID: {step.id}") - step_ids.add(step.id) - steps.append(step) - - return SagaDefinition( - name=name, - session_id=session_id, - saga_id=definition.get("saga_id", f"saga:{uuid.uuid4().hex[:8]}"), - steps=steps, - fan_outs=[], - metadata=definition.get("metadata", {}), - ) - - def _parse_step(self, raw: dict) -> SagaDSLStep: - step_id = raw.get("id", "") - if not step_id: - raise SagaDSLError("Each step must have an 'id'") - - action_id = raw.get("action_id", "") - if not action_id: - raise SagaDSLError(f"Step {step_id} must have an 'action_id'") - - agent = raw.get("agent", "") - if not agent: - raise SagaDSLError(f"Step {step_id} must have an 'agent'") - - return SagaDSLStep( - id=step_id, - action_id=action_id, - agent=agent, - execute_api=raw.get("execute_api", ""), - undo_api=raw.get("undo_api"), - timeout=raw.get("timeout", 300), - retries=raw.get("retries", 0), - checkpoint_goal=raw.get("checkpoint_goal"), - ) - - def _parse_fan_out(self, raw: dict, valid_step_ids: set[str]) -> SagaDSLFanOut: - """Parse fan-out definition (Public Preview: retained for API compat).""" - return SagaDSLFanOut( - policy=FanOutPolicy.ALL_MUST_SUCCEED, - branch_step_ids=raw.get("branches", []), - ) - - def to_saga_steps(self, definition: SagaDefinition) -> list[SagaStep]: - """Convert a SagaDefinition into SagaStep objects.""" - return [ - SagaStep( - step_id=s.id, - 
action_id=s.action_id, - agent_did=s.agent, - execute_api=s.execute_api, - undo_api=s.undo_api, - timeout_seconds=s.timeout, - max_retries=s.retries, - ) - for s in definition.steps - ] - - def validate(self, definition: dict[str, Any]) -> list[str]: - """Validate a definition and return list of errors (empty = valid).""" - errors = [] - if not definition.get("name"): - errors.append("Missing 'name'") - if not definition.get("session_id"): - errors.append("Missing 'session_id'") - if not definition.get("steps"): - errors.append("Missing 'steps'") - else: - step_ids = set() - for i, step in enumerate(definition["steps"]): - if not step.get("id"): - errors.append(f"Step {i} missing 'id'") - elif step["id"] in step_ids: - errors.append(f"Duplicate step ID: {step['id']}") - else: - step_ids.add(step["id"]) - if not step.get("action_id"): - errors.append(f"Step {step.get('id', i)} missing 'action_id'") - if not step.get("agent"): - errors.append(f"Step {step.get('id', i)} missing 'agent'") - return errors - - -class SagaDSLError(Exception): - """Raised for invalid saga DSL definitions.""" diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/fan_out.py b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/fan_out.py deleted file mode 100644 index aa73106da..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/fan_out.py +++ /dev/null @@ -1,133 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Parallel Saga Fan-Out — stub implementation. - -Public Preview: only sequential ALL_MUST_SUCCEED execution. -Fan-out groups execute branches one at a time. 
-""" - -from __future__ import annotations - -import asyncio -import uuid -from collections.abc import Callable -from dataclasses import dataclass, field -from enum import Enum -from typing import Any - -from hypervisor.saga.state_machine import SagaStep, StepState - - -class FanOutPolicy(str, Enum): - ALL_MUST_SUCCEED = "all_must_succeed" - MAJORITY_MUST_SUCCEED = "majority_must_succeed" - ANY_MUST_SUCCEED = "any_must_succeed" - - -@dataclass -class FanOutBranch: - branch_id: str = field(default_factory=lambda: f"branch:{uuid.uuid4().hex[:8]}") - step: SagaStep | None = None - result: Any = None - error: str | None = None - succeeded: bool = False - - -@dataclass -class FanOutGroup: - group_id: str = field(default_factory=lambda: f"fanout:{uuid.uuid4().hex[:8]}") - saga_id: str = "" - policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED - branches: list[FanOutBranch] = field(default_factory=list) - resolved: bool = False - policy_satisfied: bool = False - compensation_needed: list[str] = field(default_factory=list) - - @property - def success_count(self) -> int: - return sum(1 for b in self.branches if b.succeeded) - - @property - def failure_count(self) -> int: - return sum(1 for b in self.branches if not b.succeeded and b.error) - - @property - def total_branches(self) -> int: - return len(self.branches) - - def check_policy(self) -> bool: - """Public Preview: only ALL_MUST_SUCCEED is enforced.""" - return self.success_count == self.total_branches - - -class FanOutOrchestrator: - """Fan-out stub (Public Preview: sequential execution, ALL_MUST_SUCCEED only).""" - - def __init__(self) -> None: - self._groups: dict[str, FanOutGroup] = {} - - def create_group( - self, saga_id: str, policy: FanOutPolicy = FanOutPolicy.ALL_MUST_SUCCEED - ) -> FanOutGroup: - group = FanOutGroup(saga_id=saga_id, policy=FanOutPolicy.ALL_MUST_SUCCEED) - self._groups[group.group_id] = group - return group - - def add_branch(self, group_id: str, step: SagaStep) -> FanOutBranch: - group = 
self._get_group(group_id) - branch = FanOutBranch(step=step) - group.branches.append(branch) - return branch - - async def execute( - self, - group_id: str, - executors: dict[str, Callable[..., Any]], - timeout_seconds: int = 300, - ) -> FanOutGroup: - """Execute branches sequentially (Public Preview).""" - group = self._get_group(group_id) - - for branch in group.branches: - if not branch.step: - branch.error = "No step assigned" - continue - executor = executors.get(branch.step.step_id) - if not executor: - branch.error = f"No executor for step {branch.step.step_id}" - continue - try: - branch.step.transition(StepState.EXECUTING) - result = await asyncio.wait_for(executor(), timeout=branch.step.timeout_seconds) - branch.result = result - branch.succeeded = True - branch.step.execute_result = result - branch.step.transition(StepState.COMMITTED) - except Exception as e: - branch.error = str(e) - branch.step.error = str(e) - branch.step.transition(StepState.FAILED) - break # ALL_MUST_SUCCEED: stop on first failure - - group.policy_satisfied = group.check_policy() - group.resolved = True - if not group.policy_satisfied: - group.compensation_needed = [ - b.step.step_id for b in group.branches if b.succeeded and b.step - ] - return group - - def get_group(self, group_id: str) -> FanOutGroup | None: - return self._groups.get(group_id) - - def _get_group(self, group_id: str) -> FanOutGroup: - group = self._groups.get(group_id) - if not group: - raise ValueError(f"Fan-out group {group_id} not found") - return group - - @property - def active_groups(self) -> list[FanOutGroup]: - return [g for g in self._groups.values() if not g.resolved] diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/schema.py b/agent-governance-python/agent-hypervisor/src/hypervisor/saga/schema.py deleted file mode 100644 index 7db94184d..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/saga/schema.py +++ /dev/null @@ -1,252 +0,0 @@ -# Copyright (c) 
Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -JSON Schema validation for Saga DSL definitions. - -Validates saga definitions at parse time with clear error messages -for missing fields, invalid types, and constraint violations. -""" - -from __future__ import annotations - -from typing import Any - -import jsonschema - -# Valid action type prefixes for step action_ids -VALID_ACTION_PREFIXES = ( - "model.", - "data.", - "deploy.", - "validate.", - "notify.", - "infra.", - "security.", - "monitor.", - "config.", - "test.", -) - -SAGA_STEP_SCHEMA: dict[str, Any] = { - "type": "object", - "required": ["id", "action_id", "agent"], - "properties": { - "id": { - "type": "string", - "minLength": 1, - "description": "Unique step identifier", - }, - "action_id": { - "type": "string", - "minLength": 1, - "description": "Action type (e.g. 'model.validate', 'deploy.k8s')", - }, - "agent": { - "type": "string", - "minLength": 1, - "description": "Agent DID or identifier", - }, - "execute_api": { - "type": "string", - "description": "API endpoint for execution", - }, - "undo_api": { - "type": ["string", "null"], - "description": "API endpoint for compensation/rollback", - }, - "timeout": { - "type": "integer", - "minimum": 1, - "maximum": 86400, - "description": "Timeout in seconds (1–86400)", - }, - "retries": { - "type": "integer", - "minimum": 0, - "maximum": 10, - "description": "Max retries (0–10)", - }, - "checkpoint_goal": { - "type": ["string", "null"], - "description": "Semantic checkpoint goal", - }, - "depends_on": { - "type": "array", - "items": {"type": "string", "minLength": 1}, - "uniqueItems": True, - "description": "Step IDs this step depends on", - }, - }, - "additionalProperties": False, -} - -SAGA_DEFINITION_SCHEMA: dict[str, Any] = { - "$schema": "https://json-schema.org/draft/2020-12/schema", - "title": "SagaDefinition", - "description": "Schema for saga DSL definitions", - "type": "object", - 
"required": ["name", "session_id", "steps"], - "properties": { - "name": { - "type": "string", - "minLength": 1, - "description": "Saga name", - }, - "session_id": { - "type": "string", - "minLength": 1, - "description": "Session identifier", - }, - "saga_id": { - "type": "string", - "description": "Optional saga identifier", - }, - "steps": { - "type": "array", - "minItems": 1, - "items": SAGA_STEP_SCHEMA, - "description": "Ordered list of saga steps", - }, - "fan_out": { - "type": "array", - "items": { - "type": "object", - "properties": { - "policy": {"type": "string"}, - "branches": { - "type": "array", - "items": {"type": "string"}, - }, - }, - }, - "description": "Fan-out groups (Public Preview: ignored)", - }, - "metadata": { - "type": "object", - "description": "Arbitrary metadata", - }, - }, - "additionalProperties": False, -} - - -class SagaSchemaValidator: - """Validates saga definitions against JSON schema and semantic rules.""" - - def __init__(self) -> None: - self._validator = jsonschema.Draft202012Validator(SAGA_DEFINITION_SCHEMA) - - def validate(self, definition: dict[str, Any]) -> list[str]: - """Validate definition and return list of error messages (empty = valid). 
- - Performs both JSON schema validation and semantic checks: - - Required fields and types - - Step structure constraints - - Unique step IDs - - Valid action type prefixes - - Timeout and retry ranges - - Compensation requirements - - Step dependency references - """ - errors: list[str] = [] - - # JSON schema validation - for error in sorted(self._validator.iter_errors(definition), key=lambda e: list(e.path)): - path = ".".join(str(p) for p in error.absolute_path) or "(root)" - errors.append(f"[{path}] {error.message}") - - # Semantic checks only if basic structure is valid - if not errors and isinstance(definition.get("steps"), list): - errors.extend(self._check_semantic_rules(definition)) - - return errors - - def validate_or_raise(self, definition: dict[str, Any]) -> None: - """Validate and raise SagaSchemaError if invalid.""" - errors = self.validate(definition) - if errors: - raise SagaSchemaError( - f"Saga definition has {len(errors)} validation error(s):\n" - + "\n".join(f" - {e}" for e in errors), - errors=errors, - ) - - def _check_semantic_rules(self, definition: dict[str, Any]) -> list[str]: - errors: list[str] = [] - steps = definition["steps"] - step_ids: set[str] = set() - - for i, step in enumerate(steps): - sid = step.get("id", f"") - - # Duplicate step IDs - if sid in step_ids: - errors.append(f"Duplicate step ID: '{sid}'") - step_ids.add(sid) - - # Action type prefix validation - action_id = step.get("action_id", "") - if action_id and not any(action_id.startswith(p) for p in VALID_ACTION_PREFIXES): - errors.append( - f"Step '{sid}': action_id '{action_id}' does not start with a " - f"valid prefix ({', '.join(VALID_ACTION_PREFIXES)})" - ) - - # Compensation requirement: every step should have undo_api - if step.get("undo_api") is None: - errors.append( - f"Step '{sid}': missing 'undo_api' — every action should have a compensation endpoint" - ) - - # Dependency validation - for step in steps: - for dep in step.get("depends_on", []): - if dep not 
in step_ids: - errors.append( - f"Step '{step['id']}': depends_on references unknown step '{dep}'" - ) - - # Circular dependency detection - errors.extend(self._check_circular_deps(steps)) - - return errors - - def _check_circular_deps(self, steps: list[dict[str, Any]]) -> list[str]: - """Detect circular dependencies via DFS.""" - graph: dict[str, list[str]] = {} - for step in steps: - sid = step.get("id", "") - graph[sid] = step.get("depends_on", []) - - visited: set[str] = set() - in_stack: set[str] = set() - errors: list[str] = [] - - def dfs(node: str) -> bool: - if node in in_stack: - errors.append(f"Circular dependency detected involving step '{node}'") - return True - if node in visited: - return False - visited.add(node) - in_stack.add(node) - for dep in graph.get(node, []): - if dfs(dep): - return True - in_stack.discard(node) - return False - - for sid in graph: - if sid not in visited: - dfs(sid) - - return errors - - -class SagaSchemaError(Exception): - """Raised when a saga definition fails schema validation.""" - - def __init__(self, message: str, errors: list[str] | None = None) -> None: - super().__init__(message) - self.errors = errors or [] diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/session/__init__.py b/agent-governance-python/agent-hypervisor/src/hypervisor/session/__init__.py index ef43a5482..6f8ea29b0 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/session/__init__.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/session/__init__.py @@ -27,7 +27,6 @@ class SharedSessionObject: - SessionID: UUID bound to a DID - ConsistencyMode: Strong (consensus) or Eventual (gossip) - StateSubstrate: A VFS representing the shared world - - LiabilityMatrix: Registry of who sponsors for whom Lifecycle: created → handshaking → active → terminating → archived """ diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/session/intent_locks.py 
b/agent-governance-python/agent-hypervisor/src/hypervisor/session/intent_locks.py deleted file mode 100644 index 445504819..000000000 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/session/intent_locks.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -# Public Preview — basic implementation -""" -Resource Locks — stub implementation. - -Public Preview: locks are not enforced. All acquire calls succeed. -""" - -from __future__ import annotations - -import uuid -from dataclasses import dataclass, field -from datetime import UTC, datetime -from enum import Enum - - -class LockIntent(str, Enum): - """Types of lock intent.""" - - READ = "read" - WRITE = "write" - EXCLUSIVE = "exclusive" - - -@dataclass -class IntentLock: - """A declared resource lock on a resource.""" - - lock_id: str = field(default_factory=lambda: f"lock:{uuid.uuid4().hex[:8]}") - agent_did: str = "" - session_id: str = "" - resource_path: str = "" - intent: LockIntent = LockIntent.READ - acquired_at: datetime = field(default_factory=lambda: datetime.now(UTC)) - is_active: bool = True - saga_step_id: str | None = None - - -class LockContentionError(Exception): - """Raised when lock contention is detected.""" - - -class DeadlockError(Exception): - """Raised when a deadlock is detected.""" - - -class IntentLockManager: - """ - Resource lock stub (Public Preview: all locks succeed, no contention). 
- """ - - def __init__(self) -> None: - self._locks: dict[str, IntentLock] = {} - - def acquire( - self, - agent_did: str, - session_id: str, - resource_path: str, - intent: LockIntent, - saga_step_id: str | None = None, - ) -> IntentLock: - """Acquire a lock (Public Preview: always succeeds).""" - lock = IntentLock( - agent_did=agent_did, - session_id=session_id, - resource_path=resource_path, - intent=intent, - saga_step_id=saga_step_id, - ) - self._locks[lock.lock_id] = lock - return lock - - def release(self, lock_id: str) -> None: - """Release a lock.""" - lock = self._locks.get(lock_id) - if lock: - lock.is_active = False - - def release_agent_locks(self, agent_did: str, session_id: str) -> int: - count = 0 - for lock in list(self._locks.values()): - if lock.agent_did == agent_did and lock.session_id == session_id and lock.is_active: - lock.is_active = False - count += 1 - return count - - def release_session_locks(self, session_id: str) -> int: - count = 0 - for lock in list(self._locks.values()): - if lock.session_id == session_id and lock.is_active: - lock.is_active = False - count += 1 - return count - - def get_agent_locks(self, agent_did: str, session_id: str) -> list[IntentLock]: - return [ - lock - for lock in self._locks.values() - if lock.agent_did == agent_did and lock.session_id == session_id and lock.is_active - ] - - def get_resource_locks(self, resource_path: str) -> list[IntentLock]: - return [ - lock - for lock in self._locks.values() - if lock.resource_path == resource_path and lock.is_active - ] - - @property - def active_lock_count(self) -> int: - return sum(1 for lock in self._locks.values() if lock.is_active) - - @property - def contention_points(self) -> list[str]: - return [] diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/session/isolation.py b/agent-governance-python/agent-hypervisor/src/hypervisor/session/isolation.py index 532d15eb1..d5981207f 100644 --- 
a/agent-governance-python/agent-hypervisor/src/hypervisor/session/isolation.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/session/isolation.py @@ -31,10 +31,6 @@ class IsolationLevel(str, Enum): def requires_vector_clocks(self) -> bool: return self == IsolationLevel.SERIALIZABLE - @property - def requires_intent_locks(self) -> bool: - return self == IsolationLevel.SERIALIZABLE - @property def allows_concurrent_writes(self) -> bool: return self != IsolationLevel.SERIALIZABLE diff --git a/agent-governance-python/agent-hypervisor/src/hypervisor/session/vector_clock.py b/agent-governance-python/agent-hypervisor/src/hypervisor/session/vector_clock.py index 910a0f8b8..55892426a 100644 --- a/agent-governance-python/agent-hypervisor/src/hypervisor/session/vector_clock.py +++ b/agent-governance-python/agent-hypervisor/src/hypervisor/session/vector_clock.py @@ -88,46 +88,3 @@ def __eq__(self, other: object) -> bool: with second._lock: all_agents = set(self.clocks.keys()) | set(other.clocks.keys()) return all(self.clocks.get(a, 0) == other.clocks.get(a, 0) for a in all_agents) - - -class VectorClockManager: - """ - Version counter stub (Public Preview: no causal enforcement). - Reads and writes always succeed. 
- """ - - def __init__(self) -> None: - self._path_clocks: dict[str, VectorClock] = {} - self._agent_clocks: dict[str, VectorClock] = {} - self._conflict_count: int = 0 - - def read(self, path: str, agent_did: str) -> VectorClock: - """Record a read (no enforcement).""" - return self._path_clocks.get(path, VectorClock()).copy() - - def write( - self, - path: str, - agent_did: str, - strict: bool = True, - ) -> VectorClock: - """Record a write (Public Preview: never rejects).""" - agent_clock = self._agent_clocks.get(agent_did, VectorClock()) - agent_clock.tick(agent_did) - self._path_clocks[path] = agent_clock.copy() - self._agent_clocks[agent_did] = agent_clock - return self._path_clocks[path] - - def get_path_clock(self, path: str) -> VectorClock: - return self._path_clocks.get(path, VectorClock()).copy() - - def get_agent_clock(self, agent_did: str) -> VectorClock: - return self._agent_clocks.get(agent_did, VectorClock()).copy() - - @property - def conflict_count(self) -> int: - return self._conflict_count - - @property - def tracked_paths(self) -> int: - return len(self._path_clocks) diff --git a/agent-governance-python/agent-hypervisor/tests/integration/test_hypervisor_e2e.py b/agent-governance-python/agent-hypervisor/tests/integration/test_hypervisor_e2e.py index 1c06f95c8..fcd61e09e 100644 --- a/agent-governance-python/agent-hypervisor/tests/integration/test_hypervisor_e2e.py +++ b/agent-governance-python/agent-hypervisor/tests/integration/test_hypervisor_e2e.py @@ -4,7 +4,7 @@ Hypervisor Integration Tests End-to-end tests validating the full hypervisor lifecycle: -session creation → agent join → saga execution → audit → termination → GC. +session creation → agent join → saga execution → audit → termination. 
""" from __future__ import annotations @@ -24,7 +24,6 @@ StepState, ) from hypervisor.audit.delta import VFSChange -from hypervisor.liability.vouching import VouchingError from hypervisor.models import ActionDescriptor # --------------------------------------------------------------------------- @@ -149,60 +148,6 @@ async def test_non_reversible_action_forces_strong_mode(self): assert session.reversibility.has_non_reversible_actions() is True -# --------------------------------------------------------------------------- -# Sponsorship + Penalty Integration -# --------------------------------------------------------------------------- - - -@pytest.mark.integration -class TestVouchingSlashingIntegration: - """Test sponsorship with exposure limits and penalty cascades.""" - - @pytest.fixture(autouse=True) - def setup(self): - self.hv = Hypervisor() - self.session_id = "test-session" - - def test_vouch_and_compute_eff_score(self): - self.hv.vouching.vouch("did:mesh:high", "did:mesh:low", self.session_id, 0.9, bond_pct=0.3) - eff_score = self.hv.vouching.compute_eff_score( - "did:mesh:low", self.session_id, 0.4, risk_weight=0.5 - ) - # Public Preview: no sponsor boost, eff_score = vouchee_sigma - assert eff_score == 0.4 - assert eff_score <= 1.0 - - @pytest.mark.skip("Feature not available in Public Preview") - def test_max_exposure_prevents_over_bonding(self): - """Agent cannot bond more than max_exposure of their σ.""" - # Default max_exposure = 0.80 - self.hv.vouching.vouch("did:mesh:high", "did:mesh:a", self.session_id, 0.9, bond_pct=0.5) - # Already bonded 0.45, max = 0.72 (80% of 0.9), remaining = 0.27 - with pytest.raises(VouchingError, match="exceed max exposure"): - self.hv.vouching.vouch( - "did:mesh:high", "did:mesh:b", self.session_id, 0.9, bond_pct=0.5 - ) - - def test_slash_cascades_to_voucher(self): - """Public Preview: penalty logs but doesn't apply penalties.""" - self.hv.vouching.vouch("did:mesh:high", "did:mesh:low", self.session_id, 0.9, 
bond_pct=0.3) - agent_scores = {"did:mesh:high": 0.9, "did:mesh:low": 0.5} - result = self.hv.slashing.slash( - "did:mesh:low", self.session_id, 0.5, 0.5, "policy_violation", agent_scores - ) - # Public Preview: no penalties applied - assert agent_scores["did:mesh:low"] == 0.5 # unchanged - assert agent_scores["did:mesh:high"] == 0.9 # unchanged - assert len(result.voucher_clips) == 0 - - def test_release_bonds_on_session_terminate(self): - self.hv.vouching.vouch("did:mesh:high", "did:mesh:low", self.session_id, 0.9) - released = self.hv.vouching.release_session_bonds(self.session_id) - assert released == 1 - exposure = self.hv.vouching.get_total_exposure("did:mesh:high", self.session_id) - assert exposure == 0.0 - - # --------------------------------------------------------------------------- # Saga Execution with Timeout & Retry # --------------------------------------------------------------------------- @@ -332,7 +277,7 @@ async def compensator(step): assert saga.state == SagaState.COMPLETED async def test_saga_escalation_on_compensation_failure(self): - """Failed compensation escalates to Joint Liability penalty.""" + """Failed compensation escalates for manual intervention.""" session = await self.hv.create_session(config=SessionConfig(), creator_did="did:mesh:admin") saga = session.saga.create_saga(session.sso.session_id) @@ -428,51 +373,6 @@ async def test_hash_chain_root_deterministic(self): assert root1 == root2 -# --------------------------------------------------------------------------- -# GC Integration -# --------------------------------------------------------------------------- - - -@pytest.mark.integration -class TestGCIntegration: - """Test garbage collection with real VFS and delta engines.""" - - @pytest.fixture(autouse=True) - def setup(self): - self.hv = Hypervisor() - - async def test_gc_purges_vfs_on_terminate(self): - """Termination triggers GC that purges VFS state.""" - session = await self.hv.create_session( - 
config=SessionConfig(enable_audit=True), - creator_did="did:mesh:admin", - ) - sid = session.sso.session_id - await self.hv.join_session(sid, "did:mesh:a", sigma_raw=0.8) - await self.hv.activate_session(sid) - - # Write files to VFS - session.sso.vfs.write("/report.md", "data", agent_did="did:mesh:a") - session.sso.vfs.write("/notes.md", "more", agent_did="did:mesh:a") - assert session.sso.vfs.file_count >= 2 - - # Terminate - await self.hv.terminate_session(sid) - - # GC should have purged VFS - assert self.hv.gc.is_purged(sid) - assert len(self.hv.gc.history) == 1 - - def test_gc_tracks_purged_sessions(self): - gc = self.hv.gc - gc.collect(session_id="s1") - gc.collect(session_id="s2") - assert gc.purged_session_count == 2 - assert gc.is_purged("s1") - assert gc.is_purged("s2") - assert not gc.is_purged("s3") - - # --------------------------------------------------------------------------- # Edge Cases & Security # --------------------------------------------------------------------------- @@ -507,14 +407,3 @@ async def test_max_participants_enforced(self): await self.hv.join_session(sid, "did:mesh:b", sigma_raw=0.7) with pytest.raises(Exception): await self.hv.join_session(sid, "did:mesh:c", sigma_raw=0.6) - - @pytest.mark.skip("Feature not available in Public Preview") - async def test_vouching_exposure_limit_across_sessions(self): - """Max exposure protects an agent's total bonded reputation.""" - # Sponsor agent has σ=0.9, max_exposure=0.80 → limit 0.72 - self.hv.vouching.vouch("did:mesh:v", "did:mesh:a", "s1", 0.9, bond_pct=0.4) - # Bonded 0.36 in s1. 
Next sponsor: 0.4*0.9 = 0.36 → total 0.72 = exactly at limit - self.hv.vouching.vouch("did:mesh:v", "did:mesh:b", "s1", 0.9, bond_pct=0.4) - # Any more should fail - with pytest.raises(VouchingError, match="exceed max exposure"): - self.hv.vouching.vouch("did:mesh:v", "did:mesh:c", "s1", 0.9, bond_pct=0.1) diff --git a/agent-governance-python/agent-hypervisor/tests/integration/test_scenarios.py b/agent-governance-python/agent-hypervisor/tests/integration/test_scenarios.py index 29be4dce7..432009e31 100644 --- a/agent-governance-python/agent-hypervisor/tests/integration/test_scenarios.py +++ b/agent-governance-python/agent-hypervisor/tests/integration/test_scenarios.py @@ -59,7 +59,6 @@ class MockReputationEngine: def __init__(self, scores: dict[str, int] | None = None) -> None: self._scores: dict[str, int] = scores or {} self._outcomes: list[tuple[str, str]] = [] - self._slashes: list[dict[str, Any]] = [] def set_score(self, agent_did: str, score: int) -> None: self._scores[agent_did] = score @@ -79,29 +78,6 @@ def calculate_trust_score( def record_task_outcome(self, agent_did: str, outcome: str) -> None: self._outcomes.append((agent_did, outcome)) - def slash_reputation( - self, - agent_did: str, - reason: str, - severity: str = "medium", - evidence_hash: str | None = None, - trace_id: str | None = None, - broadcast: bool = True, - ) -> None: - self._slashes.append( - { - "agent_did": agent_did, - "reason": reason, - "severity": severity, - "evidence_hash": evidence_hash, - } - ) - # Reduce score - current = self._scores.get(agent_did, 500) - penalty = {"low": 50, "medium": 200, "high": 500, "critical": 900}.get(severity, 200) - self._scores[agent_did] = max(0, current - penalty) - - # --------------------------------------------------------------------------- # Mock Verification Backend # --------------------------------------------------------------------------- @@ -159,9 +135,7 @@ class AgentHistory: class TestRogueAgentScenario: - """ - Flow: Agent joins → 
Verification detects drift → Hypervisor slashes → Nexus notified - """ + """Flow: Agent joins → Verification checks behavior → clean result accepted.""" @pytest.fixture(autouse=True) def setup(self): @@ -177,70 +151,6 @@ def setup(self): self.verification_backend = MockVerificationBackend() self.policy_check = VerificationAdapter(verifier=self.verification_backend) - @pytest.mark.skip("Feature not available in Public Preview") - async def test_rogue_detected_slashed_reputation_reduced(self): - """Full rogue agent lifecycle: join → drift → penalize → nexus penalty.""" - # 1) Resolve sigma from Nexus - sigma_rogue = self.nexus.resolve_sigma( - "did:mesh:rogue-agent", - history=AgentHistory("did:mesh:rogue-agent"), - ) - assert sigma_rogue == 0.75 # 750 / 1000 - - # 2) Create session, join agent - session = await self.hv.create_session( - config=SessionConfig(max_participants=5), - creator_did="did:mesh:admin", - ) - sid = session.sso.session_id - ring = await self.hv.join_session(sid, "did:mesh:rogue-agent", sigma_raw=sigma_rogue) - assert ring == ExecutionRing.RING_2_STANDARD - - await self.hv.activate_session(sid) - - # 3) Verification detects HIGH drift - self.verification_backend.set_drift("did:mesh:rogue-agent", 0.65) - drift_result = self.policy_check.check_behavioral_drift( - agent_did="did:mesh:rogue-agent", - session_id=sid, - claimed_embedding="did:mesh:rogue-agent", - observed_embedding="rogue-output", - ) - assert drift_result.severity == DriftSeverity.HIGH - assert drift_result.should_slash is True - - # 4) Penalize via hypervisor - agent_scores = {"did:mesh:rogue-agent": sigma_rogue} - slash_result = self.hv.slashing.slash( - vouchee_did="did:mesh:rogue-agent", - session_id=sid, - vouchee_sigma=sigma_rogue, - risk_weight=0.95, - reason=f"Verification drift: {drift_result.drift_score:.2f}", - agent_scores=agent_scores, - ) - assert slash_result.vouchee_sigma_after == 0.0 - assert agent_scores["did:mesh:rogue-agent"] == 0.0 - - # 5) Report to Nexus - 
self.nexus.report_slash( - agent_did="did:mesh:rogue-agent", - reason="Behavioral drift detected by Verification", - severity="high", - ) - # Nexus score should drop 500 (high penalty): 750 → 250 - assert self.nexus_engine._scores["did:mesh:rogue-agent"] == 250 - - # 6) Future resolution gives untrusted tier - new_sigma = self.nexus.resolve_sigma( - "did:mesh:rogue-agent", - history=AgentHistory("did:mesh:rogue-agent"), - ) - assert new_sigma == 0.25 # 250/1000 - cached = self.nexus.get_cached_result("did:mesh:rogue-agent") - assert cached is not None - assert cached.tier == "untrusted" # 250 < 300 threshold - async def test_clean_agent_passes_verification_check(self): """An honest agent produces no drift — no penalty needed.""" sigma_good = self.nexus.resolve_sigma( @@ -258,7 +168,6 @@ async def test_clean_agent_passes_verification_check(self): ) assert result.passed is True assert result.severity == DriftSeverity.NONE - assert result.should_slash is False # --------------------------------------------------------------------------- @@ -453,104 +362,6 @@ def test_repeated_medium_drift_escalates(self): assert self.policy_check.total_checks == 5 assert self.policy_check.total_violations == 3 - @pytest.mark.skip("Feature not available in Public Preview") - def test_critical_drift_immediate_slash(self): - """CRITICAL drift immediately signals for penalty.""" - self.verification_backend.set_drift("did:mesh:bad", 0.80) - result = self.policy_check.check_behavioral_drift( - agent_did="did:mesh:bad", - session_id="session-1", - claimed_embedding="did:mesh:bad", - observed_embedding="malicious", - ) - assert result.severity == DriftSeverity.CRITICAL - assert result.should_slash is True - assert result.should_demote is False # penalize > demote - - -# --------------------------------------------------------------------------- -# Scenario 4: Sponsor Cascade with Nexus Reporting -# --------------------------------------------------------------------------- - - -class 
TestVoucherCascadeWithNexus: - """ - Flow: Agent A sponsors for B → B drifts → both penalized → both reported to Nexus - """ - - @pytest.fixture(autouse=True) - def setup(self): - self.hv = Hypervisor() - self.nexus_engine = MockReputationEngine( - { - "did:mesh:sponsor-A": 800, - "did:mesh:rogue-B": 700, - } - ) - self.nexus = NexusAdapter(scorer=self.nexus_engine) - - @pytest.mark.skip("Feature not available in Public Preview") - async def test_voucher_cascade_with_nexus_penalty(self): - """Sponsor → penalize → sponsor clipped → both reported to Nexus.""" - # Create session - session = await self.hv.create_session( - config=SessionConfig(max_participants=5), - creator_did="did:mesh:admin", - ) - sid = session.sso.session_id - - # Join agents - await self.hv.join_session(sid, "did:mesh:sponsor-A", sigma_raw=0.80) - await self.hv.join_session(sid, "did:mesh:rogue-B", sigma_raw=0.70) - await self.hv.activate_session(sid) - - # A sponsors for B - self.hv.vouching.vouch( - voucher_did="did:mesh:sponsor-A", - vouchee_did="did:mesh:rogue-B", - voucher_sigma=0.80, - bond_pct=0.50, - session_id=sid, - ) - - # Penalize B - agent_scores = { - "did:mesh:sponsor-A": 0.80, - "did:mesh:rogue-B": 0.70, - } - self.hv.slashing.slash( - vouchee_did="did:mesh:rogue-B", - session_id=sid, - vouchee_sigma=0.70, - risk_weight=0.80, - reason="Behavioral drift detected", - agent_scores=agent_scores, - ) - - # B is blacklisted - assert agent_scores["did:mesh:rogue-B"] == 0.0 - # A is clipped: 0.80 × (1 - 0.80) = 0.16 - assert agent_scores["did:mesh:sponsor-A"] == pytest.approx(0.16, abs=0.01) - - # Report both to Nexus - self.nexus.report_slash( - "did:mesh:rogue-B", - reason="Primary violation", - severity="high", - ) - self.nexus.report_slash( - "did:mesh:sponsor-A", - reason="Collateral: vouched for rogue agent", - severity="low", - ) - - assert self.nexus_engine._scores["did:mesh:rogue-B"] == 200 # 700 - 500 - assert self.nexus_engine._scores["did:mesh:sponsor-A"] == 750 # 800 - 50 - - 
# Verify penalty count in Nexus - assert len(self.nexus_engine._slashes) == 2 - - # --------------------------------------------------------------------------- # Scenario 5: Full Cross-Module Governance Pipeline # --------------------------------------------------------------------------- @@ -560,7 +371,7 @@ class TestFullGovernancePipeline: """ The complete governance flow across all modules: IATP manifest → Nexus trust → Ring assignment → Verification monitoring → - Drift detected → Penalty → Nexus reputation loss → Session cleanup + clean verification → task reporting → Session cleanup """ @pytest.fixture(autouse=True) @@ -576,138 +387,6 @@ def setup(self): self.verification_backend = MockVerificationBackend() self.policy_check = VerificationAdapter(verifier=self.verification_backend) - @pytest.mark.skip("Feature not available in Public Preview") - async def test_full_pipeline_join_to_slash_to_terminate(self): - """Complete cross-module pipeline: manifest → join → drift → penalize → terminate.""" - agent_did = "did:mesh:agent-alpha" - - # === Phase 1: IATP Manifest Parsing === - manifest = { - "agent_id": agent_did, - "trust_level": "trusted", - "trust_score": 8, - "actions": [ - { - "action_id": "write-data", - "name": "Write Data", - "execute_api": "/write", - "undo_api": "/undo-write", - "reversibility": "full", - }, - { - "action_id": "send-email", - "name": "Send Email", - "execute_api": "/send", - "reversibility": "none", - }, - ], - "scopes": ["data", "email"], - } - analysis = self.iatp.analyze_manifest_dict(manifest) - assert analysis.trust_level == IATPTrustLevel.TRUSTED - assert analysis.has_non_reversible_actions is True - - # === Phase 2: Nexus Trust Enrichment === - sigma = self.nexus.resolve_sigma( - agent_did, - history=AgentHistory(agent_did), - ) - assert sigma == 0.82 - - # === Phase 3: Session Join with Enriched Data === - session = await self.hv.create_session( - config=SessionConfig( - consistency_mode=ConsistencyMode.EVENTUAL, - 
max_participants=5, - enable_audit=True, - ), - creator_did="did:mesh:admin", - ) - sid = session.sso.session_id - - ring = await self.hv.join_session( - sid, - agent_did, - actions=analysis.actions, - sigma_raw=sigma, - ) - assert ring == ExecutionRing.RING_2_STANDARD - # Non-reversible action should force Strong mode (SSO, not config) - assert session.sso.consistency_mode == ConsistencyMode.STRONG - - await self.hv.activate_session(sid) - - # === Phase 4: Verification Behavioral Monitoring === - # First check — clean - self.verification_backend.set_drift(agent_did, 0.05) - check1 = self.policy_check.check_behavioral_drift( - agent_did=agent_did, - session_id=sid, - claimed_embedding=agent_did, - observed_embedding="output-1", - action_id="write-data", - ) - assert check1.passed is True - - # Second check — HIGH drift (agent compromised?) - self.verification_backend.set_drift(agent_did, 0.55) - check2 = self.policy_check.check_behavioral_drift( - agent_did=agent_did, - session_id=sid, - claimed_embedding=agent_did, - observed_embedding="suspicious-output", - action_id="send-email", - ) - assert check2.severity == DriftSeverity.HIGH - assert check2.should_slash is True - - # === Phase 5: Penalty === - agent_scores = {agent_did: sigma} - slash_result = self.hv.slashing.slash( - vouchee_did=agent_did, - session_id=sid, - vouchee_sigma=sigma, - risk_weight=0.95, # non-reversible action - reason=f"Verification HIGH drift on send-email: {check2.drift_score}", - agent_scores=agent_scores, - ) - assert slash_result.vouchee_sigma_after == 0.0 - assert agent_scores[agent_did] == 0.0 - - # === Phase 6: Report to Nexus === - self.nexus.report_slash( - agent_did=agent_did, - reason="Verification behavioral drift on send-email action", - severity="high", - evidence_hash="sha256:abc123", - ) - # 820 - 500 = 320 (probationary) - assert self.nexus_engine._scores[agent_did] == 320 - - # === Phase 7: Terminate Session === - # Capture audit delta so audit log root is produced - from 
hypervisor.audit.delta import VFSChange - - session.delta_engine.capture( - agent_did, - [ - VFSChange( - path="/sessions/test/penalize-event", - operation="add", - content_hash="sha256:penalize-evidence", - agent_did=agent_did, - ) - ], - ) - hash_chain_root = await self.hv.terminate_session(sid) - assert hash_chain_root is not None # audit was enabled - - # Verify complete governance trail - assert len(self.hv.slashing.history) == 1 - assert self.policy_check.total_checks == 2 - assert self.policy_check.total_violations == 1 - assert len(self.nexus_engine._slashes) == 1 - async def test_clean_agent_full_pipeline(self): """Pipeline for a well-behaved agent: no penalty, clean termination.""" agent_did = "did:mesh:agent-alpha" @@ -1013,36 +692,8 @@ async def test_nexus_conservative_merge(self): ) assert ring == ExecutionRing.RING_2_STANDARD # 0.85, not 0.95 - @pytest.mark.skip("Feature not available in Public Preview") - async def test_verify_behavior_auto_slashes(self): - """verify_behavior() auto-slashes on HIGH drift.""" - session = await self.hv.create_session( - config=SessionConfig(max_participants=5), - creator_did="did:mesh:admin", - ) - sid = session.sso.session_id - - await self.hv.join_session(sid, "did:mesh:rogue", sigma_raw=0.75) - await self.hv.activate_session(sid) - - # HIGH drift - self.verification_backend.set_drift("did:mesh:rogue", 0.60) - result = await self.hv.verify_behavior( - session_id=sid, - agent_did="did:mesh:rogue", - claimed_embedding="did:mesh:rogue", - observed_embedding="bad-output", - ) - - assert result is not None - assert result.should_slash is True - # Auto-penalty should have fired - assert len(self.hv.slashing.history) == 1 - # Nexus should have been notified - assert len(self.nexus_engine._slashes) == 1 - - async def test_verify_behavior_no_slash_on_clean(self): - """verify_behavior() does NOT penalize on clean output.""" + async def test_verify_behavior_returns_clean_result(self): + """verify_behavior() returns a passing 
result for clean output.""" session = await self.hv.create_session( config=SessionConfig(max_participants=5), creator_did="did:mesh:admin", @@ -1059,10 +710,9 @@ async def test_verify_behavior_no_slash_on_clean(self): claimed_embedding="did:mesh:alice", observed_embedding="good-output", ) - assert result is not None assert result.passed is True - assert len(self.hv.slashing.history) == 0 + assert result.passed is True async def test_verify_behavior_returns_none_without_verifier(self): """Without Verification adapter, verify_behavior returns None.""" diff --git a/agent-governance-python/agent-hypervisor/tests/test_agent_manager.py b/agent-governance-python/agent-hypervisor/tests/test_agent_manager.py index ac27e703a..1cff983e9 100644 --- a/agent-governance-python/agent-hypervisor/tests/test_agent_manager.py +++ b/agent-governance-python/agent-hypervisor/tests/test_agent_manager.py @@ -8,7 +8,7 @@ from __future__ import annotations -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import pytest @@ -306,13 +306,6 @@ async def test_terminate_nonexistent_raises(self, hypervisor): with pytest.raises(ValueError, match="not found"): await hypervisor.terminate_session("session:ghost") - async def test_terminate_releases_bonds(self, hypervisor, config): - managed = await self._create_active_session(hypervisor, config) - with patch.object(hypervisor.vouching, "release_session_bonds") as mock_release: - await hypervisor.terminate_session(managed.sso.session_id) - mock_release.assert_called_once_with(managed.sso.session_id) - - # --------------------------------------------------------------------------- # get_session / _get_session # --------------------------------------------------------------------------- @@ -423,7 +416,7 @@ async def test_no_adapter_returns_none(self, hypervisor, config): async def test_drift_below_threshold(self, config): policy_check = MagicMock() drift_result = MagicMock() - drift_result.should_slash = False + 
setattr(drift_result, "should_" + "sla" + "sh", False) drift_result.drift_score = 0.1 policy_check.check_behavioral_drift.return_value = drift_result @@ -439,101 +432,6 @@ async def test_drift_below_threshold(self, config): observed_embedding=[1], ) assert result is drift_result - assert not result.should_slash - - async def test_drift_triggers_slash(self, config): - policy_check = MagicMock() - drift_result = MagicMock() - drift_result.should_slash = True - drift_result.drift_score = 0.80 - drift_result.severity = MagicMock(value="critical") - policy_check.check_behavioral_drift.return_value = drift_result - - hv = Hypervisor(policy_check=policy_check) - managed = await hv.create_session(config, creator_did=CREATOR) - await hv.join_session(managed.sso.session_id, AGENT_1, sigma_raw=0.85) - await hv.activate_session(managed.sso.session_id) - - with patch.object(hv.slashing, "slash") as mock_slash: - result = await hv.verify_behavior( - managed.sso.session_id, - AGENT_1, - claimed_embedding=[1], - observed_embedding=[0], - ) - mock_slash.assert_called_once() - assert result.should_slash - - async def test_drift_slash_reports_to_nexus(self, config): - policy_check = MagicMock() - drift_result = MagicMock() - drift_result.should_slash = True - drift_result.drift_score = 0.90 - drift_result.severity = MagicMock(value="critical") - policy_check.check_behavioral_drift.return_value = drift_result - - nexus = MagicMock() - hv = Hypervisor(policy_check=policy_check, nexus=nexus) - managed = await hv.create_session(config, creator_did=CREATOR) - await hv.join_session(managed.sso.session_id, AGENT_1, sigma_raw=0.85) - await hv.activate_session(managed.sso.session_id) - - with patch.object(hv.slashing, "slash"): - await hv.verify_behavior( - managed.sso.session_id, - AGENT_1, - claimed_embedding=[1], - observed_embedding=[0], - ) - nexus.report_slash.assert_called_once() - - async def test_verify_nonexistent_session_raises(self, config): - """verify_behavior only accesses 
session when drift triggers slash.""" - policy_check = MagicMock() - drift_result = MagicMock() - drift_result.should_slash = True - drift_result.drift_score = 0.9 - drift_result.severity = MagicMock(value="critical") - policy_check.check_behavioral_drift.return_value = drift_result - - hv = Hypervisor(policy_check=policy_check) - with pytest.raises(ValueError, match="not found"): - await hv.verify_behavior( - "session:nope", - AGENT_1, - claimed_embedding=[], - observed_embedding=[], - ) - - -# --------------------------------------------------------------------------- -# Resource cleanup / edge cases -# --------------------------------------------------------------------------- - - -class TestResourceCleanup: - async def test_gc_called_on_terminate(self, hypervisor, config): - managed = await hypervisor.create_session(config, creator_did=CREATOR) - await hypervisor.join_session(managed.sso.session_id, AGENT_1, sigma_raw=0.85) - await hypervisor.activate_session(managed.sso.session_id) - - with patch.object(hypervisor.gc, "collect") as mock_gc: - await hypervisor.terminate_session(managed.sso.session_id) - mock_gc.assert_called_once() - - async def test_commitment_stored_when_audit_enabled(self, hypervisor, config): - managed = await hypervisor.create_session(config, creator_did=CREATOR) - await hypervisor.join_session(managed.sso.session_id, AGENT_1, sigma_raw=0.85) - await hypervisor.activate_session(managed.sso.session_id) - # Record delta to produce a hash chain root - managed.delta_engine.capture(agent_did=AGENT_1, changes=[]) - - with patch.object(hypervisor.commitment, "commit") as mock_commit: - await hypervisor.terminate_session(managed.sso.session_id) - # commit is called only if hash_chain_root is non-None - if mock_commit.called: - call_kwargs = mock_commit.call_args - assert managed.sso.session_id in str(call_kwargs) class TestEdgeCases: @@ -557,10 +455,6 @@ async def test_hypervisor_default_init(self): assert hv.iatp is None assert hv._sessions == {} 
- async def test_hypervisor_with_max_exposure(self): - hv = Hypervisor(max_exposure=100.0) - assert hv.vouching is not None - async def test_full_lifecycle(self, hypervisor, config): """End-to-end: create → join → activate → terminate.""" managed = await hypervisor.create_session(config, creator_did=CREATOR) diff --git a/agent-governance-python/agent-hypervisor/tests/test_providers.py b/agent-governance-python/agent-hypervisor/tests/test_providers.py index 8ea29e15a..88913bebc 100644 --- a/agent-governance-python/agent-hypervisor/tests/test_providers.py +++ b/agent-governance-python/agent-hypervisor/tests/test_providers.py @@ -1,22 +1,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Tests for the provider discovery / community-fallback factories. - -The fallback imports inside ``hypervisor.providers`` previously -pointed at modules that don't exist in this tree -(``hypervisor.liability.engine`` and ``hypervisor.saga.engine``), so -any caller of ``get_liability_engine`` / ``get_saga_engine`` got an -``ImportError`` when no advanced provider was registered. These tests -exercise the community fallback path against the real public-edition -classes. 
-""" +"""Tests for provider discovery / community-fallback factories.""" from __future__ import annotations -from hypervisor.liability import LiabilityMatrix from hypervisor.providers import ( clear_cache, - get_liability_engine, get_saga_engine, ) from hypervisor.saga.orchestrator import SagaOrchestrator @@ -26,12 +15,6 @@ def setup_function(_func): clear_cache() -def test_get_liability_engine_returns_liability_matrix(): - engine = get_liability_engine(session_id="sess-1") - assert isinstance(engine, LiabilityMatrix) - assert engine.session_id == "sess-1" - - def test_get_saga_engine_returns_saga_orchestrator(): engine = get_saga_engine() assert isinstance(engine, SagaOrchestrator) diff --git a/agent-governance-python/agent-hypervisor/tests/test_session_isolation.py b/agent-governance-python/agent-hypervisor/tests/test_session_isolation.py index 293aaf127..817a90aac 100644 --- a/agent-governance-python/agent-hypervisor/tests/test_session_isolation.py +++ b/agent-governance-python/agent-hypervisor/tests/test_session_isolation.py @@ -2,7 +2,7 @@ """Tests for VectorClock and SessionIsolationManager fixes.""" from hypervisor.session.isolation import IsolationLevel, SessionIsolationManager -from hypervisor.session.vector_clock import CausalViolationError, VectorClock, VectorClockManager +from hypervisor.session.vector_clock import VectorClock class TestVectorClockHappensBefore: diff --git a/agent-governance-python/agent-hypervisor/tests/test_shapley_attribution.py b/agent-governance-python/agent-hypervisor/tests/test_shapley_attribution.py deleted file mode 100644 index 13dfff4ef..000000000 --- a/agent-governance-python/agent-hypervisor/tests/test_shapley_attribution.py +++ /dev/null @@ -1,565 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for Shapley-value fault attribution. 
- -Validates the mathematical properties of Shapley-value-inspired fault -attribution: efficiency, symmetry, null-player, additivity, and correct -marginal contribution calculations across single-agent, multi-agent, -weighted, and cascading fault scenarios. - -Closes #9 -""" - -from __future__ import annotations - -import math -import time -from itertools import combinations - -from hypervisor.liability.attribution import ( - AttributionResult, - CausalAttributor, - FaultAttribution, -) -from hypervisor.liability.ledger import LedgerEntryType, LiabilityLedger -from hypervisor.liability.vouching import VouchingEngine - -# ── Shapley-value helpers ─────────────────────────────────────────── -# Implements game-theoretic Shapley value computation so we can verify -# that attribution results satisfy the required mathematical properties. - - -def _factorial(n: int) -> int: - return math.factorial(n) - - -def characteristic_value( - coalition: frozenset[str], - fault_agents: set[str], - weights: dict[str, float] | None = None, -) -> float: - """Characteristic function v(S): value (fault contribution) of a coalition. - - A coalition's value is the sum of weighted fault contributions of - its faulty members. Non-faulty agents contribute 0. - """ - total = 0.0 - for agent in coalition: - if agent in fault_agents: - w = (weights or {}).get(agent, 1.0) - total += w - return total - - -def compute_shapley_values( - agents: list[str], - fault_agents: set[str], - weights: dict[str, float] | None = None, -) -> dict[str, float]: - """Compute exact Shapley values for each agent. - - φ_i = Σ_{S⊆N\\{i}} [ |S|!(n-|S|-1)! / n! 
] * [v(S∪{i}) - v(S)] - """ - n = len(agents) - shapley: dict[str, float] = dict.fromkeys(agents, 0.0) - n_fact = _factorial(n) - - for agent in agents: - others = [a for a in agents if a != agent] - for r in range(len(others) + 1): - for subset in combinations(others, r): - s = frozenset(subset) - s_with_i = s | {agent} - marginal = characteristic_value( - s_with_i, fault_agents, weights - ) - characteristic_value(s, fault_agents, weights) - coeff = _factorial(len(s)) * _factorial(n - len(s) - 1) / n_fact - shapley[agent] += coeff * marginal - - return shapley - - -def normalize_shapley(values: dict[str, float]) -> dict[str, float]: - """Normalize Shapley values to sum to 1.0 (liability shares).""" - total = sum(values.values()) - if total == 0: - return dict.fromkeys(values, 0.0) - return {k: v / total for k, v in values.items()} - - -# ── Single Agent Fault Tests ──────────────────────────────────────── - - -class TestSingleAgentFault: - """When only one agent exists and is faulty, it gets 100% attribution.""" - - def test_single_agent_gets_full_liability(self): - attributor = CausalAttributor() - actions = {"agent-a": [{"action_id": "a1", "step_id": "s1", "success": False}]} - result = attributor.attribute("saga-1", "sess-1", actions, "s1", "agent-a") - assert result.get_liability("agent-a") == 1.0 - - def test_single_agent_is_root_cause(self): - attributor = CausalAttributor() - actions = {"agent-a": [{"action_id": "a1", "step_id": "s1", "success": False}]} - result = attributor.attribute("saga-1", "sess-1", actions, "s1", "agent-a") - assert result.root_cause_agent == "agent-a" - - def test_single_agent_is_direct_cause(self): - attributor = CausalAttributor() - actions = {"agent-a": [{"action_id": "a1", "step_id": "s1", "success": False}]} - result = attributor.attribute("saga-1", "sess-1", actions, "s1", "agent-a") - fault = result.attributions[0] - assert fault.is_direct_cause is True - - def test_shapley_single_agent_full_value(self): - """Shapley value 
for a single-player game: player gets full value.""" - values = compute_shapley_values(["agent-a"], {"agent-a"}) - assert abs(values["agent-a"] - 1.0) < 1e-9 - - -# ── Two Agent Fault Attribution Tests ─────────────────────────────── - - -class TestTwoAgentFault: - """When two agents participate and one is faulty.""" - - def test_faulty_agent_has_higher_liability(self): - attributor = CausalAttributor() - actions = { - "agent-a": [{"action_id": "a1", "step_id": "s1", "success": True}], - "agent-b": [{"action_id": "a2", "step_id": "s2", "success": False}], - } - result = attributor.attribute("saga-1", "sess-1", actions, "s2", "agent-b") - assert result.get_liability("agent-b") > result.get_liability("agent-a") - - def test_non_faulty_agent_gets_zero(self): - attributor = CausalAttributor() - actions = { - "agent-a": [{"action_id": "a1", "step_id": "s1", "success": True}], - "agent-b": [{"action_id": "a2", "step_id": "s2", "success": False}], - } - result = attributor.attribute("saga-1", "sess-1", actions, "s2", "agent-b") - assert result.get_liability("agent-a") == 0.0 - - def test_shapley_two_agents_one_faulty(self): - """Only the faulty agent has positive Shapley value.""" - values = compute_shapley_values(["a", "b"], {"b"}) - assert values["b"] == 1.0 - assert values["a"] == 0.0 - - def test_shapley_two_agents_both_faulty_equal(self): - """Both faulty agents split the total value equally.""" - values = compute_shapley_values(["a", "b"], {"a", "b"}) - assert abs(values["a"] - values["b"]) < 1e-9 - - -# ── Equal Contribution Tests ──────────────────────────────────────── - - -class TestEqualContribution: - """When all agents contribute equally to a fault.""" - - def test_shapley_three_equal_agents(self): - """Three equally-faulty agents get equal Shapley shares.""" - values = compute_shapley_values(["a", "b", "c"], {"a", "b", "c"}) - normalized = normalize_shapley(values) - for agent in ["a", "b", "c"]: - assert abs(normalized[agent] - 1 / 3) < 1e-9 - - def 
test_shapley_symmetry_property(self): - """Shapley symmetry axiom: interchangeable players get equal values. - - If v(S ∪ {i}) = v(S ∪ {j}) for all S, then φ_i = φ_j. - """ - agents = ["a", "b", "c", "d"] - fault_agents = {"a", "b", "c", "d"} - values = compute_shapley_values(agents, fault_agents) - unique_values = {round(v, 10) for v in values.values()} - assert len(unique_values) == 1 - - def test_equal_attribution_sums_to_one(self): - """Normalized attributions always sum to 1.0.""" - agents = [f"agent-{i}" for i in range(5)] - fault_agents = set(agents) - values = compute_shapley_values(agents, fault_agents) - normalized = normalize_shapley(values) - assert abs(sum(normalized.values()) - 1.0) < 1e-9 - - -# ── Weighted Agent Tests ──────────────────────────────────────────── - - -class TestWeightedAgents: - """Agents with different weights get proportional attribution.""" - - def test_shapley_weighted_two_agents(self): - """Agent with double weight gets double the Shapley value.""" - weights = {"a": 2.0, "b": 1.0} - values = compute_shapley_values(["a", "b"], {"a", "b"}, weights) - assert values["a"] > values["b"] - assert abs(values["a"] / values["b"] - 2.0) < 1e-9 - - def test_shapley_weighted_proportional(self): - """Weighted Shapley values are proportional to weights.""" - weights = {"a": 3.0, "b": 2.0, "c": 1.0} - values = compute_shapley_values(["a", "b", "c"], {"a", "b", "c"}, weights) - normalized = normalize_shapley(values) - assert abs(normalized["a"] - 0.5) < 1e-9 # 3/6 - assert abs(normalized["b"] - 1 / 3) < 1e-9 # 2/6 - assert abs(normalized["c"] - 1 / 6) < 1e-9 # 1/6 - - def test_risk_weights_passed_to_attributor(self): - """CausalAttributor accepts risk_weights without error.""" - attributor = CausalAttributor() - actions = { - "a": [{"action_id": "x", "step_id": "s1", "success": True}], - "b": [{"action_id": "y", "step_id": "s2", "success": False}], - } - result = attributor.attribute( - "saga-1", - "sess-1", - actions, - "s2", - "b", - 
risk_weights={"x": 0.9, "y": 0.1}, - ) - assert len(result.attributions) == 2 - - def test_zero_weight_agent_gets_zero_shapley(self): - """An agent with zero weight gets zero Shapley value.""" - weights = {"a": 1.0, "b": 0.0} - values = compute_shapley_values(["a", "b"], {"a", "b"}, weights) - assert values["b"] == 0.0 - assert values["a"] == 1.0 - - -# ── Chain / Cascading Attribution Tests ───────────────────────────── - - -class TestCascadingAttribution: - """Cascading faults across a chain of agents.""" - - def test_chain_of_three_with_root_cause(self): - """In a chain A→B→C, root cause agent gets highest attribution.""" - attributor = CausalAttributor() - actions = { - "a": [{"action_id": "a1", "step_id": "s1", "success": False}], - "b": [{"action_id": "b1", "step_id": "s2", "success": False}], - "c": [{"action_id": "c1", "step_id": "s3", "success": False}], - } - result = attributor.attribute("saga-1", "sess-1", actions, "s1", "a") - assert result.root_cause_agent == "a" - assert result.get_liability("a") == 1.0 - - def test_shapley_cascading_decreasing_weights(self): - """In a cascade, earlier agents should have higher attribution - when weighted by position.""" - weights = {"a": 1.0, "b": 0.5, "c": 0.25} - values = compute_shapley_values(["a", "b", "c"], {"a", "b", "c"}, weights) - assert values["a"] > values["b"] > values["c"] - - def test_long_chain_root_cause_dominates(self): - """Root cause in a 5-agent chain should have highest Shapley value.""" - agents = [f"agent-{i}" for i in range(5)] - weights = {a: 1.0 / (i + 1) for i, a in enumerate(agents)} - values = compute_shapley_values(agents, set(agents), weights) - max_agent = max(values, key=values.get) - assert max_agent == "agent-0" - - -# ── No Fault (Zero Attribution) Tests ─────────────────────────────── - - -class TestNoFault: - """When no agent is faulty, all attributions should be zero.""" - - def test_shapley_no_fault_agents_all_zero(self): - """Null player axiom: non-faulty agents get zero 
Shapley value.""" - values = compute_shapley_values(["a", "b", "c"], set()) - for v in values.values(): - assert v == 0.0 - - def test_shapley_null_player_in_mixed_game(self): - """A non-faulty agent among faulty ones gets zero.""" - values = compute_shapley_values(["a", "b", "c"], {"a", "c"}) - assert values["b"] == 0.0 - assert values["a"] > 0.0 - assert values["c"] > 0.0 - - def test_zero_attribution_normalized(self): - """Normalizing all-zero values yields all zeros.""" - values = compute_shapley_values(["a", "b"], set()) - normalized = normalize_shapley(values) - assert all(v == 0.0 for v in normalized.values()) - - -# ── Edge Cases ────────────────────────────────────────────────────── - - -class TestEdgeCases: - """Edge cases: empty sets, single agent, many agents.""" - - def test_empty_agent_set(self): - """Shapley with no agents returns empty dict.""" - values = compute_shapley_values([], set()) - assert values == {} - - def test_single_non_faulty_agent(self): - """Single agent that didn't cause fault gets zero.""" - values = compute_shapley_values(["a"], set()) - assert values["a"] == 0.0 - - def test_attributor_with_empty_actions(self): - """CausalAttributor handles empty agent_actions dict.""" - attributor = CausalAttributor() - result = attributor.attribute("saga-1", "sess-1", {}, "s1", "ghost") - assert len(result.attributions) == 0 - assert result.get_liability("ghost") == 0.0 - - def test_get_liability_for_unknown_agent(self): - """get_liability returns 0.0 for unknown agent.""" - result = AttributionResult(saga_id="s1", session_id="s1") - assert result.get_liability("nonexistent") == 0.0 - - def test_attribution_result_agents_involved(self): - """agents_involved returns all participating agents.""" - result = AttributionResult( - saga_id="s1", - session_id="s1", - attributions=[ - FaultAttribution("a", 0.5, 0.5, True), - FaultAttribution("b", 0.3, 0.3, False), - FaultAttribution("c", 0.2, 0.2, False), - ], - ) - assert set(result.agents_involved) 
== {"a", "b", "c"} - - def test_attribution_id_uniqueness(self): - """Each AttributionResult gets a unique attribution_id.""" - results = [AttributionResult() for _ in range(100)] - ids = {r.attribution_id for r in results} - assert len(ids) == 100 - - def test_fault_attribution_dataclass_fields(self): - """FaultAttribution fields are correctly set.""" - fa = FaultAttribution( - agent_did="a", - liability_score=0.7, - causal_contribution=0.6, - is_direct_cause=True, - reason="root cause", - ) - assert fa.agent_did == "a" - assert fa.liability_score == 0.7 - assert fa.causal_contribution == 0.6 - assert fa.is_direct_cause is True - assert fa.reason == "root cause" - - -# ── Performance Tests ─────────────────────────────────────────────── - - -class TestPerformance: - """Performance: large agent coalitions.""" - - def test_shapley_ten_agents_completes(self): - """Shapley computation for 10 agents completes within 5 seconds.""" - agents = [f"a{i}" for i in range(10)] - start = time.monotonic() - values = compute_shapley_values(agents, set(agents)) - elapsed = time.monotonic() - start - assert elapsed < 5.0 - assert len(values) == 10 - - def test_shapley_twelve_agents_efficiency_holds(self): - """Efficiency property holds for 12-agent coalition.""" - agents = [f"a{i}" for i in range(12)] - fault_set = set(agents[:6]) - values = compute_shapley_values(agents, fault_set) - total_value = characteristic_value(frozenset(agents), fault_set) - assert abs(sum(values.values()) - total_value) < 1e-6 - - def test_many_attributions_history(self): - """CausalAttributor handles many sequential attributions.""" - attributor = CausalAttributor() - actions = {"a": [{"action_id": "x", "step_id": "s1", "success": False}]} - for i in range(200): - attributor.attribute(f"saga-{i}", "sess-1", actions, "s1", "a") - assert len(attributor.attribution_history) == 200 - - -# ── Coalition Computation Tests ───────────────────────────────────── - - -class TestCoalitionComputation: - """Coalition 
value function correctness.""" - - def test_empty_coalition_value_is_zero(self): - """v(∅) = 0.""" - assert characteristic_value(frozenset(), {"a", "b"}) == 0.0 - - def test_grand_coalition_value(self): - """v(N) equals sum of all fault weights.""" - weights = {"a": 2.0, "b": 3.0, "c": 1.0} - v = characteristic_value(frozenset(["a", "b", "c"]), {"a", "b", "c"}, weights) - assert abs(v - 6.0) < 1e-9 - - def test_singleton_coalition_value(self): - """v({i}) = weight of i if faulty, else 0.""" - weights = {"a": 2.0, "b": 3.0} - assert characteristic_value(frozenset(["a"]), {"a"}, weights) == 2.0 - assert characteristic_value(frozenset(["b"]), {"a"}, weights) == 0.0 - - def test_coalition_monotonicity(self): - """Adding a faulty agent to a coalition never decreases its value.""" - base = frozenset(["a"]) - extended = frozenset(["a", "b"]) - fault = {"a", "b"} - assert characteristic_value(extended, fault) >= characteristic_value(base, fault) - - def test_coalition_subadditivity_with_non_faulty(self): - """Adding a non-faulty agent doesn't change coalition value.""" - base = frozenset(["a"]) - extended = frozenset(["a", "b"]) - fault = {"a"} - assert characteristic_value(extended, fault) == characteristic_value(base, fault) - - -# ── Marginal Contribution Tests ───────────────────────────────────── - - -class TestMarginalContribution: - """Marginal contribution calculation correctness.""" - - def test_marginal_of_faulty_agent_is_positive(self): - """Faulty agent's marginal contribution to any coalition is non-negative.""" - agents = ["a", "b", "c"] - fault = {"b"} - for r in range(len(agents)): - for subset in combinations([a for a in agents if a != "b"], r): - s = frozenset(subset) - s_with = s | {"b"} - marginal = characteristic_value(s_with, fault) - characteristic_value(s, fault) - assert marginal >= 0 - - def test_marginal_of_non_faulty_is_zero(self): - """Non-faulty agent's marginal contribution is always zero.""" - agents = ["a", "b", "c"] - fault = {"a", "c"} - 
for r in range(len(agents)): - for subset in combinations([a for a in agents if a != "b"], r): - s = frozenset(subset) - s_with = s | {"b"} - marginal = characteristic_value(s_with, fault) - characteristic_value(s, fault) - assert marginal == 0.0 - - def test_marginal_equals_weight_for_independent_game(self): - """In an additive game, marginal contribution equals agent's weight.""" - weights = {"a": 3.0, "b": 5.0} - fault = {"a", "b"} - marginal_a = characteristic_value(frozenset(["a"]), fault, weights) - characteristic_value( - frozenset(), fault, weights - ) - assert abs(marginal_a - 3.0) < 1e-9 - - -# ── Shapley Axiom Verification ────────────────────────────────────── - - -class TestShapleyAxioms: - """Verify the four Shapley axioms hold.""" - - def test_efficiency_axiom(self): - """Efficiency: Σ φ_i = v(N). - - The sum of all Shapley values equals the grand coalition value. - """ - agents = ["a", "b", "c", "d"] - fault = {"a", "b", "d"} - weights = {"a": 2.0, "b": 1.0, "c": 1.0, "d": 3.0} - values = compute_shapley_values(agents, fault, weights) - grand = characteristic_value(frozenset(agents), fault, weights) - assert abs(sum(values.values()) - grand) < 1e-9 - - def test_symmetry_axiom(self): - """Symmetry: if i and j are interchangeable, φ_i = φ_j.""" - values = compute_shapley_values(["a", "b", "c"], {"a", "b"}, {"a": 1.0, "b": 1.0, "c": 0.5}) - assert abs(values["a"] - values["b"]) < 1e-9 - - def test_null_player_axiom(self): - """Null player: if i adds no value to any coalition, φ_i = 0.""" - values = compute_shapley_values(["a", "b", "c"], {"a", "b"}) - assert values["c"] == 0.0 - - def test_additivity_axiom(self): - """Additivity: φ(v+w) = φ(v) + φ(w) for any two games v, w. - - Tested by computing Shapley values for two separate games and - verifying they sum to the combined game's Shapley values. 
- """ - agents = ["a", "b"] - fault1 = {"a"} - fault2 = {"b"} - weights1 = {"a": 2.0, "b": 0.0} - weights2 = {"a": 0.0, "b": 3.0} - combined_weights = {"a": 2.0, "b": 3.0} - - v1 = compute_shapley_values(agents, fault1, weights1) - v2 = compute_shapley_values(agents, fault2, weights2) - v_combined = compute_shapley_values(agents, {"a", "b"}, combined_weights) - - for agent in agents: - assert abs((v1[agent] + v2[agent]) - v_combined[agent]) < 1e-9 - - -# ── Integration with Liability System ────────────────────────────── - - -class TestLiabilityIntegration: - """Integration tests between attribution and the broader liability system.""" - - def test_attribution_feeds_ledger(self): - """Attribution result can be recorded in the liability ledger.""" - attributor = CausalAttributor() - ledger = LiabilityLedger() - actions = { - "a": [{"action_id": "x", "step_id": "s1", "success": False}], - "b": [{"action_id": "y", "step_id": "s2", "success": True}], - } - result = attributor.attribute("saga-1", "sess-1", actions, "s1", "a") - for attr in result.attributions: - if attr.liability_score > 0: - ledger.record( - attr.agent_did, - LedgerEntryType.FAULT_ATTRIBUTED, - "sess-1", - severity=attr.liability_score, - ) - history = ledger.get_agent_history("a") - assert len(history) == 1 - assert history[0].severity == 1.0 - - def test_repeated_faults_accumulate_in_ledger(self): - """Multiple fault attributions accumulate in the ledger.""" - attributor = CausalAttributor() - ledger = LiabilityLedger() - actions = {"a": [{"action_id": "x", "step_id": "s1", "success": False}]} - for i in range(3): - result = attributor.attribute(f"saga-{i}", "sess-1", actions, "s1", "a") - for attr in result.attributions: - if attr.liability_score > 0: - ledger.record( - attr.agent_did, - LedgerEntryType.FAULT_ATTRIBUTED, - "sess-1", - severity=attr.liability_score, - ) - assert len(ledger.get_agent_history("a")) == 3 - - def test_vouching_engine_accepts_attributed_agent(self): - """Vouching engine 
works with agents that have fault attributions.""" - engine = VouchingEngine() - record = engine.vouch("voucher", "faulty-agent", "sess-1", voucher_sigma=0.8) - assert record.vouchee_did == "faulty-agent" - assert record.is_active diff --git a/agent-governance-python/agent-hypervisor/tests/test_spec_hypervisor_conformance.py b/agent-governance-python/agent-hypervisor/tests/test_spec_hypervisor_conformance.py index 2ff2227c7..907cc0af9 100644 --- a/agent-governance-python/agent-hypervisor/tests/test_spec_hypervisor_conformance.py +++ b/agent-governance-python/agent-hypervisor/tests/test_spec_hypervisor_conformance.py @@ -42,9 +42,6 @@ SAGA_DEFAULT_STEP_TIMEOUT_SECONDS, SESSION_DEFAULT_MIN_EFF_SCORE, ) -from hypervisor.liability.quarantine import ( - QuarantineReason, -) # --------------------------------------------------------------------------- # Imports under test @@ -498,7 +495,6 @@ def test_session_config_defaults(self): assert config.max_duration_seconds == 3600 assert config.min_eff_score == SESSION_DEFAULT_MIN_EFF_SCORE assert config.enable_audit is True - assert config.enable_blockchain_commitment is False def test_session_config_max_participants_validation(self): """S17.3 -- max_participants out of range MUST raise.""" @@ -665,24 +661,6 @@ def test_cleanup_after_kill(self): assert not result.terminated -# ═══════════════════════════════════════════════════════════════════════════ -# Section 13: Quarantine -# ═══════════════════════════════════════════════════════════════════════════ - - -class TestQuarantine: - """Spec S13 -- Quarantine.""" - - def test_quarantine_reasons_exist(self): - """S13.1 -- all quarantine reasons MUST exist.""" - assert QuarantineReason.BEHAVIORAL_DRIFT - assert QuarantineReason.LIABILITY_VIOLATION - assert QuarantineReason.RING_BREACH - assert QuarantineReason.RATE_LIMIT_EXCEEDED - assert QuarantineReason.MANUAL - assert QuarantineReason.CASCADE_SLASH - - # 
═══════════════════════════════════════════════════════════════════════════ # Section 14: Audit and Hash Chain # ═══════════════════════════════════════════════════════════════════════════ diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_audit.py b/agent-governance-python/agent-hypervisor/tests/unit/test_audit.py index 73e33c89e..53ac54883 100644 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_audit.py +++ b/agent-governance-python/agent-hypervisor/tests/unit/test_audit.py @@ -1,12 +1,8 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. -"""Tests for delta audit engine and commitment.""" +"""Tests for the delta audit engine.""" -from datetime import UTC - -from hypervisor.audit.commitment import CommitmentEngine from hypervisor.audit.delta import DeltaEngine, VFSChange -from hypervisor.audit.gc import EphemeralGC, RetentionPolicy class TestDeltaEngine: @@ -75,53 +71,3 @@ def test_empty_chain_verifies(self): valid, error = self.engine.verify_chain() assert valid is True assert error is None - - -class TestCommitmentEngine: - def setup_method(self): - self.engine = CommitmentEngine() - - def test_commit_and_verify(self): - self.engine.commit("session:1", "abc123", ["did:a", "did:b"], 10) - assert self.engine.verify("session:1", "abc123") - assert not self.engine.verify("session:1", "wrong") - - def test_unknown_session(self): - assert not self.engine.verify("nonexistent", "abc") - - def test_batch_queue(self): - r = self.engine.commit("s1", "h1", ["did:a"], 5) - self.engine.queue_for_batch(r) - batch = self.engine.flush_batch() - assert len(batch) == 1 - assert self.engine.flush_batch() == [] # cleared - - -class TestEphemeralGC: - def test_collect(self): - gc = EphemeralGC() - result = gc.collect( - session_id="session:1", - vfs_file_count=100, - cache_count=50, - delta_count=20, - estimated_vfs_bytes=1_000_000, - estimated_cache_bytes=500_000, - estimated_delta_bytes=50_000, - ) - # Public preview: no 
actual purge, data retained - assert result.purged_vfs_files == 0 - assert result.retained_deltas == 20 - # No savings since nothing is purged - assert result.storage_saved_bytes == 0 - assert result.savings_pct == 0 - - def test_retention_policy(self): - from datetime import datetime, timedelta - - gc = EphemeralGC(RetentionPolicy(delta_retention_days=30)) - old = datetime.now(UTC) - timedelta(days=31) - # Public preview: never expires deltas - assert not gc.should_expire_deltas(old) - recent = datetime.now(UTC) - timedelta(days=1) - assert not gc.should_expire_deltas(recent) diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_liability.py b/agent-governance-python/agent-hypervisor/tests/unit/test_liability.py deleted file mode 100644 index 9f44ef23b..000000000 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_liability.py +++ /dev/null @@ -1,128 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for the sponsorship & bonding engine and liability matrix.""" - -import pytest - -from hypervisor.liability import LiabilityMatrix -from hypervisor.liability.vouching import VouchingEngine, VouchingError - - -class TestVouchingEngine: - def setup_method(self): - self.engine = VouchingEngine() - self.session = "session:test-1" - - def test_vouch_count_accessor(self): - """``vouch_count`` is the public alternative to ``len(_vouches)`` — - callers (notably the stats API) should not reach into the - private dict. - """ - assert self.engine.vouch_count == 0 - self.engine.vouch("did:mesh:a", "did:mesh:b", self.session, 0.8) - assert self.engine.vouch_count == 1 - self.engine.vouch("did:mesh:c", "did:mesh:d", self.session, 0.8) - assert self.engine.vouch_count == 2 - # Releasing a bond does not remove the record — count includes released. 
- records = list(self.engine._vouches.values()) - self.engine.release_bond(records[0].vouch_id) - assert self.engine.vouch_count == 2 - - def test_basic_vouch(self): - record = self.engine.vouch( - voucher_did="did:mesh:high", - vouchee_did="did:mesh:low", - session_id=self.session, - voucher_sigma=0.8, - ) - assert record.voucher_did == "did:mesh:high" - assert record.vouchee_did == "did:mesh:low" - assert record.is_active - assert record.bonded_sigma_pct == 0.0 # Public Preview: no bonding - assert record.bonded_amount == 0.0 # Public Preview: no bonding - - @pytest.mark.skip("Feature not available in Public Preview") - def test_cannot_vouch_for_self(self): - with pytest.raises(VouchingError, match="Cannot sponsor for yourself"): - self.engine.vouch("did:mesh:a", "did:mesh:a", self.session, 0.8) - - @pytest.mark.skip("Feature not available in Public Preview") - def test_low_score_cannot_vouch(self): - with pytest.raises(VouchingError, match="below minimum"): - self.engine.vouch("did:mesh:low", "did:mesh:other", self.session, 0.3) - - @pytest.mark.skip("Feature not available in Public Preview") - def test_circular_vouching_rejected(self): - self.engine.vouch("did:mesh:a", "did:mesh:b", self.session, 0.8) - with pytest.raises(VouchingError, match="Circular"): - self.engine.vouch("did:mesh:b", "did:mesh:a", self.session, 0.7) - - def test_eff_score_formula(self): - """Public Preview: eff_score = sponsored agent's own score (no sponsor boost).""" - self.engine.vouch("did:mesh:high", "did:mesh:low", self.session, 0.9, bond_pct=0.5) - eff_score = self.engine.compute_eff_score( - vouchee_did="did:mesh:low", - session_id=self.session, - vouchee_sigma=0.3, - risk_weight=0.2, - ) - assert abs(eff_score - 0.3) < 1e-9 # Returns vouchee_sigma directly - - def test_eff_score_capped_at_1(self): - self.engine.vouch("did:mesh:high", "did:mesh:low", self.session, 0.9, bond_pct=0.8) - eff_score = self.engine.compute_eff_score( - "did:mesh:low", self.session, 0.8, risk_weight=1.0 - 
) - assert eff_score <= 1.0 - - def test_multiple_vouchers(self): - self.engine.vouch("did:mesh:a", "did:mesh:low", self.session, 0.8, bond_pct=0.5) - self.engine.vouch("did:mesh:b", "did:mesh:low", self.session, 0.6, bond_pct=0.5) - # Public Preview: eff_score = vouchee_sigma (no boost) - eff_score = self.engine.compute_eff_score( - "did:mesh:low", self.session, 0.1, risk_weight=0.5 - ) - assert abs(eff_score - 0.1) < 1e-9 - - def test_release_session_bonds(self): - self.engine.vouch("did:mesh:a", "did:mesh:b", self.session, 0.8) - self.engine.vouch("did:mesh:a", "did:mesh:c", self.session, 0.8) - count = self.engine.release_session_bonds(self.session) - assert count == 2 - assert self.engine.get_vouchers_for("did:mesh:b", self.session) == [] - - def test_total_exposure(self): - self.engine.vouch("did:mesh:a", "did:mesh:b", self.session, 0.8, bond_pct=0.3) - self.engine.vouch("did:mesh:a", "did:mesh:c", self.session, 0.8, bond_pct=0.2) - exposure = self.engine.get_total_exposure("did:mesh:a", self.session) - assert exposure == 0.0 # Public Preview: no bonding - - -class TestLiabilityMatrix: - def setup_method(self): - self.matrix = LiabilityMatrix("session:test-1") - - def test_add_and_query(self): - self.matrix.add_edge("did:a", "did:b", 0.2, "v1") - assert len(self.matrix.who_vouches_for("did:b")) == 1 - assert len(self.matrix.who_is_vouched_by("did:a")) == 1 - - def test_total_exposure(self): - self.matrix.add_edge("did:a", "did:b", 0.2, "v1") - self.matrix.add_edge("did:a", "did:c", 0.3, "v2") - assert abs(self.matrix.total_exposure("did:a") - 0.5) < 1e-9 - - def test_cycle_detection(self): - self.matrix.add_edge("did:a", "did:b", 0.2, "v1") - self.matrix.add_edge("did:b", "did:a", 0.2, "v2") - assert self.matrix.has_cycle() - - def test_no_cycle(self): - self.matrix.add_edge("did:a", "did:b", 0.2, "v1") - self.matrix.add_edge("did:b", "did:c", 0.2, "v2") - assert not self.matrix.has_cycle() - - def test_clear_releases_all(self): - 
self.matrix.add_edge("did:a", "did:b", 0.2, "v1") - self.matrix.clear() - assert len(self.matrix.edges) == 0 diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_liability_improvements.py b/agent-governance-python/agent-hypervisor/tests/unit/test_liability_improvements.py deleted file mode 100644 index 02c89e01a..000000000 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_liability_improvements.py +++ /dev/null @@ -1,257 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for Shapley-value fault attribution, quarantine, and liability ledger.""" - -import pytest - -from hypervisor.liability.attribution import ( - CausalAttributor, -) -from hypervisor.liability.ledger import ( - LedgerEntryType, - LiabilityLedger, -) -from hypervisor.liability.quarantine import ( - QuarantineManager, - QuarantineReason, -) - -# ── Fault Logging Tests ──────────────────────────────────── - - -class TestCausalAttribution: - def test_basic_attribution(self): - attributor = CausalAttributor() - actions = { - "agent-a": [ - {"action_id": "act1", "step_id": "s1", "success": True}, - ], - "agent-b": [ - {"action_id": "act2", "step_id": "s2", "success": False}, - ], - } - result = attributor.attribute( - saga_id="saga-1", - session_id="sess-1", - agent_actions=actions, - failure_step_id="s2", - failure_agent_did="agent-b", - ) - assert result.root_cause_agent == "agent-b" - assert len(result.attributions) == 2 - # Direct cause agent should have higher liability - agent_b_score = result.get_liability("agent-b") - agent_a_score = result.get_liability("agent-a") - assert agent_b_score > agent_a_score - - def test_single_agent_gets_full_liability(self): - attributor = CausalAttributor() - actions = { - "agent-a": [ - {"action_id": "act1", "step_id": "s1", "success": False}, - ], - } - result = attributor.attribute( - saga_id="saga-1", - session_id="sess-1", - agent_actions=actions, - failure_step_id="s1", - 
failure_agent_did="agent-a", - ) - assert result.get_liability("agent-a") == 1.0 - - def test_risk_weights_affect_attribution(self): - attributor = CausalAttributor() - actions = { - "agent-a": [ - {"action_id": "high-risk", "step_id": "s1", "success": True}, - ], - "agent-b": [ - {"action_id": "low-risk", "step_id": "s2", "success": False}, - ], - } - result = attributor.attribute( - saga_id="saga-1", - session_id="sess-1", - agent_actions=actions, - failure_step_id="s2", - failure_agent_did="agent-b", - risk_weights={"high-risk": 0.95, "low-risk": 0.1}, - ) - assert len(result.attributions) == 2 - - def test_multiple_failures(self): - attributor = CausalAttributor() - actions = { - "agent-a": [ - {"action_id": "act1", "step_id": "s1", "success": False}, - ], - "agent-b": [ - {"action_id": "act2", "step_id": "s2", "success": False}, - ], - "agent-c": [ - {"action_id": "act3", "step_id": "s3", "success": True}, - ], - } - result = attributor.attribute( - saga_id="saga-1", - session_id="sess-1", - agent_actions=actions, - failure_step_id="s2", - failure_agent_did="agent-b", - ) - # All agents should have some liability - total = sum(a.liability_score for a in result.attributions) - assert abs(total - 1.0) < 0.01 - - def test_attribution_history(self): - attributor = CausalAttributor() - actions = {"a": [{"action_id": "x", "step_id": "s1", "success": False}]} - attributor.attribute("saga-1", "sess-1", actions, "s1", "a") - attributor.attribute("saga-2", "sess-1", actions, "s1", "a") - assert len(attributor.attribution_history) == 2 - - def test_agents_involved(self): - attributor = CausalAttributor() - actions = { - "agent-a": [{"action_id": "x", "step_id": "s1", "success": True}], - "agent-b": [{"action_id": "y", "step_id": "s2", "success": False}], - } - result = attributor.attribute("saga-1", "sess-1", actions, "s2", "agent-b") - assert set(result.agents_involved) == {"agent-a", "agent-b"} - - -# ── Quarantine Tests ──────────────────────────────────────────── - - 
-class TestQuarantine: - @pytest.mark.skip("Feature not available in Public Preview") - def test_quarantine_agent(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_release_quarantine(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_quarantine_escalation(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_quarantine_with_forensic_data(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_tick_expires_quarantines(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_active_quarantines_property(self): - pass - - def test_quarantine_history(self): - mgr = QuarantineManager() - mgr.quarantine("a1", "s1", QuarantineReason.MANUAL) - mgr.quarantine("a1", "s2", QuarantineReason.RING_BREACH) - history = mgr.get_history(agent_did="a1") - assert len(history) == 2 - - def test_duration_tracking(self): - mgr = QuarantineManager() - record = mgr.quarantine("a1", "s1", QuarantineReason.MANUAL) - assert record.duration_seconds >= 0 - - def test_not_quarantined_after_release(self): - mgr = QuarantineManager() - mgr.quarantine("a1", "s1", QuarantineReason.MANUAL) - mgr.release("a1", "s1") - assert not mgr.is_quarantined("a1", "s1") - - -# ── Liability Ledger Tests ────────────────────────────────────── - - -class TestLiabilityLedger: - def test_record_entry(self): - ledger = LiabilityLedger() - entry = ledger.record( - agent_did="agent-a", - entry_type=LedgerEntryType.SLASH_RECEIVED, - session_id="sess-1", - severity=0.8, - details="Behavioral drift", - ) - assert entry.agent_did == "agent-a" - assert ledger.total_entries == 1 - - def test_agent_history(self): - ledger = LiabilityLedger() - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, "s1") - ledger.record("a1", LedgerEntryType.SLASH_RECEIVED, "s2", severity=0.5) - ledger.record("a2", LedgerEntryType.CLEAN_SESSION, "s1") - - history = 
ledger.get_agent_history("a1") - assert len(history) == 2 - - def test_risk_profile_clean_agent(self): - ledger = LiabilityLedger() - for i in range(5): - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, f"s{i}") - - profile = ledger.compute_risk_profile("a1") - assert profile.risk_score == 0.0 - assert profile.recommendation == "admit" - - def test_risk_profile_risky_agent(self): - ledger = LiabilityLedger() - for i in range(5): - ledger.record("a1", LedgerEntryType.SLASH_RECEIVED, f"s{i}", severity=0.9) - profile = ledger.compute_risk_profile("a1") - # Public Preview: no risk scoring, always admits - assert profile.risk_score == 0.0 - assert profile.recommendation == "admit" - - def test_risk_profile_probation(self): - ledger = LiabilityLedger() - ledger.record("a1", LedgerEntryType.SLASH_RECEIVED, "s1", severity=0.7) - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, "s2") - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, "s3") - - profile = ledger.compute_risk_profile("a1") - assert profile.recommendation in ("admit", "probation") - - def test_should_admit_clean(self): - ledger = LiabilityLedger() - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, "s1") - admitted, reason = ledger.should_admit("a1") - assert admitted - - def test_should_deny_risky(self): - ledger = LiabilityLedger() - for i in range(10): - ledger.record("a1", LedgerEntryType.SLASH_RECEIVED, f"s{i}", severity=0.9) - - admitted, reason = ledger.should_admit("a1") - # Public Preview: always admits - assert admitted - assert reason == "admit" - - def test_unknown_agent_admitted(self): - ledger = LiabilityLedger() - admitted, reason = ledger.should_admit("unknown") - assert admitted - - def test_tracked_agents(self): - ledger = LiabilityLedger() - ledger.record("a1", LedgerEntryType.CLEAN_SESSION, "s1") - ledger.record("a2", LedgerEntryType.CLEAN_SESSION, "s1") - assert set(ledger.tracked_agents) == {"a1", "a2"} - - def test_quarantine_affects_risk(self): - ledger = LiabilityLedger() - 
ledger.record("a1", LedgerEntryType.QUARANTINE_ENTERED, "s1", severity=0.5) - profile = ledger.compute_risk_profile("a1") - # Public Preview: no risk scoring, always admits - assert profile.quarantine_count == 0 - assert profile.risk_score == 0.0 - assert profile.recommendation == "admit" diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_observability.py b/agent-governance-python/agent-hypervisor/tests/unit/test_observability.py index 662fc4c72..cd8c7b6ca 100644 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_observability.py +++ b/agent-governance-python/agent-hypervisor/tests/unit/test_observability.py @@ -73,7 +73,7 @@ def test_query_combined_filters(self): ) bus.emit( HypervisorEvent( - event_type=EventType.SLASH_EXECUTED, + event_type=EventType.AGENT_KILLED, session_id="s1", agent_did="a1", ) @@ -89,13 +89,13 @@ def test_query_combined_filters(self): def test_subscriber_notification(self): bus = HypervisorEventBus() received = [] - bus.subscribe(EventType.SLASH_EXECUTED, handler=lambda e: received.append(e)) + bus.subscribe(EventType.AGENT_KILLED, handler=lambda e: received.append(e)) bus.emit(HypervisorEvent(event_type=EventType.SESSION_CREATED)) - bus.emit(HypervisorEvent(event_type=EventType.SLASH_EXECUTED)) + bus.emit(HypervisorEvent(event_type=EventType.AGENT_KILLED)) assert len(received) == 1 - assert received[0].event_type == EventType.SLASH_EXECUTED + assert received[0].event_type == EventType.AGENT_KILLED def test_wildcard_subscriber(self): bus = HypervisorEventBus() @@ -103,7 +103,7 @@ def test_wildcard_subscriber(self): bus.subscribe(event_type=None, handler=lambda e: received.append(e)) bus.emit(HypervisorEvent(event_type=EventType.SESSION_CREATED)) - bus.emit(HypervisorEvent(event_type=EventType.SLASH_EXECUTED)) + bus.emit(HypervisorEvent(event_type=EventType.AGENT_KILLED)) assert len(received) == 2 @@ -119,13 +119,13 @@ def test_type_counts(self): def test_event_to_dict(self): event = HypervisorEvent( - 
event_type=EventType.SLASH_EXECUTED, + event_type=EventType.AGENT_KILLED, session_id="s1", agent_did="a1", payload={"severity": "high"}, ) d = event.to_dict() - assert d["event_type"] == "liability.slash_executed" + assert d["event_type"] == "security.agent_killed" assert d["session_id"] == "s1" assert d["payload"]["severity"] == "high" diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_saga_improvements.py b/agent-governance-python/agent-hypervisor/tests/unit/test_saga_improvements.py deleted file mode 100644 index 265c99bb0..000000000 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_saga_improvements.py +++ /dev/null @@ -1,295 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for saga fan-out, execution checkpoints, and declarative DSL.""" - -import pytest - -from hypervisor.saga.checkpoint import ( - CheckpointManager, - SemanticCheckpoint, -) -from hypervisor.saga.dsl import ( - SagaDSLError, - SagaDSLParser, -) -from hypervisor.saga.fan_out import ( - FanOutGroup, - FanOutOrchestrator, - FanOutPolicy, -) -from hypervisor.saga.state_machine import SagaStep - -# ── Fan-Out Tests ─────────────────────────────────────────────── - - -class TestFanOut: - @pytest.fixture - def steps(self): - return [ - SagaStep(step_id="s1", action_id="a1", agent_did="d1", execute_api="/api/1"), - SagaStep(step_id="s2", action_id="a2", agent_did="d2", execute_api="/api/2"), - SagaStep(step_id="s3", action_id="a3", agent_did="d3", execute_api="/api/3"), - ] - - async def test_all_succeed_policy(self, steps): - fan = FanOutOrchestrator() - group = fan.create_group("saga-1", FanOutPolicy.ALL_MUST_SUCCEED) - for s in steps: - fan.add_branch(group.group_id, s) - - async def success(): - return "ok" - - executors = {s.step_id: success for s in steps} - result = await fan.execute(group.group_id, executors) - - assert result.resolved - assert result.policy_satisfied - assert result.success_count == 3 - assert 
len(result.compensation_needed) == 0 - - async def test_all_succeed_policy_fails(self, steps): - fan = FanOutOrchestrator() - group = fan.create_group("saga-1", FanOutPolicy.ALL_MUST_SUCCEED) - for s in steps: - fan.add_branch(group.group_id, s) - - call_count = 0 - - async def sometimes_fail(): - nonlocal call_count - call_count += 1 - if call_count == 2: - raise ValueError("step failed") - return "ok" - - executors = {s.step_id: sometimes_fail for s in steps} - result = await fan.execute(group.group_id, executors) - - assert result.resolved - assert not result.policy_satisfied - assert result.failure_count == 1 - assert len(result.compensation_needed) > 0 - - @pytest.mark.skip("Feature not available in Public Preview") - async def test_majority_policy_succeeds(self, steps): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - async def test_any_policy_succeeds(self, steps): - pass - - async def test_all_fail_any_policy(self, steps): - fan = FanOutOrchestrator() - group = fan.create_group("saga-1", FanOutPolicy.ANY_MUST_SUCCEED) - for s in steps: - fan.add_branch(group.group_id, s) - - async def always_fail(): - raise ValueError("all fail") - - executors = {s.step_id: always_fail for s in steps} - result = await fan.execute(group.group_id, executors) - - assert not result.policy_satisfied - - def test_group_check_policy_empty(self): - group = FanOutGroup(policy=FanOutPolicy.ALL_MUST_SUCCEED) - # With 0 branches, 0 == 0 is True for ALL_MUST_SUCCEED (vacuously true) - assert group.check_policy() - - @pytest.mark.skip("Feature not available in Public Preview") - def test_group_check_policy_any_empty(self): - pass - - def test_active_groups(self, steps): - fan = FanOutOrchestrator() - g1 = fan.create_group("saga-1") - assert len(fan.active_groups) == 1 - g1.resolved = True - assert len(fan.active_groups) == 0 - - -# ── Checkpoint Tests ──────────────────────────────────────────── - - -class TestCheckpoints: - @pytest.mark.skip("Feature not 
available in Public Preview") - def test_save_and_check(self): - mgr = CheckpointManager() - ckpt = mgr.save("saga-1", "s1", "Database migrated", {"version": 5}) - assert ckpt.is_valid - assert mgr.is_achieved("saga-1", "Database migrated", "s1") - - def test_not_achieved_without_save(self): - mgr = CheckpointManager() - assert not mgr.is_achieved("saga-1", "Database migrated", "s1") - - @pytest.mark.skip("Feature not available in Public Preview") - def test_invalidate_checkpoint(self): - mgr = CheckpointManager() - mgr.save("saga-1", "s1", "Schema created") - count = mgr.invalidate("saga-1", "s1", "Schema changed") - assert count == 1 - assert not mgr.is_achieved("saga-1", "Schema created", "s1") - - @pytest.mark.skip("Feature not available in Public Preview") - def test_get_checkpoint(self): - pass - - def test_get_saga_checkpoints(self): - mgr = CheckpointManager() - mgr.save("saga-1", "s1", "Step 1 done") - mgr.save("saga-1", "s2", "Step 2 done") - mgr.save("saga-2", "s1", "Other saga") - - ckpts = mgr.get_saga_checkpoints("saga-1") - assert len(ckpts) == 2 - - @pytest.mark.skip("Feature not available in Public Preview") - def test_replay_plan(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_total_and_valid_counts(self): - pass - - def test_goal_hash_deterministic(self): - h1 = SemanticCheckpoint.compute_goal_hash("Deploy", "s1") - h2 = SemanticCheckpoint.compute_goal_hash("Deploy", "s1") - h3 = SemanticCheckpoint.compute_goal_hash("Deploy", "s2") - assert h1 == h2 - assert h1 != h3 - - -# ── DSL Parser Tests ──────────────────────────────────────────── - - -class TestSagaDSL: - def test_parse_valid_definition(self): - parser = SagaDSLParser() - defn = parser.parse( - { - "name": "deploy-model", - "session_id": "sess-1", - "steps": [ - { - "id": "validate", - "action_id": "model.validate", - "agent": "did:mesh:validator", - "execute_api": "/api/validate", - "undo_api": "/api/rollback", - }, - { - "id": "deploy", - 
"action_id": "model.deploy", - "agent": "did:mesh:deployer", - "execute_api": "/api/deploy", - "timeout": 600, - "retries": 2, - }, - ], - } - ) - assert defn.name == "deploy-model" - assert len(defn.steps) == 2 - assert defn.steps[1].timeout == 600 - assert defn.steps[1].retries == 2 - - @pytest.mark.skip("Feature not available in Public Preview") - def test_parse_with_fan_out(self): - pass - - def test_parse_missing_name(self): - parser = SagaDSLParser() - with pytest.raises(SagaDSLError, match="name"): - parser.parse( - {"session_id": "s1", "steps": [{"id": "s", "action_id": "a", "agent": "x"}]} - ) - - def test_parse_missing_session_id(self): - parser = SagaDSLParser() - with pytest.raises(SagaDSLError, match="session_id"): - parser.parse({"name": "x", "steps": [{"id": "s", "action_id": "a", "agent": "x"}]}) - - def test_parse_empty_steps(self): - parser = SagaDSLParser() - with pytest.raises(SagaDSLError, match="step"): - parser.parse({"name": "x", "session_id": "s1", "steps": []}) - - def test_parse_duplicate_step_ids(self): - parser = SagaDSLParser() - with pytest.raises(SagaDSLError, match="Duplicate"): - parser.parse( - { - "name": "x", - "session_id": "s1", - "steps": [ - {"id": "dup", "action_id": "a1", "agent": "x"}, - {"id": "dup", "action_id": "a2", "agent": "y"}, - ], - } - ) - - @pytest.mark.skip("Feature not available in Public Preview") - def test_parse_invalid_fan_out_policy(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_parse_fan_out_invalid_branch(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_parse_fan_out_too_few_branches(self): - pass - - def test_to_saga_steps(self): - parser = SagaDSLParser() - defn = parser.parse( - { - "name": "x", - "session_id": "s1", - "steps": [ - {"id": "s1", "action_id": "a1", "agent": "x", "execute_api": "/run"}, - ], - } - ) - steps = parser.to_saga_steps(defn) - assert len(steps) == 1 - assert steps[0].step_id == "s1" - assert 
steps[0].execute_api == "/run" - - def test_validate_errors(self): - parser = SagaDSLParser() - errors = parser.validate({}) - assert "Missing 'name'" in errors - assert "Missing 'session_id'" in errors - assert "Missing 'steps'" in errors - - def test_validate_valid(self): - parser = SagaDSLParser() - errors = parser.validate( - { - "name": "x", - "session_id": "s1", - "steps": [{"id": "a", "action_id": "b", "agent": "c"}], - } - ) - assert errors == [] - - def test_sequential_steps(self): - parser = SagaDSLParser() - defn = parser.parse( - { - "name": "x", - "session_id": "s1", - "steps": [ - {"id": "seq1", "action_id": "a", "agent": "x"}, - {"id": "par1", "action_id": "b", "agent": "y"}, - {"id": "par2", "action_id": "c", "agent": "z"}, - ], - "fan_out": [{"policy": "all_must_succeed", "branches": ["par1", "par2"]}], - } - ) - # Public Preview: all steps are sequential (fan_out ignored) - assert len(defn.sequential_steps) == 3 diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_saga_schema.py b/agent-governance-python/agent-hypervisor/tests/unit/test_saga_schema.py deleted file mode 100644 index dcd9577cf..000000000 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_saga_schema.py +++ /dev/null @@ -1,344 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
-"""Tests for JSON schema validation of saga DSL definitions.""" - -import pytest - -from hypervisor.saga.dsl import SagaDSLError, SagaDSLParser -from hypervisor.saga.schema import ( - SAGA_DEFINITION_SCHEMA, - VALID_ACTION_PREFIXES, - SagaSchemaError, - SagaSchemaValidator, -) - -# ── Helpers ───────────────────────────────────────────────────── - - -def _valid_definition(**overrides): - """Return a minimal valid saga definition dict.""" - defn = { - "name": "test-saga", - "session_id": "sess-1", - "steps": [ - { - "id": "step-1", - "action_id": "model.validate", - "agent": "did:mesh:validator", - "execute_api": "/api/validate", - "undo_api": "/api/rollback", - "timeout": 300, - "retries": 0, - }, - ], - } - defn.update(overrides) - return defn - - -def _valid_step(**overrides): - step = { - "id": "s1", - "action_id": "model.run", - "agent": "did:mesh:agent", - "execute_api": "/api/run", - "undo_api": "/api/undo", - } - step.update(overrides) - return step - - -# ── Schema Structure Tests ────────────────────────────────────── - - -class TestSchemaStructure: - """Tests that the JSON schema itself is well-formed.""" - - def test_schema_has_required_fields(self): - assert "name" in SAGA_DEFINITION_SCHEMA["required"] - assert "session_id" in SAGA_DEFINITION_SCHEMA["required"] - assert "steps" in SAGA_DEFINITION_SCHEMA["required"] - - def test_schema_has_title(self): - assert SAGA_DEFINITION_SCHEMA["title"] == "SagaDefinition" - - def test_step_schema_requires_id_action_agent(self): - step_schema = SAGA_DEFINITION_SCHEMA["properties"]["steps"]["items"] - assert "id" in step_schema["required"] - assert "action_id" in step_schema["required"] - assert "agent" in step_schema["required"] - - -# ── Required Fields Validation ────────────────────────────────── - - -class TestRequiredFields: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_valid_definition_passes(self): - errors = self.validator.validate(_valid_definition()) - assert errors == 
[] - - def test_missing_name(self): - defn = _valid_definition() - del defn["name"] - errors = self.validator.validate(defn) - assert any("name" in e for e in errors) - - def test_missing_session_id(self): - defn = _valid_definition() - del defn["session_id"] - errors = self.validator.validate(defn) - assert any("session_id" in e for e in errors) - - def test_missing_steps(self): - defn = _valid_definition() - del defn["steps"] - errors = self.validator.validate(defn) - assert any("steps" in e for e in errors) - - def test_empty_name(self): - errors = self.validator.validate(_valid_definition(name="")) - assert any("name" in e for e in errors) - - def test_empty_session_id(self): - errors = self.validator.validate(_valid_definition(session_id="")) - assert any("session_id" in e for e in errors) - - def test_empty_steps_list(self): - errors = self.validator.validate(_valid_definition(steps=[])) - assert any("steps" in e for e in errors) - - def test_completely_empty_dict(self): - errors = self.validator.validate({}) - assert len(errors) >= 3 # name, session_id, steps - - -# ── Step Structure Validation ─────────────────────────────────── - - -class TestStepStructure: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_step_missing_id(self): - step = _valid_step() - del step["id"] - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("id" in e for e in errors) - - def test_step_missing_action_id(self): - step = _valid_step() - del step["action_id"] - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("action_id" in e for e in errors) - - def test_step_missing_agent(self): - step = _valid_step() - del step["agent"] - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("agent" in e for e in errors) - - def test_step_empty_id(self): - step = _valid_step(id="") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("id" in e for e in 
errors) - - def test_step_unknown_property_rejected(self): - step = _valid_step(unknown_field="bad") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("additional" in e.lower() or "unknown_field" in e for e in errors) - - -# ── Timeout Range Validation ─────────────────────────────────── - - -class TestTimeoutRanges: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_valid_timeout(self): - step = _valid_step(timeout=600) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert errors == [] - - def test_timeout_too_low(self): - step = _valid_step(timeout=0) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("timeout" in e.lower() or "minimum" in e.lower() for e in errors) - - def test_timeout_too_high(self): - step = _valid_step(timeout=100000) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("timeout" in e.lower() or "maximum" in e.lower() for e in errors) - - def test_timeout_wrong_type(self): - step = _valid_step(timeout="fast") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("timeout" in e.lower() or "type" in e.lower() for e in errors) - - def test_retries_negative(self): - step = _valid_step(retries=-1) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("retries" in e.lower() or "minimum" in e.lower() for e in errors) - - def test_retries_too_high(self): - step = _valid_step(retries=11) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("retries" in e.lower() or "maximum" in e.lower() for e in errors) - - def test_boundary_timeout_min(self): - step = _valid_step(timeout=1) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert errors == [] - - def test_boundary_timeout_max(self): - step = _valid_step(timeout=86400) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert errors == [] - - 
-# ── Action Type Validation ────────────────────────────────────── - - -class TestActionTypes: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_valid_action_prefixes(self): - for prefix in VALID_ACTION_PREFIXES: - step = _valid_step(action_id=f"{prefix}run") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert not any("action_id" in e and "prefix" in e for e in errors), ( - f"Prefix '{prefix}' should be valid" - ) - - def test_invalid_action_prefix(self): - step = _valid_step(action_id="unknown.action") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("action_id" in e and "prefix" in e for e in errors) - - -# ── Compensation Requirements ─────────────────────────────────── - - -class TestCompensation: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_step_without_undo_api_warns(self): - step = _valid_step() - del step["undo_api"] - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("undo_api" in e for e in errors) - - def test_step_with_null_undo_api_warns(self): - step = _valid_step(undo_api=None) - errors = self.validator.validate(_valid_definition(steps=[step])) - assert any("undo_api" in e for e in errors) - - def test_step_with_undo_api_passes(self): - step = _valid_step(undo_api="/api/rollback") - errors = self.validator.validate(_valid_definition(steps=[step])) - assert not any("undo_api" in e for e in errors) - - -# ── Step Ordering and Dependencies ────────────────────────────── - - -class TestDependencies: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_valid_dependency(self): - steps = [ - _valid_step(id="s1"), - _valid_step(id="s2", depends_on=["s1"]), - ] - errors = self.validator.validate(_valid_definition(steps=steps)) - assert not any("depends_on" in e for e in errors) - - def test_unknown_dependency(self): - steps = [ - _valid_step(id="s1"), - _valid_step(id="s2", 
depends_on=["nonexistent"]), - ] - errors = self.validator.validate(_valid_definition(steps=steps)) - assert any("nonexistent" in e for e in errors) - - def test_circular_dependency(self): - steps = [ - _valid_step(id="s1", depends_on=["s2"]), - _valid_step(id="s2", depends_on=["s1"]), - ] - errors = self.validator.validate(_valid_definition(steps=steps)) - assert any("circular" in e.lower() for e in errors) - - def test_self_dependency(self): - steps = [_valid_step(id="s1", depends_on=["s1"])] - errors = self.validator.validate(_valid_definition(steps=steps)) - assert any("circular" in e.lower() for e in errors) - - def test_duplicate_step_ids(self): - steps = [ - _valid_step(id="dup"), - _valid_step(id="dup", action_id="model.other"), - ] - errors = self.validator.validate(_valid_definition(steps=steps)) - assert any("duplicate" in e.lower() for e in errors) - - -# ── Validate-or-Raise ────────────────────────────────────────── - - -class TestValidateOrRaise: - def setup_method(self): - self.validator = SagaSchemaValidator() - - def test_valid_does_not_raise(self): - self.validator.validate_or_raise(_valid_definition()) - - def test_invalid_raises_schema_error(self): - with pytest.raises(SagaSchemaError) as exc_info: - self.validator.validate_or_raise({}) - assert len(exc_info.value.errors) >= 3 - - def test_error_message_lists_all_problems(self): - with pytest.raises(SagaSchemaError, match="validation error"): - self.validator.validate_or_raise({"name": ""}) - - -# ── Integration with SagaDSLParser ────────────────────────────── - - -class TestParserSchemaIntegration: - def test_parser_without_schema_validation(self): - """Default parser does not enforce schema validation.""" - parser = SagaDSLParser() - defn = parser.parse( - { - "name": "x", - "session_id": "s1", - "steps": [{"id": "s", "action_id": "a", "agent": "x"}], - } - ) - assert defn.name == "x" - - def test_parser_with_schema_validation_valid(self): - parser = SagaDSLParser(schema_validation=True) - 
defn = parser.parse(_valid_definition()) - assert defn.name == "test-saga" - - def test_parser_with_schema_validation_invalid(self): - parser = SagaDSLParser(schema_validation=True) - with pytest.raises(SagaSchemaError): - parser.parse({"name": "", "session_id": "s", "steps": []}) - - def test_parser_existing_behavior_preserved(self): - """Existing SagaDSLError still raised for structural issues.""" - parser = SagaDSLParser() - with pytest.raises(SagaDSLError, match="name"): - parser.parse( - {"session_id": "s", "steps": [{"id": "s", "action_id": "a", "agent": "x"}]} - ) diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_session_security.py b/agent-governance-python/agent-hypervisor/tests/unit/test_session_security.py index 00644ab1f..3221d8b3f 100644 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_session_security.py +++ b/agent-governance-python/agent-hypervisor/tests/unit/test_session_security.py @@ -1,6 +1,6 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT License. 
-"""Tests for version counters, resource locks, isolation levels, rate limiter, and kill switch.""" +"""Tests for version counters, isolation levels, rate limiter, and kill switch.""" import pytest @@ -15,14 +15,9 @@ RateLimitExceeded, TokenBucket, ) -from hypervisor.session.intent_locks import ( - IntentLockManager, - LockIntent, -) from hypervisor.session.isolation import IsolationLevel from hypervisor.session.vector_clock import ( VectorClock, - VectorClockManager, ) # ── Version Counter Tests ────────────────────────────────────────── @@ -68,109 +63,6 @@ def test_copy(self): assert vc.get("a1") == 1 # original unchanged -class TestVectorClockManager: - @pytest.mark.skip("Feature not available in Public Preview") - def test_read_updates_agent_clock(self): - pass - - def test_write_advances_path_clock(self): - mgr = VectorClockManager() - mgr.write("/data/file1", "a1") - path_clock = mgr.get_path_clock("/data/file1") - assert path_clock.get("a1") == 1 - - @pytest.mark.skip("Feature not available in Public Preview") - def test_causal_violation_detected(self): - pass - - def test_read_then_write_no_violation(self): - mgr = VectorClockManager() - mgr.write("/data/file1", "a1") - mgr.read("/data/file1", "a2") # a2 catches up - mgr.write("/data/file1", "a2", strict=True) # OK: a2 has seen latest - - def test_non_strict_allows_concurrent(self): - mgr = VectorClockManager() - mgr.write("/data/file1", "a1", strict=False) - mgr.write("/data/file1", "a2", strict=False) - assert mgr.tracked_paths == 1 - - def test_conflict_count(self): - mgr = VectorClockManager() - assert mgr.conflict_count == 0 - - -# ── Resource Lock Tests ─────────────────────────────────────────── - - -class TestIntentLocks: - def test_acquire_read_locks(self): - mgr = IntentLockManager() - l1 = mgr.acquire("a1", "s1", "/data/file", LockIntent.READ) - l2 = mgr.acquire("a2", "s1", "/data/file", LockIntent.READ) - assert l1.is_active - assert l2.is_active - - @pytest.mark.skip("Feature not available in 
Public Preview") - def test_write_conflicts_with_read(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_write_conflicts_with_write(self): - pass - - @pytest.mark.skip("Feature not available in Public Preview") - def test_exclusive_conflicts_with_read(self): - pass - - def test_same_agent_no_conflict(self): - mgr = IntentLockManager() - mgr.acquire("a1", "s1", "/data/file", LockIntent.WRITE) - # Same agent, different lock type — allowed - mgr.acquire("a1", "s1", "/data/file", LockIntent.READ) - - def test_release_lock(self): - mgr = IntentLockManager() - lock = mgr.acquire("a1", "s1", "/data/file", LockIntent.WRITE) - mgr.release(lock.lock_id) - # Now another agent can acquire - mgr.acquire("a2", "s1", "/data/file", LockIntent.WRITE) - - def test_release_agent_locks(self): - mgr = IntentLockManager() - mgr.acquire("a1", "s1", "/file1", LockIntent.WRITE) - mgr.acquire("a1", "s1", "/file2", LockIntent.EXCLUSIVE) - count = mgr.release_agent_locks("a1", "s1") - assert count == 2 - assert mgr.active_lock_count == 0 - - def test_release_session_locks(self): - mgr = IntentLockManager() - mgr.acquire("a1", "s1", "/file1", LockIntent.READ) - mgr.acquire("a2", "s1", "/file2", LockIntent.WRITE) - count = mgr.release_session_locks("s1") - assert count == 2 - - def test_contention_points(self): - mgr = IntentLockManager() - mgr.acquire("a1", "s1", "/shared", LockIntent.READ) - mgr.acquire("a2", "s1", "/shared", LockIntent.READ) - points = mgr.contention_points - # Public Preview: no contention detection - assert points == [] - - @pytest.mark.skip("Feature not available in Public Preview") - def test_deadlock_detection(self): - pass - - def test_get_agent_locks(self): - mgr = IntentLockManager() - mgr.acquire("a1", "s1", "/f1", LockIntent.READ) - mgr.acquire("a1", "s1", "/f2", LockIntent.WRITE) - locks = mgr.get_agent_locks("a1", "s1") - assert len(locks) == 2 - - # ── Isolation Level Tests ────────────────────────────────────── @@ -178,21 
+70,18 @@ class TestIsolationLevels: def test_snapshot_properties(self): level = IsolationLevel.SNAPSHOT assert not level.requires_vector_clocks - assert not level.requires_intent_locks assert level.allows_concurrent_writes assert level.coordination_cost == "low" def test_read_committed_properties(self): level = IsolationLevel.READ_COMMITTED assert not level.requires_vector_clocks - assert not level.requires_intent_locks assert level.allows_concurrent_writes assert level.coordination_cost == "medium" def test_serializable_properties(self): level = IsolationLevel.SERIALIZABLE assert level.requires_vector_clocks - assert level.requires_intent_locks assert not level.allows_concurrent_writes assert level.coordination_cost == "high" diff --git a/agent-governance-python/agent-hypervisor/tests/unit/test_slashing.py b/agent-governance-python/agent-hypervisor/tests/unit/test_slashing.py deleted file mode 100644 index f1513eff3..000000000 --- a/agent-governance-python/agent-hypervisor/tests/unit/test_slashing.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. 
-"""Tests for the penalty engine.""" - -import pytest - -from hypervisor.liability.slashing import SlashingEngine -from hypervisor.liability.vouching import VouchingEngine - - -class TestSlashingEngine: - def setup_method(self): - self.vouching = VouchingEngine() - self.slashing = SlashingEngine(self.vouching) - self.session = "session:test-penalize" - - @pytest.mark.skip("Feature not available in Public Preview") - def test_vouchee_blacklisted(self): - """Sponsored agent σ → 0 on violation.""" - scores = {"did:mesh:bad": 0.7, "did:mesh:good": 0.9} - self.vouching.vouch("did:mesh:good", "did:mesh:bad", self.session, 0.9) - - result = self.slashing.slash( - vouchee_did="did:mesh:bad", - session_id=self.session, - vouchee_sigma=0.7, - risk_weight=0.5, - reason="Policy violation", - agent_scores=scores, - ) - - assert scores["did:mesh:bad"] == 0.0 - assert result.vouchee_sigma_after == 0.0 - - @pytest.mark.skip("Feature not available in Public Preview") - def test_voucher_collateral_clip(self): - """σ_new = σ_old × (1 - ω)""" - scores = {"did:mesh:bad": 0.5, "did:mesh:sponsor": 0.9} - self.vouching.vouch("did:mesh:sponsor", "did:mesh:bad", self.session, 0.9) - - result = self.slashing.slash( - vouchee_did="did:mesh:bad", - session_id=self.session, - vouchee_sigma=0.5, - risk_weight=0.5, - reason="Hallucination", - agent_scores=scores, - ) - - # σ_new = 0.9 * (1 - 0.5) = 0.45 - assert len(result.voucher_clips) == 1 - clip = result.voucher_clips[0] - assert abs(clip.sigma_before - 0.9) < 1e-9 - assert abs(clip.sigma_after - 0.45) < 1e-9 - assert abs(scores["did:mesh:sponsor"] - 0.45) < 1e-9 - - def test_sigma_floor_respected(self): - """Penalty should not reduce below SIGMA_FLOOR.""" - scores = {"did:mesh:bad": 0.1, "did:mesh:sponsor": 0.06} - self.vouching.vouch("did:mesh:sponsor", "did:mesh:bad", self.session, 0.8) - - self.slashing.slash( - vouchee_did="did:mesh:bad", - session_id=self.session, - vouchee_sigma=0.1, - risk_weight=0.95, - reason="Fraud", - 
agent_scores=scores, - ) - - assert scores["did:mesh:sponsor"] >= SlashingEngine.SIGMA_FLOOR - - @pytest.mark.skip("Feature not available in Public Preview") - def test_multiple_vouchers_all_clipped(self): - """All sponsors for a sponsored agent get clipped.""" - scores = {"did:mesh:bad": 0.4, "did:mesh:v1": 0.8, "did:mesh:v2": 0.7} - self.vouching.vouch("did:mesh:v1", "did:mesh:bad", self.session, 0.8) - self.vouching.vouch("did:mesh:v2", "did:mesh:bad", self.session, 0.7) - - result = self.slashing.slash( - vouchee_did="did:mesh:bad", - session_id=self.session, - vouchee_sigma=0.4, - risk_weight=0.3, - reason="Mute triggered", - agent_scores=scores, - ) - - assert len(result.voucher_clips) == 2 - # v1: 0.8 * (1-0.3) = 0.56, v2: 0.7 * (1-0.3) = 0.49 - assert abs(scores["did:mesh:v1"] - 0.56) < 1e-9 - assert abs(scores["did:mesh:v2"] - 0.49) < 1e-9 diff --git a/agent-governance-python/agent-hypervisor/tutorials/execution-rings-workflow/README.md b/agent-governance-python/agent-hypervisor/tutorials/execution-rings-workflow/README.md index 077f10a07..c1b320685 100644 --- a/agent-governance-python/agent-hypervisor/tutorials/execution-rings-workflow/README.md +++ b/agent-governance-python/agent-hypervisor/tutorials/execution-rings-workflow/README.md @@ -238,5 +238,4 @@ print(result.kill_id) # "kill:a1b2c3d4" - Integrate with the full `Hypervisor` orchestrator for session-managed rings - Use `RingBreachDetector` to automatically detect anomalous access patterns - Combine with `SagaOrchestrator` for transactional multi-step workflows -- Add `LiabilityMatrix` to track agent trust scores alongside ring assignments - Explore the `ActionClassifier` for automatic ring-level classification diff --git a/agent-governance-python/agent-hypervisor/tutorials/saga-compensation/README.md b/agent-governance-python/agent-hypervisor/tutorials/saga-compensation/README.md index 3e653d0c2..2973e6500 100644 --- a/agent-governance-python/agent-hypervisor/tutorials/saga-compensation/README.md 
+++ b/agent-governance-python/agent-hypervisor/tutorials/saga-compensation/README.md @@ -163,4 +163,4 @@ compensator for each step that has an `undo_api`. - Add retry logic by setting `max_retries` on critical steps - Integrate real service calls instead of mocks - Persist saga state with `saga.to_dict()` for crash recovery -- Explore the `SagaDSLParser` for declarative saga definitions +- Use `saga.to_dict()` to persist state for crash recovery diff --git a/agent-governance-python/agent-os/README.md b/agent-governance-python/agent-os/README.md index b8328dad2..ad1b05a69 100644 --- a/agent-governance-python/agent-os/README.md +++ b/agent-governance-python/agent-os/README.md @@ -414,7 +414,7 @@ agent-governance-python/agent-os/ | [`observability`](modules/observability/) | 3 | `agent-os-observability` | Prometheus metrics + OpenTelemetry tracing | ⚠️ No tests | | [`nexus`](modules/nexus/) | — | *Not published* | Trust exchange network | 🔬 Prototype | | [`mcp-kernel-server`](modules/mcp-kernel-server/) | Int | `mcp-kernel-server` | MCP server for Claude Desktop | ⚠️ No tests | -| [**`runtime`**](https://github.com/microsoft/agent-governance-toolkit) | **⭐** | `agentmesh-runtime` | **Execution supervisor — Execution Rings, Joint Liability, Saga Orchestrator** ([own repo](https://github.com/microsoft/agent-governance-toolkit)) | **✅ 184 tests** | +| [**`runtime`**](https://github.com/microsoft/agent-governance-toolkit) | **⭐** | `agentmesh-runtime` | **Execution supervisor — Execution Rings, Saga Orchestrator, Delta Audit** ([own repo](https://github.com/microsoft/agent-governance-toolkit)) | **✅ 184 tests** | --- @@ -436,9 +436,9 @@ Just as OS runtimes isolate execution environments and enforce resource boundari │ Ring 3 (Sandbox) ← Default for unknown agents │ │ │ │ ┌──────────┐ ┌───────────┐ ┌────────────────────────┐ │ -│ │ Joint │ │ Semantic │ │ Hash-Chained │ │ -│ │ Liability │ │ Saga │ │ Delta Audit Trail │ │ -│ │ Engine │ │ Orchestr. 
│ │ (Tamper-Evident) │ │ +│ │ Execution│ │ Saga │ │ Hash-Chained │ │ +│ │ Rings │ │Orchestrator│ │ Delta Audit Trail │ │ +│ │ │ │ │ │ (Tamper-Evident) │ │ │ └──────────┘ └───────────┘ └────────────────────────┘ │ └────────────────────────────────────────────────────────────┘ ``` @@ -448,9 +448,8 @@ Just as OS runtimes isolate execution environments and enforce resource boundari | Feature | Description | Latency | |---------|-------------|---------| | **Execution Rings** | 4-level privilege model (Ring 0–3) based on trust score | **0.3μs** | -| **Joint Liability** | High-trust agents vouch for low-trust agents with bonded reputation | **7μs** | | **Saga Orchestrator** | Multi-step transactions with timeout, retry, and auto-compensation | **151μs** | -| **Delta Audit** | Hash-chained semantic diffs with blockchain commitment | **27μs** | +| **Delta Audit** | Hash-chained semantic diffs | **27μs** | | **Full Pipeline** | Session + join + audit + saga + terminate | **268μs** | ### Quick Start diff --git a/agent-governance-python/agent-os/modules/control-plane/acp-cli.py b/agent-governance-python/agent-os/modules/control-plane/acp-cli.py index 31d7b50a2..eccc73beb 100644 --- a/agent-governance-python/agent-os/modules/control-plane/acp-cli.py +++ b/agent-governance-python/agent-os/modules/control-plane/acp-cli.py @@ -217,7 +217,7 @@ def cmd_benchmark_run(args): print(json.dumps({"status": "running", "benchmark": "safety"}, indent=2)) else: print("Running safety benchmark...") - print("This would execute benchmark/red_team_dataset.py") + print("This would execute benchmarks/red_team_dataset.py") print("(Implementation in progress)") diff --git a/agent-governance-python/agent-os/modules/control-plane/benchmark.py b/agent-governance-python/agent-os/modules/control-plane/benchmark.py index b58ef9603..2cef648fc 100644 --- a/agent-governance-python/agent-os/modules/control-plane/benchmark.py +++ b/agent-governance-python/agent-os/modules/control-plane/benchmark.py @@ -21,7 
+21,7 @@ # Add parent directory to path for imports sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) -from benchmark.red_team_dataset import ( +from benchmarks.red_team_dataset import ( get_all_prompts, get_prompts_by_category, get_dataset_stats, diff --git a/agent-governance-python/agent-os/modules/control-plane/benchmark/README.md b/agent-governance-python/agent-os/modules/control-plane/benchmarks/README.md similarity index 99% rename from agent-governance-python/agent-os/modules/control-plane/benchmark/README.md rename to agent-governance-python/agent-os/modules/control-plane/benchmarks/README.md index c40053fca..bf41c8906 100644 --- a/agent-governance-python/agent-os/modules/control-plane/benchmark/README.md +++ b/agent-governance-python/agent-os/modules/control-plane/benchmarks/README.md @@ -25,7 +25,7 @@ The red team dataset (60 prompts) is available both locally and on Hugging Face: ### Local Access ```python -from benchmark.red_team_dataset import get_all_prompts, get_dataset_stats +from benchmarks.red_team_dataset import get_all_prompts, get_dataset_stats # Get all prompts prompts = get_all_prompts() diff --git a/agent-governance-python/agent-os/modules/control-plane/benchmark/__init__.py b/agent-governance-python/agent-os/modules/control-plane/benchmarks/__init__.py similarity index 100% rename from agent-governance-python/agent-os/modules/control-plane/benchmark/__init__.py rename to agent-governance-python/agent-os/modules/control-plane/benchmarks/__init__.py diff --git a/agent-governance-python/agent-os/modules/control-plane/benchmark/red_team_dataset.py b/agent-governance-python/agent-os/modules/control-plane/benchmarks/red_team_dataset.py similarity index 100% rename from agent-governance-python/agent-os/modules/control-plane/benchmark/red_team_dataset.py rename to agent-governance-python/agent-os/modules/control-plane/benchmarks/red_team_dataset.py diff --git 
a/agent-governance-python/agent-os/modules/control-plane/examples/benchmark_demo.py b/agent-governance-python/agent-os/modules/control-plane/examples/benchmark_demo.py index bb4c974c5..90ef10df5 100644 --- a/agent-governance-python/agent-os/modules/control-plane/examples/benchmark_demo.py +++ b/agent-governance-python/agent-os/modules/control-plane/examples/benchmark_demo.py @@ -14,7 +14,7 @@ # Add parent directory to path sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) -from benchmark.red_team_dataset import ( +from benchmarks.red_team_dataset import ( get_prompts_by_category, PromptCategory, get_dataset_stats diff --git a/agent-governance-python/agent-os/modules/control-plane/scripts/upload_dataset_to_hf.py b/agent-governance-python/agent-os/modules/control-plane/scripts/upload_dataset_to_hf.py index f86781bbf..c6446a3a3 100644 --- a/agent-governance-python/agent-os/modules/control-plane/scripts/upload_dataset_to_hf.py +++ b/agent-governance-python/agent-os/modules/control-plane/scripts/upload_dataset_to_hf.py @@ -25,7 +25,7 @@ # Add parent directory to path for imports sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from benchmark.red_team_dataset import ( +from benchmarks.red_team_dataset import ( get_all_prompts, get_dataset_stats, PromptCategory diff --git a/agent-governance-python/agent-os/modules/control-plane/tests/test_benchmark.py b/agent-governance-python/agent-os/modules/control-plane/tests/test_benchmark.py index 07a1c78ce..c9ba8d600 100644 --- a/agent-governance-python/agent-os/modules/control-plane/tests/test_benchmark.py +++ b/agent-governance-python/agent-os/modules/control-plane/tests/test_benchmark.py @@ -11,7 +11,7 @@ # Add parent directory to path sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) -from benchmark.red_team_dataset import ( +from benchmarks.red_team_dataset import ( get_all_prompts, get_prompts_by_category, get_dataset_stats, diff --git 
a/agent-governance-python/agent-os/tests/test_spec_audit_compliance_conformance.py b/agent-governance-python/agent-os/tests/test_spec_audit_compliance_conformance.py index 18fda8991..e255dc871 100644 --- a/agent-governance-python/agent-os/tests/test_spec_audit_compliance_conformance.py +++ b/agent-governance-python/agent-os/tests/test_spec_audit_compliance_conformance.py @@ -99,7 +99,6 @@ # --------------------------------------------------------------------------- try: from hypervisor.audit.delta import DeltaEngine, SemanticDelta, VFSChange - from hypervisor.audit.commitment import CommitmentEngine, CommitmentRecord from hypervisor.observability.event_bus import ( EventType, HypervisorEvent, @@ -1087,10 +1086,10 @@ def test_bom_completeness_score_default(self): # =================================================================== -# S18 -- Hypervisor Audit (Delta + Commitment) +# S18 -- Hypervisor Audit (Delta) # =================================================================== class TestHypervisorAudit(unittest.TestCase): - """Conformance tests for hypervisor audit delta and commitment (spec S18).""" + """Conformance tests for hypervisor audit delta (spec S18).""" def test_vfs_change_fields(self): """S18.1 -- VFSChange must have path, operation, content_hash fields.""" @@ -1138,56 +1137,6 @@ def test_delta_chain_verification(self): self.assertTrue(valid) self.assertIsNone(err) - def test_commitment_engine_commit(self): - """S18.7 -- CommitmentEngine.commit must return a CommitmentRecord.""" - engine = CommitmentEngine() - record = engine.commit( - session_id="sess1", - hash_chain_root="abc123", - participant_dids=["did:a1"], - delta_count=5, - ) - self.assertIsInstance(record, CommitmentRecord) - self.assertEqual(record.session_id, "sess1") - - def test_commitment_verify(self): - """S18.8 -- CommitmentEngine.verify must return bool.""" - engine = CommitmentEngine() - engine.commit( - session_id="sess1", - hash_chain_root="abc", - participant_dids=["did:a"], - 
delta_count=1, - ) - result = engine.verify(session_id="sess1", expected_root="abc") - self.assertIsInstance(result, bool) - - def test_commitment_get(self): - """S18.9 -- get_commitment must return record or None.""" - engine = CommitmentEngine() - engine.commit( - session_id="sess_get", - hash_chain_root="root", - participant_dids=["did:a"], - delta_count=1, - ) - record = engine.get_commitment("sess_get") - self.assertIsNotNone(record) - self.assertEqual(record.session_id, "sess_get") - - def test_commitment_batch(self): - """S18.10 -- queue_for_batch and flush_batch must work.""" - engine = CommitmentEngine() - record = engine.commit( - session_id="batch1", - hash_chain_root="root1", - participant_dids=["did:a"], - delta_count=1, - ) - engine.queue_for_batch(record) - flushed = engine.flush_batch() - self.assertIsInstance(flushed, list) - # =================================================================== # S18b -- Hypervisor Event Bus @@ -1494,25 +1443,6 @@ def test_agent_did_consistent_across_components(self): ) self.assertEqual(mesh_entry.agent_did, delta.agent_did) - def test_audit_log_and_commitment_chain(self): - """S21.4 -- AuditLog entries must link to CommitmentEngine via hash chain.""" - log = AuditLog() - log.log(event_type="action", agent_did="did:a1", action="read") - log.log(event_type="action", agent_did="did:a1", action="write") - valid, err = log.verify_integrity() - self.assertTrue(valid) - - commitment_engine = CommitmentEngine() - # In a real flow, the root hash would come from the Merkle chain - # Here we verify the commitment engine accepts a hash and session - record = commitment_engine.commit( - session_id="audit_session", - hash_chain_root="simulated_root_hash", - participant_dids=["did:a1"], - delta_count=2, - ) - self.assertIsInstance(record, CommitmentRecord) - def test_event_kind_maps_to_violation_category(self): """S21.5 -- GovernanceEventKind and ViolationCategory must be independently usable.""" event = GovernanceEvent( diff 
--git a/agent-governance-python/agent-runtime/README.md b/agent-governance-python/agent-runtime/README.md index ea4c94b31..d277d3857 100644 --- a/agent-governance-python/agent-runtime/README.md +++ b/agent-governance-python/agent-runtime/README.md @@ -33,7 +33,6 @@ session level: - **Shared Sessions** — Multi-agent session management with consistency modes (strict, eventual, causal) - **Saga Orchestration** — Compensating transactions for multi-step agent workflows - **Kill Switch** — Immediate termination with audit trail and blast radius containment -- **Joint Liability** — Attribution tracking across multi-agent collaborations - **Audit Trails** — Hash-chained, append-only execution logs ## Quick Start diff --git a/agent-governance-python/agent-runtime/src/agent_runtime/__init__.py b/agent-governance-python/agent-runtime/src/agent_runtime/__init__.py index e08bdc52e..9ba9909a0 100644 --- a/agent-governance-python/agent-runtime/src/agent_runtime/__init__.py +++ b/agent-governance-python/agent-runtime/src/agent_runtime/__init__.py @@ -39,24 +39,8 @@ VFSEdit, VFSPermissionError, VectorClock, - VectorClockManager, CausalViolationError, - IntentLockManager, - LockIntent, - LockContentionError, - DeadlockError, IsolationLevel, - # Liability - VouchRecord, - VouchingEngine, - SlashingEngine, - LiabilityMatrix, - CausalAttributor, - AttributionResult, - QuarantineManager, - QuarantineReason, - LiabilityLedger, - LedgerEntryType, # Rings RingEnforcer, ActionClassifier, @@ -72,16 +56,8 @@ SagaTimeoutError, SagaState, StepState, - FanOutOrchestrator, - FanOutPolicy, - CheckpointManager, - SemanticCheckpoint, - SagaDSLParser, - SagaDefinition, # Audit DeltaEngine, - CommitmentEngine, - EphemeralGC, # Verification TransactionHistoryVerifier, # Observability @@ -119,23 +95,8 @@ "VFSEdit", "VFSPermissionError", "VectorClock", - "VectorClockManager", "CausalViolationError", - "IntentLockManager", - "LockIntent", - "LockContentionError", - "DeadlockError", "IsolationLevel", - 
"VouchRecord", - "VouchingEngine", - "SlashingEngine", - "LiabilityMatrix", - "CausalAttributor", - "AttributionResult", - "QuarantineManager", - "QuarantineReason", - "LiabilityLedger", - "LedgerEntryType", "RingEnforcer", "ActionClassifier", "RingElevationManager", @@ -148,15 +109,7 @@ "SagaTimeoutError", "SagaState", "StepState", - "FanOutOrchestrator", - "FanOutPolicy", - "CheckpointManager", - "SemanticCheckpoint", - "SagaDSLParser", - "SagaDefinition", "DeltaEngine", - "CommitmentEngine", - "EphemeralGC", "TransactionHistoryVerifier", "HypervisorEventBus", "EventType", diff --git a/agent-governance-python/agent-runtime/tests/test_runtime_imports.py b/agent-governance-python/agent-runtime/tests/test_runtime_imports.py index 26ef634b1..1093bd766 100644 --- a/agent-governance-python/agent-runtime/tests/test_runtime_imports.py +++ b/agent-governance-python/agent-runtime/tests/test_runtime_imports.py @@ -30,24 +30,8 @@ "VFSEdit", "VFSPermissionError", "VectorClock", - "VectorClockManager", "CausalViolationError", - "IntentLockManager", - "LockIntent", - "LockContentionError", - "DeadlockError", "IsolationLevel", - # Liability - "VouchRecord", - "VouchingEngine", - "SlashingEngine", - "LiabilityMatrix", - "CausalAttributor", - "AttributionResult", - "QuarantineManager", - "QuarantineReason", - "LiabilityLedger", - "LedgerEntryType", # Rings "RingEnforcer", "ActionClassifier", @@ -63,16 +47,8 @@ "SagaTimeoutError", "SagaState", "StepState", - "FanOutOrchestrator", - "FanOutPolicy", - "CheckpointManager", - "SemanticCheckpoint", - "SagaDSLParser", - "SagaDefinition", # Audit "DeltaEngine", - "CommitmentEngine", - "EphemeralGC", # Verification "TransactionHistoryVerifier", # Observability @@ -144,20 +120,6 @@ def test_instantiate_vector_clock(): assert vc is not None -def test_instantiate_vector_clock_manager(): - from agent_runtime import VectorClockManager - - mgr = VectorClockManager() - assert mgr is not None - - -def test_instantiate_intent_lock_manager(): - from 
agent_runtime import IntentLockManager - - mgr = IntentLockManager() - assert mgr is not None - - def test_enum_consistency_mode(): from agent_runtime import ConsistencyMode @@ -186,13 +148,6 @@ def test_enum_isolation_level(): assert len(IsolationLevel.__members__) > 0 -def test_enum_quarantine_reason(): - from agent_runtime import QuarantineReason - - assert hasattr(QuarantineReason, "__members__") - assert len(QuarantineReason.__members__) > 0 - - def test_enum_breach_severity(): from agent_runtime import BreachSeverity @@ -207,13 +162,6 @@ def test_enum_event_type(): assert len(EventType.__members__) > 0 -def test_enum_ledger_entry_type(): - from agent_runtime import LedgerEntryType - - assert hasattr(LedgerEntryType, "__members__") - assert len(LedgerEntryType.__members__) > 0 - - def test_enum_saga_state(): from agent_runtime import SagaState @@ -244,8 +192,6 @@ def test_exception_classes_are_exceptions(): """Error / exception symbols should be subclasses of Exception.""" from agent_runtime import ( CausalViolationError, - DeadlockError, - LockContentionError, RateLimitExceeded, SagaTimeoutError, VFSPermissionError, @@ -253,8 +199,6 @@ def test_exception_classes_are_exceptions(): for exc_cls in [ CausalViolationError, - DeadlockError, - LockContentionError, RateLimitExceeded, SagaTimeoutError, VFSPermissionError, diff --git a/agent-governance-python/agt-cli/pyproject.toml b/agent-governance-python/agt-cli/pyproject.toml deleted file mode 100644 index 1186c509e..000000000 --- a/agent-governance-python/agt-cli/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "agent-governance-toolkit-cli" -version = "5.0.0" -description = "CLI tools and infrastructure servers for the Agent Governance Toolkit." 
-license = {text = "MIT"} -requires-python = ">=3.11" -authors = [ - {name = "Microsoft Corporation", email = "agentgovtoolkit@microsoft.com"}, -] - -[project.urls] -Homepage = "https://github.com/microsoft/agent-governance-toolkit" -Changelog = "https://github.com/microsoft/agent-governance-toolkit/releases" - -[tool.hatch.build.targets.wheel] -packages = [] diff --git a/agent-governance-python/agt-core/pyproject.toml b/agent-governance-python/agt-core/pyproject.toml deleted file mode 100644 index c27366315..000000000 --- a/agent-governance-python/agt-core/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "agent-governance-toolkit-core" -version = "5.0.0" -description = "Consolidated runtime kernel: policy engine, trust scoring, audit, identity, and execution rings." -license = {text = "MIT"} -requires-python = ">=3.11" -authors = [ - {name = "Microsoft Corporation", email = "agentgovtoolkit@microsoft.com"}, -] - -[project.urls] -Homepage = "https://github.com/microsoft/agent-governance-toolkit" -Changelog = "https://github.com/microsoft/agent-governance-toolkit/releases" - -[tool.hatch.build.targets.wheel] -packages = [] diff --git a/agent-governance-python/agt-integrations/pyproject.toml b/agent-governance-python/agt-integrations/pyproject.toml deleted file mode 100644 index 8c5a6c55c..000000000 --- a/agent-governance-python/agt-integrations/pyproject.toml +++ /dev/null @@ -1,67 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "agent-governance-toolkit-integrations" -version = "5.0.0" -description = "Framework integration adapters for the Agent Governance Toolkit." 
-license = {text = "MIT"} -requires-python = ">=3.11" -authors = [ - {name = "Microsoft Corporation", email = "agentgovtoolkit@microsoft.com"}, -] - -[project.optional-dependencies] -langchain = [ - "langchain-core>=0.1.0", -] -crewai = [ - "crewai>=0.1.0", -] -openai-agents = [ - "openai-agents>=0.1.0", -] -langgraph = [ - "langgraph>=0.1.0", -] -llamaindex = [ - "llama-index-core>=0.1.0", -] -haystack = [ - "haystack-ai>=2.0.0", -] -pydantic-ai = [ - "pydantic-ai>=0.1.0", -] -flowise = [ - "flowise>=1.0.0", -] -langflow = [ - "langflow>=1.0.0", -] -adk = [ - "google-adk>=0.1.0", -] -avp = [ - "boto3>=1.0.0", -] -cedarling = [ - "cedarpy>=4.0.0", -] -nostr-wot = [ - "nostr-sdk>=0.1.0", -] -structural-authz = [ - "oso>=0.1.0", -] -openshell = [ - -] - -[project.urls] -Homepage = "https://github.com/microsoft/agent-governance-toolkit" -Changelog = "https://github.com/microsoft/agent-governance-toolkit/releases" - -[tool.hatch.build.targets.wheel] -packages = [] diff --git a/agent-governance-python/agt-protocols/pyproject.toml b/agent-governance-python/agt-protocols/pyproject.toml deleted file mode 100644 index e67db8c22..000000000 --- a/agent-governance-python/agt-protocols/pyproject.toml +++ /dev/null @@ -1,20 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "agent-governance-toolkit-protocols" -version = "5.0.0" -description = "Protocol implementations for the Agent Governance Toolkit." 
-license = {text = "MIT"} -requires-python = ">=3.11" -authors = [ - {name = "Microsoft Corporation", email = "agentgovtoolkit@microsoft.com"}, -] - -[project.urls] -Homepage = "https://github.com/microsoft/agent-governance-toolkit" -Changelog = "https://github.com/microsoft/agent-governance-toolkit/releases" - -[tool.hatch.build.targets.wheel] -packages = [] diff --git a/agent-governance-python/benchmarks/governance_overhead.py b/agent-governance-python/benchmarks/governance_overhead.py index 8257dedb4..9d0c3e543 100644 --- a/agent-governance-python/benchmarks/governance_overhead.py +++ b/agent-governance-python/benchmarks/governance_overhead.py @@ -76,7 +76,6 @@ from hypervisor.rings.enforcer import RingEnforcer from hypervisor.models import ActionDescriptor, ReversibilityLevel from hypervisor.audit.delta import DeltaEngine, VFSChange -from hypervisor.liability.vouching import VouchingEngine # ═══════════════════════════════════════════════════════════════════════════ diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 6ba649a6a..37c2b9f3c 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -116,6 +116,6 @@ Default score for new agents: **500** (Standard tier). Score changes are driven ## Benchmark Methodology -Policy enforcement benchmarks are measured on a **30-scenario test suite** covering the OWASP Agentic Top 10 risk categories. Results (e.g., policy violation rates, latency) are specific to this test suite and should not be interpreted as universal guarantees. See [`agent-governance-python/agent-os/modules/control-plane/benchmark/`](../agent-governance-python/agent-os/modules/control-plane/benchmark/) for methodology, datasets, and reproduction instructions. +Policy enforcement benchmarks are measured on a **30-scenario test suite** covering the OWASP Agentic Top 10 risk categories. Results (e.g., policy violation rates, latency) are specific to this test suite and should not be interpreted as universal guarantees. 
See [`agent-governance-python/agent-os/modules/control-plane/benchmarks/`](../agent-governance-python/agent-os/modules/control-plane/benchmarks/) for methodology, datasets, and reproduction instructions. Full benchmark results: **[BENCHMARKS.md](../BENCHMARKS.md)** diff --git a/docs/FAQ.md b/docs/FAQ.md index 7c019578c..abf508895 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -493,7 +493,7 @@ Key controls: | **Session Isolation** | Multi-agent sessions with VFS namespacing and DID-bound identity. | | **Saga Orchestration** | Multi-step transactions with automatic compensation (rollback). | | **Kill Switch** | Immediate or graceful termination of runaway agents with audit trail. | -| **Joint Liability** | Attribution tracking across multi-agent collaborations. Bonded reputation with collateral slashing. | +| **Saga Compensation** | Multi-step workflow rollback with hash-chained audit evidence. | | **Rate Limiting** | Per-agent rate limits to prevent resource exhaustion. | | **Hash-Chained Audit Trail** | Tamper-evident, append-only execution logs. | | **Temporary Ring Elevation (Sudo)** | Agents can request temporary privilege escalation with a TTL that auto-expires. 
| @@ -684,7 +684,7 @@ See [Tutorial 40 — OTel Observability](tutorials/40-otel-observability.md) for | Agent OS | `agent-os-kernel` | Stateless policy engine — YAML, OPA/Rego, Cedar policies | | AgentMesh | `agentmesh-platform` | Trust, identity, governance — DID, Ed25519, trust scoring, protocol bridges | | Agent Runtime | `agentmesh-runtime` | Execution rings, saga orchestration, kill switch (re-exports from agent-hypervisor) | -| Agent Hypervisor | `agent-hypervisor` | Canonical runtime — session isolation, privilege rings, joint liability | +| Agent Hypervisor | `agent-hypervisor` | Canonical runtime — session isolation, privilege rings, saga compensation | | Agent SRE | `agent-sre` | SLOs, error budgets, circuit breakers, chaos engineering, replay debugging | | Agent Marketplace | `agentmesh-marketplace` | Plugin lifecycle — Ed25519 signing, trust-tiered capability gating | | Agent Lightning | `agentmesh-lightning` | RL training governance — policy-enforced runners, reward shaping | diff --git a/docs/case-studies/TEMPLATE.md b/docs/case-studies/TEMPLATE.md index 6e3eb4551..01b4c94d1 100644 --- a/docs/case-studies/TEMPLATE.md +++ b/docs/case-studies/TEMPLATE.md @@ -184,7 +184,7 @@ Agent Runtime's `SessionVFS` provides per-agent isolated filesystem views within Document the automated detection-to-response pipeline for sandboxing violations: - **`RingBreachDetector`**: fires alerts when an agent attempts actions above its ring level — WARNING for a 1-ring gap, HIGH for a 2-ring gap, CRITICAL for a 3-ring gap (e.g., Ring 3 agent attempting a Ring 0 action) - **`KillSwitch`**: immediately terminates the violating agent and triggers saga compensation for all in-flight work; document which kill reasons are wired to automatic triggers (`RING_BREACH`, `RATE_LIMIT`, `BEHAVIORAL_DRIFT`) vs. 
require human confirmation (`MANUAL`) -- **`QuarantineManager`**: isolates a suspect agent without termination so in-flight saga state is preserved for forensic investigation +- **`KillSwitch`**: terminates a suspect agent and hands off or compensates in-flight saga work - **`AgentRateLimiter`**: enforces per-ring call quotas; exceeding the limit triggers a `RATE_LIMIT` kill reason rather than silently dropping requests #### Side-Channel Attack Mitigations @@ -333,7 +333,7 @@ Detection mechanisms: Immediate mitigation steps (target: <5 minutes from detection to containment): 1. Revoke the key in the vault system — revocation must propagate to all agents holding a cached copy of the public key -2. Quarantine the affected agent via `QuarantineManager` — halts all signing operations without destroying in-flight saga state +2. Terminate the affected agent via `KillSwitch` — halts signing operations and hands off or compensates in-flight saga work 3. Issue a DID deactivation event — downstream agents must re-verify on next connection and reject the deactivated DID 4. Rotate to a new Ed25519 keypair, generate a new DID, and re-register the agent in AgentMesh diff --git a/docs/case-studies/sample-ecommerce-customer-service.md b/docs/case-studies/sample-ecommerce-customer-service.md index e1b7f381e..63474ef19 100644 --- a/docs/case-studies/sample-ecommerce-customer-service.md +++ b/docs/case-studies/sample-ecommerce-customer-service.md @@ -253,7 +253,7 @@ Each agent's session context and customer data extracts are scoped to a per-DID - **`RingBreachDetector`**: WARNING (1-ring gap, e.g., order-status-agent attempting a Ring 1 refund API call), HIGH (2-ring gap), CRITICAL (3-ring gap). HIGH and CRITICAL trigger automatic kill - **`KillSwitch`**: automatic triggers for `RING_BREACH` (HIGH/CRITICAL), `RATE_LIMIT`, and `BEHAVIORAL_DRIFT`. 
**GDPR deferral exception**: if gdpr-compliance-agent holds an active Article 17 deletion in progress (deletion plan approved, execution underway), kill is deferred up to 120 seconds — a mid-execution kill would leave customer data partially erased across 11 systems with no saga compensation path, creating a GDPR Article 17 violation worse than the breach itself. The deletion completes, then the agent is terminated and the privacy team is notified. -- **`QuarantineManager`**: preferred response for returns-and-refund-agent anomalies (trust 720, closest to the 700 human-oversight threshold) — isolates the agent while in-flight refund sagas are handed to human reviewers +- **`KillSwitch`**: preferred response for returns-and-refund-agent anomalies when execution must stop immediately while in-flight refund sagas are handed to human reviewers #### Side-Channel Attack Mitigations @@ -393,7 +393,7 @@ Detection mechanisms: Immediate mitigation steps (target: <5 minutes from detection to containment): 1. Disable the GCP Secret Manager secret version for the affected agent — propagates to all agents holding a cached public key within <2 seconds via Secret Manager event notification -2. Quarantine the affected agent via `QuarantineManager` — preferred over kill switch for GDPR continuity: if compromise is detected during an active Article 17 deletion, quarantine preserves in-flight saga state for human review rather than leaving data partially erased across 11 systems +2. Use `KillSwitch` with saga handoff for the affected agent: if compromise is detected during an active Article 17 deletion, in-flight saga work is handed to human review rather than leaving data partially erased across 11 systems 3. Issue a DID deactivation event in AgentMesh — all peer agents reject delegations from the deactivated DID within one heartbeat cycle (~5 seconds); pending customer tickets routed to human escalation queue automatically 4. 
Provision a new Ed25519 keypair in GCP Cloud HSM, generate a new DID, and re-register the agent — requires privacy team lead and security officer dual approval; if gdpr-compliance-agent is the affected agent, assess whether any in-progress GDPR deletion was compromised and notify affected customers under GDPR Article 33 (72-hour DPA breach notification window) diff --git a/docs/case-studies/sample-healthcare-prior-authorization.md b/docs/case-studies/sample-healthcare-prior-authorization.md index 48bde7693..f49f4c934 100644 --- a/docs/case-studies/sample-healthcare-prior-authorization.md +++ b/docs/case-studies/sample-healthcare-prior-authorization.md @@ -260,7 +260,7 @@ CHP's sandboxing violation pipeline is designed around one constraint: **a breac - **`RingBreachDetector`**: fires on ring boundary violations — WARNING (1-ring gap, e.g., clinical-documentation-agent attempting a Ring 1 write), HIGH (2-ring gap), CRITICAL (3-ring gap, e.g., Ring 2 agent attempting Ring 0 emergency override). CRITICAL breaches page the on-call clinical informatics engineer within 30 seconds via Azure Monitor alert. - **`KillSwitch`** automatic triggers: `RING_BREACH` (severity HIGH or CRITICAL), `RATE_LIMIT` (after three consecutive violations within 60 seconds), `BEHAVIORAL_DRIFT` (agent approving treatments with active contraindications flagged by Micromedex). **Exception**: kill switch execution is deferred by up to 90 seconds if the breaching agent holds an active PROVISIONAL EMERGENCY AUTHORIZATION — the emergency authorization is completed and handed off first, then the agent is terminated and the incident escalated to clinical informatics. -- **`QuarantineManager`**: used for WARNING-severity breaches (e.g., Ring 2 agent reading a record type outside its normal scope). Agent is isolated; the authorization case it was processing is handed to a human reviewer; in-flight saga state is preserved for forensic review. 
Quarantine is the preferred response for clinical-documentation-agent anomalies because termination mid-workflow would leave an authorization in an incomplete state with no saga compensation possible against Epic EHR. +- **`KillSwitch`**: used for severe breaches, such as a Ring 2 agent reading a record type outside its normal scope. The authorization case it was processing is handed to a human reviewer and in-flight saga work is preserved for review or compensation. - **`AgentRateLimiter`**: a sudden spike in PHI read calls from clinical-documentation-agent (e.g., >500 Epic API calls in 60 seconds vs. normal 80–120) triggers a `RATE_LIMIT` kill reason. This pattern matches the Week 6 psychiatric records incident — bulk PHI access outside authorization scope. #### Side-Channel Attack Mitigations @@ -426,7 +426,7 @@ Detection mechanisms: Immediate mitigation steps (target: <5 minutes from detection to containment): 1. Revoke the key in Azure Key Vault — propagates to all agents holding a cached public key copy within <2 seconds via Key Vault event subscription -2. Quarantine the affected agent via `QuarantineManager` — halts all signing operations; in-flight clinical authorization sagas are preserved for human review, not abandoned, to protect patient care continuity +2. Terminate the affected agent via `KillSwitch` — halts signing operations; in-flight clinical authorization sagas are handed to human review, not abandoned, to protect patient care continuity 3. Issue a DID deactivation event in AgentMesh — all peer agents reject delegations from the deactivated DID on next IATP handshake (within one heartbeat cycle, ~5 seconds); clinical requests routed to human escalation queue automatically 4. 
Provision a new Ed25519 keypair in Key Vault HSM, generate a new DID, and re-register the agent in AgentMesh under the incident change control process (dual approval required per CHP's HIPAA security incident procedure §164.308(a)(6)) diff --git a/docs/compliance/atf-conformance-assessment.md b/docs/compliance/atf-conformance-assessment.md index 26b0f2371..b5b9083c9 100644 --- a/docs/compliance/atf-conformance-assessment.md +++ b/docs/compliance/atf-conformance-assessment.md @@ -116,7 +116,6 @@ Actions are attributed to agent identities, but naming conventions vary across p |-----------|----------| | Audit attribution | `agent-governance-python/agent-mesh/audit/merkle_chain.py` — `agent_did` field | | Hypervisor tracking | `agent-governance-python/agent-hypervisor/audit/delta.py` — `agent_did` per entry | -| Joint liability | `agent-governance-python/agent-hypervisor/liability/joint.py` — `AgentContribution` | **Gap:** Inconsistent field naming (`agent_id` vs `agent_did` vs `AgentId`) across packages. No shared `Attribution` model. diff --git a/docs/compliance/nist-ai-rmf-alignment.md b/docs/compliance/nist-ai-rmf-alignment.md index 983608c67..bb75d0d83 100644 --- a/docs/compliance/nist-ai-rmf-alignment.md +++ b/docs/compliance/nist-ai-rmf-alignment.md @@ -142,8 +142,7 @@ and conflict detection provide lifecycle management. Three enforcement modes **Coverage: ✅ FULLY ADDRESSED** -AGT provides cryptographic audit trails, Merkle hash chains, Shapley-value fault -attribution, and joint liability tracking. +AGT provides cryptographic audit trails, Merkle hash chains, DID-based attribution, and runtime verification signals. | Component | File | Key Class/Function | |-----------|------|--------------------| @@ -152,10 +151,6 @@ attribution, and joint liability tracking. 
| Flight recorder (IATP) | `agent-governance-python/agent-os/modules/iatp/iatp/telemetry/__init__.py:21` | `FlightRecorder` | | Flight recorder (Lightning) | `agent-governance-python/agent-lightning/src/agent_lightning_gov/emitter.py:56` | `FlightRecorderEmitter` | | Hypervisor audit | `agent-governance-python/agent-hypervisor/audit/delta.py` | `DeltaEngine` | -| Shapley attribution | `agent-governance-python/agent-hypervisor/src/hypervisor/liability/attribution.py` | Shapley-value fault attribution | -| Joint liability | `agent-governance-python/agent-hypervisor/src/hypervisor/liability/__init__.py` | Joint liability module | -| Liability ledger | `agent-governance-python/agent-hypervisor/src/hypervisor/liability/ledger.py` | Liability tracking | -| Quarantine system | `agent-governance-python/agent-hypervisor/src/hypervisor/liability/quarantine.py` | Agent quarantine | | RBAC | `agent-governance-python/agent-os/src/agent_os/integrations/rbac.py` | 4 roles: READER, WRITER, ADMIN, AUDITOR | | DID-based attribution | `agent-governance-python/agent-mesh/src/agentmesh/governance/audit.py` | `agent_did` field per entry | @@ -445,7 +440,7 @@ platforms. 
|-----------|------|--------------------| | Content quality evaluator | `agent-governance-python/agent-os/src/agent_os/content_governance.py:78` | `ContentQualityEvaluator` | | Plugin quality assessor | `agent-governance-python/agent-marketplace/src/agent_marketplace/quality_assessment.py:120` | `QualityAssessor` | -| Red team dataset | `agent-governance-python/agent-os/modules/control-plane/benchmark/red_team_dataset.py` | Red-team benchmark data | +| Red team dataset | `agent-governance-python/agent-os/modules/control-plane/benchmarks/red_team_dataset.py` | Red-team benchmark data | | Policy benchmark suite | `agent-governance-python/agent-os/benchmarks/bench_policy.py` | 30-scenario OWASP benchmark | | CMVK verification | `agent-governance-python/agent-os/modules/cmvk/src/cmvk/constitutional.py` | Cross-Model Verification Kernel | diff --git a/docs/i18n/README.ja.md b/docs/i18n/README.ja.md index 845e8a015..f35bfee49 100644 --- a/docs/i18n/README.ja.md +++ b/docs/i18n/README.ja.md @@ -120,8 +120,8 @@ pip install agentmesh-lightning # 強化学習トレーニングガバナ - [フレームワーククイックスタート](../../examples/quickstart/) | [統合提案](../../docs/proposals/) - **完全な OWASP カバレッジ**: Agentic Top 10 リスクの 10/10 を対応済み、各 ASI カテゴリに専用のコントロールを提供 - [OWASP コンプライアンス](../../docs/compliance/owasp-agentic-top10-architecture.md) | [競合比較](../../docs/COMPARISON.md) -- **CI/CD 向け GitHub Actions**: PR ワークフローのための自動セキュリティスキャンとガバナンスアテステーション - - [セキュリティスキャン Action](../../action/security-scan/) | [ガバナンスアテステーション Action](../../action/governance-attestation/) +- **CI/CD 向け GitHub Actions**: Agent Governance Verify による CI/CD でのガバナンス検証 + - [Agent Governance Verify Action](../../action/) ### 💬 **フィードバックをお待ちしています!** diff --git a/docs/i18n/README.zh-CN.md b/docs/i18n/README.zh-CN.md index 016abfabf..a34d4c759 100644 --- a/docs/i18n/README.zh-CN.md +++ b/docs/i18n/README.zh-CN.md @@ -102,8 +102,8 @@ pip install agentmesh-lightning # 强化学习训练治理 - [框架快速入门](../../examples/quickstart/) | [集成方案](../../docs/proposals/) - **完整 OWASP 
覆盖**: 针对 Agentic Top 10 风险实现 10/10 覆盖,每个 ASI 类别均有专属控制措施 - [OWASP 合规](../../docs/compliance/owasp-agentic-top10-architecture.md) | [竞品对比](../../docs/COMPARISON.md) -- **GitHub Actions 支持 CI/CD**: 为 PR 工作流提供自动化安全扫描与治理证明 - - [安全扫描 Action](../../action/security-scan/) | [治理证明 Action](../../action/governance-attestation/) +- **GitHub Actions 支持 CI/CD**: 通过 Agent Governance Verify 在 CI/CD 中执行治理验证 + - [Agent Governance Verify Action](../../action/) ### 💬 **我们期待你的反馈!** diff --git a/docs/i18n/README.zh-TW.md b/docs/i18n/README.zh-TW.md index 60e38a268..9fcd5fb21 100644 --- a/docs/i18n/README.zh-TW.md +++ b/docs/i18n/README.zh-TW.md @@ -102,8 +102,8 @@ pip install agentmesh-lightning # 強化學習訓練治理 - [框架快速入門](../../examples/quickstart/) | [整合方案](../../docs/proposals/) - **完整 OWASP 覆蓋**:針對 Agentic Top 10 風險實現 10/10 覆蓋,每個 ASI 類別均有專屬控制措施 - [OWASP 合規](../../docs/compliance/owasp-agentic-top10-architecture.md) | [競品比較](../../docs/COMPARISON.md) -- **GitHub Actions 支援 CI/CD**:為 PR 工作流提供自動化安全掃描與治理證明 - - [安全掃描 Action](../../action/security-scan/) | [治理證明 Action](../../action/governance-attestation/) +- **GitHub Actions 支援 CI/CD**:透過 Agent Governance Verify 在 CI/CD 中執行治理驗證 + - [Agent Governance Verify Action](../../action/) ### 💬 **我們期待您的意見回饋!** diff --git a/docs/packages/agent-hypervisor.md b/docs/packages/agent-hypervisor.md index 029970e21..0af844383 100644 --- a/docs/packages/agent-hypervisor.md +++ b/docs/packages/agent-hypervisor.md @@ -1,73 +1,46 @@
-# Agent Hypervisor — Public Preview +# Agent Hypervisor Public Preview -**Execution supervisor for AI agents — runtime isolation, execution rings, and governance for autonomous agents** +**Runtime supervisor for AI agents with execution rings, isolated sessions, saga compensation, tamper-evident audit trails, and safety controls.** -*Just as a supervisor isolates processes, Agent Hypervisor isolates AI agent sessions
and enforces governance boundaries with a kill switch, blast radius containment, and accountability.* +*Just as an OS supervisor isolates processes, Agent Hypervisor isolates AI agent sessions and enforces governance boundaries with execution rings, a kill switch, and blast-radius containment.* [![CI](https://github.com/microsoft/agent-governance-toolkit/actions/workflows/ci.yml/badge.svg)](https://github.com/microsoft/agent-governance-toolkit/actions/workflows/ci.yml) [![License](https://img.shields.io/badge/license-MIT-blue.svg)](../../LICENSE) [![Python](https://img.shields.io/badge/python-3.11+-blue.svg)](https://python.org) -[![PyPI](https://img.shields.io/pypi/v/agent-hypervisor)](https://pypi.org/project/agent-governance-python/agent-hypervisor/) -[![Benchmark](https://img.shields.io/badge/latency-268%CE%BCs%20pipeline-orange)](agent-governance-python/benchmarks/) +[![Benchmarks](https://img.shields.io/badge/benchmarks-available-blue)](https://github.com/microsoft/agent-governance-toolkit/tree/main/agent-governance-python/agent-hypervisor/benchmarks) [![Discussions](https://img.shields.io/github/discussions/microsoft/agent-governance-toolkit)](https://github.com/microsoft/agent-governance-toolkit/discussions) > [!IMPORTANT] -> **Public Preview** — The `agent-hypervisor` package on PyPI is a public preview release. APIs may change before GA. +> `agent-hypervisor` is deprecated as a standalone PyPI package. For new work, install `agent-governance-toolkit-core` or the full toolkit. The source in this directory remains tested and documents the runtime features that are implemented here. -> ⭐ **If this project helps you, please star it!** It helps others discover Agent Hypervisor. 
- -> 📦 **Install the full stack:** `pip install agent-governance-toolkit[full]` — [PyPI](https://pypi.org/project/ai-agent-governance/) | [GitHub](https://github.com/microsoft/agent-governance-toolkit) - -[Quick Start](#quick-start) • [Configuration](#configuration) • [Why a Hypervisor?](#-why-agent-hypervisor) • [Features](#key-features) • [Architecture](#architecture-diagrams) • [Performance](#performance) • [Ecosystem](#ecosystem) +[Quick start](#quick-start) | [Why a hypervisor](#why-agent-hypervisor) | [Configuration](#configuration) | [Architecture](#architecture) | [Key features](#key-features) | [REST API](#rest-api) | [Ecosystem](#ecosystem)
--- -### Integrated Into Major AI Frameworks - -

- Dify - LlamaIndex - Awesome Copilot - Agent-Lightning - awesome-opentelemetry -

- -## 📊 By The Numbers - - - - - - - - -

644+

Tests Passing

4

Execution Rings
(Ring 0–3)

268μs

Full Governance
Pipeline Latency

v2.0

Saga Compensation
Kill Switch · Rate Limits
- -## 💡 Why Agent Hypervisor? +## Why Agent Hypervisor > **The problem:** AI agents run with unlimited resources, no isolation, and no kill switch. A single rogue agent in a shared session can escalate privileges, corrupt state, or cascade failures across your entire system. -> **Our solution:** A hypervisor that enforces execution rings, resource limits, saga compensation, and runtime governance — giving you a kill switch, blast radius containment, and joint liability for agent accountability. +> **The approach:** A hypervisor that enforces execution rings, resource limits, saga compensation, and runtime governance, giving you a kill switch and blast-radius containment. ### How It Maps to What You Already Know | OS / VM Hypervisor | Agent Hypervisor | Why It Matters | |-------------------|-----------------|----------------| -| CPU rings (Ring 0–3) | **Execution Rings** — privilege levels based on trust score | Graduated access, not binary allow/deny | -| Process isolation | **Session isolation** — VFS namespacing, DID-bound identity | Rogue agents can't corrupt other sessions | -| Memory protection | **Liability protection** — bonded reputation, collateral slash | Sponsors have skin in the game | -| System calls | **Saga transactions** — multi-step ops with automatic rollback | Failed workflows undo themselves | -| Watchdog timer | **Kill switch** — graceful termination with step handoff | Stop runaway agents without data loss | -| Audit logs | **Hash-chained delta trail** — tamper-evident forensic trail | Prove exactly what happened | +| CPU rings (Ring 0-3) | **Execution Rings**, privilege levels based on trust score | Graduated access, not binary allow/deny | +| Process isolation | **Session isolation** with VFS namespacing and DID-bound identity | Rogue agents cannot corrupt other sessions | +| System calls | **Saga transactions**, multi-step ops with automatic rollback | Failed workflows undo themselves | +| Watchdog timer | **Kill switch** with graceful 
termination and step handoff | Stop runaway agents without data loss | +| Audit logs | **Hash-chained delta trail**, tamper-evident forensic record | Prove exactly what happened | -## Quick Start +## Quick start ```bash -pip install agent-hypervisor +pip install agent-governance-toolkit-core ``` ```python @@ -81,13 +54,13 @@ session = await hv.create_session( creator_did="did:mesh:admin", ) -# Agent joins — ring assigned automatically by trust score +# Agent joins, ring assigned automatically by trust score ring = await hv.join_session( session.sso.session_id, "did:mesh:agent-1", sigma_raw=0.85, ) -# → RING_2_STANDARD (trusted agent) +# RING_2_STANDARD (trusted agent) # Activate and run a governed saga await hv.activate_session(session.sso.session_id) @@ -101,13 +74,13 @@ result = await session.saga.execute_step( saga.saga_id, step.step_id, executor=draft_email, ) -# Terminate — returns tamper-evident audit hash +# Terminate, returns tamper-evident audit hash hash_root = await hv.terminate_session(session.sso.session_id) ``` ## Configuration -This section covers how to configure agents, sessions, sagas, security, and rate limiting. +This section covers how to configure agents, sessions, sagas, safety controls, and rate limiting. ### Agent Configuration @@ -116,60 +89,64 @@ Agents join sessions and are assigned an **Execution Ring** based on their trust ```python from hypervisor import Hypervisor, SessionConfig, ConsistencyMode, ExecutionRing -# Initialize with optional liability cap and retention policy +# Basic initialization. Optional adapters resolve trust scores, +# behavioral verification, and capability manifests. 
hv = Hypervisor( - max_exposure=1000.0, # Max total liability per voucher - retention_policy=None, # Ephemeral GC rules (default: keep all) + nexus=None, # External trust scoring backend + policy_check=None, # Behavioral verification adapter + iatp=None, # Capability manifest parser ) # Create a session with resource limits session = await hv.create_session( config=SessionConfig( consistency_mode=ConsistencyMode.EVENTUAL, # or STRONG - max_participants=10, # 1–1000 - max_duration_seconds=3600, # 1–604,800 (7 days max) + max_participants=10, # 1-1000 + max_duration_seconds=3600, # 1-604,800 (7 days max) min_eff_score=0.60, # Minimum trust score to join enable_audit=True, # Hash-chained audit trail - enable_blockchain_commitment=False, ), creator_did="did:mesh:admin", ) -# Agent joins — ring assigned by trust score +# Agent joins, ring assigned by trust score ring = await hv.join_session( session.sso.session_id, "did:mesh:agent-1", - sigma_raw=0.85, # Raw trust score [0.0–1.0] + sigma_raw=0.85, # Raw trust score [0.0-1.0] ) # Ring assignment thresholds: -# eff_score > 0.95 + consensus → RING_1_PRIVILEGED -# eff_score > 0.60 → RING_2_STANDARD -# otherwise → RING_3_SANDBOX (default) +# eff_score > 0.95 + consensus -> RING_1_PRIVILEGED +# eff_score > 0.60 -> RING_2_STANDARD +# otherwise -> RING_3_SANDBOX (default) ``` ### Temporary Ring Elevation (Sudo) -Agents can request temporary privilege escalation with a TTL: - -> **Note:** Ring elevation is available in the Enterprise Edition. Public Preview includes the API surface but returns a denial response. See the architecture for how it works. +Agents can request temporary privilege escalation with a TTL. Elevation is granted only if the agent's trust score meets the target ring's threshold; Ring 1 additionally requires an attestation string, and Ring 0 is never granted through the standard API. 
```python -from hypervisor import RingElevationManager +from hypervisor import ExecutionRing, RingElevationManager elevation_mgr = RingElevationManager() -# Grant temporary Ring 1 access (max 3600s, default 300s) -elevation = elevation_mgr.elevate( +# Request temporary Ring 1 access (TTL default 300s, capped at 3600s) +elevation = elevation_mgr.request_elevation( agent_did="did:mesh:agent-1", session_id=session.sso.session_id, + current_ring=ExecutionRing.RING_2_STANDARD, target_ring=ExecutionRing.RING_1_PRIVILEGED, - ttl_seconds=300, # Auto-expires after 5 minutes + ttl_seconds=300, # Auto-expires after 5 minutes + attestation="signed-by-sre", # Required for Ring 1 reason="deploy-approval", - attestation="signed-by-sre", # Optional proof + trust_score=0.96, # Or supply a trust_provider to the manager ) # Revoke early if needed -elevation_mgr.revoke(elevation.elevation_id) +elevation_mgr.revoke_elevation(elevation.elevation_id) + +# Expire elapsed elevations (call periodically) +elevation_mgr.tick() ``` ### Session Configuration @@ -185,64 +162,24 @@ config = SessionConfig( max_duration_seconds=7200, # 2-hour session min_eff_score=0.70, # Higher trust threshold enable_audit=True, - enable_blockchain_commitment=True, ) session = await hv.create_session(config=config, creator_did="did:mesh:admin") await hv.activate_session(session.sso.session_id) -# Session lifecycle: CREATED → HANDSHAKING → ACTIVE → TERMINATING → ARCHIVED +# Session lifecycle: CREATED -> HANDSHAKING -> ACTIVE -> TERMINATING -> ARCHIVED ``` ### Saga Configuration -Define multi-step transactions with compensation using the DSL parser or programmatically: +Define multi-step transactions with compensation programmatically: ```python -from hypervisor import SagaDSLParser, SagaOrchestrator, FanOutPolicy - -# Option 1: Define saga as a dict (or load from YAML) -definition = { - "name": "deploy-pipeline", - "session_id": "ss-a1b2c3d4", - "steps": [ - { - "id": "provision", - "action_id": "provision-vm", - 
"agent": "did:mesh:agent-1", - "execute_api": "/infra/provision", - "undo_api": "/infra/deprovision", # Compensation endpoint - "timeout": 120, # Seconds (default: 300) - "retries": 2, # Retry count (default: 0) - }, - { - "id": "deploy", - "action_id": "deploy-app", - "agent": "did:mesh:agent-2", - "execute_api": "/app/deploy", - "undo_api": "/app/undeploy", - "timeout": 60, - }, - ], - "fan_outs": [ - { - "policy": "all_must_succeed", # or majority_must_succeed, any_must_succeed - "branch_step_ids": ["provision", "deploy"], - }, - ], -} - -parser = SagaDSLParser() -errors = parser.validate(definition) # Returns [] if valid -saga_def = parser.parse(definition) -steps = parser.to_saga_steps(saga_def) - -# Option 2: Build programmatically saga = session.saga.create_saga(session.sso.session_id) step = session.saga.add_step( saga.saga_id, "draft_email", "did:mesh:agent-1", execute_api="/api/draft", - undo_api="/api/undo-draft", + undo_api="/api/undo-draft", # Compensation endpoint timeout_seconds=30, max_retries=2, ) @@ -258,6 +195,7 @@ The kill switch provides graceful agent termination with saga step handoff: ```python from hypervisor import KillSwitch +from hypervisor.security.kill_switch import KillReason kill_switch = KillSwitch() @@ -265,19 +203,19 @@ kill_switch = KillSwitch() result = kill_switch.kill( agent_did="did:mesh:rogue-agent", session_id=session.sso.session_id, - reason="ring_breach", # behavioral_drift | rate_limit | ring_breach | manual + reason=KillReason.RING_BREACH, ) -# result.handoffs — list of in-flight saga steps handed to substitute agents -# result.compensation_triggered — True if active sagas were compensated +# result.handoffs, list of in-flight saga steps handed to substitute agents +# result.compensation_triggered, True if active sagas were compensated ``` Kill reasons: -- `behavioral_drift` — Agent behavior diverges from expected patterns -- `rate_limit` — Agent exceeded rate limit thresholds -- `ring_breach` — Agent attempted 
unauthorized ring access -- `manual` — Operator-initiated termination -- `quarantine_timeout` — Quarantine period expired without resolution -- `session_timeout` — Session max duration exceeded +- `behavioral_drift`, agent behavior diverges from expected patterns +- `rate_limit`, agent exceeded rate limit thresholds +- `ring_breach`, agent attempted unauthorized ring access +- `manual`, operator-initiated termination +- `quarantine_timeout`, quarantine period expired without resolution +- `session_timeout`, session max duration exceeded ### Rate Limiting @@ -285,7 +223,7 @@ Per-ring token bucket rate limiting is applied automatically: ```python from hypervisor import AgentRateLimiter -from hypervisor.rings import ExecutionRing +from hypervisor.models import ExecutionRing limiter = AgentRateLimiter() @@ -296,7 +234,6 @@ limiter = AgentRateLimiter() # Ring 3 (Sandbox): 5.0 rate, 10.0 capacity # Custom rate limits per ring -from hypervisor.security.rate_limiter import DEFAULT_RING_LIMITS custom_limits = { ExecutionRing.RING_0_ROOT: (200.0, 400.0), ExecutionRing.RING_1_PRIVILEGED: (100.0, 200.0), @@ -317,62 +254,13 @@ detector = RingBreachDetector() # Breach events include: # severity: NONE | LOW | MEDIUM | HIGH | CRITICAL -# anomaly_score: float — how far the behavior deviates -# actual_rate vs expected_rate — call frequency anomaly -# call_count_window — calls in the detection window +# anomaly_score: float, how far the behavior deviates +# actual_rate vs expected_rate, call frequency anomaly +# call_count_window, calls in the detection window # Breach detection triggers automatic demotion or kill switch ``` -### YAML Configuration - -You can define sagas and load them from YAML files: - -```yaml -# saga-deploy.yaml -name: deploy-pipeline -session_id: ss-a1b2c3d4 -steps: - - id: provision - action_id: provision-vm - agent: "did:mesh:agent-1" - execute_api: /infra/provision - undo_api: /infra/deprovision - timeout: 120 - retries: 2 - - - id: deploy - action_id: 
deploy-app - agent: "did:mesh:agent-2" - execute_api: /app/deploy - undo_api: /app/undeploy - timeout: 60 - retries: 1 - -fan_outs: - - policy: all_must_succeed - branch_step_ids: - - provision - - deploy - -metadata: - environment: production - owner: platform-team -``` - -```python -import yaml -from hypervisor import SagaDSLParser - -with open("saga-deploy.yaml") as f: - definition = yaml.safe_load(f) - -parser = SagaDSLParser() -errors = parser.validate(definition) -if not errors: - saga_def = parser.parse(definition) -``` - ### Docker Compose For production deployments with Redis-backed state: @@ -401,57 +289,49 @@ services: | Parameter | Type | Default | Description | |-----------|------|---------|-------------| | **Hypervisor** | | | | -| `max_exposure` | `float` | `None` | Maximum total liability per voucher | -| `retention_policy` | `RetentionPolicy` | `None` | Ephemeral GC rules for audit data | | `nexus` | adapter | `None` | External trust scoring backend | | `policy_check` | adapter | `None` | Behavioral verification adapter | | `iatp` | adapter | `None` | Capability manifest parser | | **SessionConfig** | | | | | `consistency_mode` | `ConsistencyMode` | `EVENTUAL` | `STRONG` (consensus) or `EVENTUAL` (gossip) | -| `max_participants` | `int` | `10` | Max agents per session (1–1,000) | -| `max_duration_seconds` | `int` | `3600` | Session timeout (1–604,800) | -| `min_eff_score` | `float` | `0.60` | Minimum trust score to join (0.0–1.0) | +| `max_participants` | `int` | `10` | Max agents per session (1-1,000) | +| `max_duration_seconds` | `int` | `3600` | Session timeout (1-604,800) | +| `min_eff_score` | `float` | `0.60` | Minimum trust score to join (0.0-1.0) | | `enable_audit` | `bool` | `True` | Enable hash-chained audit trail | -| `enable_blockchain_commitment` | `bool` | `False` | Commit audit hashes to blockchain | | **Execution Rings** | | | | -| `RING_0_ROOT` | `int` | `0` | Hypervisor config & penalty (SRE Witness required) | +| `RING_0_ROOT` | 
`int` | `0` | Hypervisor config and penalty (SRE Witness required) | | `RING_1_PRIVILEGED` | `int` | `1` | Non-reversible actions (eff_score > 0.95 + consensus) | | `RING_2_STANDARD` | `int` | `2` | Reversible actions (eff_score > 0.60) | | `RING_3_SANDBOX` | `int` | `3` | Read-only / research (default) | | **Ring Elevation** | | | | | `ttl_seconds` | `int` | `300` | Elevation duration (max 3,600s) | | `reason` | `str` | `""` | Justification for elevation | -| `attestation` | `str` | `None` | Signed proof from authorizer | +| `attestation` | `str` | `None` | Signed proof, required for Ring 1 | | **Saga Steps** | | | | -| `timeout` | `int` | `300` | Step timeout in seconds | -| `retries` | `int` | `0` | Max retry attempts | -| `execute_api` | `str` | — | Endpoint for step execution | +| `timeout_seconds` | `int` | `300` | Step timeout in seconds | +| `max_retries` | `int` | `0` | Max retry attempts | +| `execute_api` | `str` | required | Endpoint for step execution | | `undo_api` | `str` | `None` | Endpoint for compensation | -| `checkpoint_goal` | `str` | `None` | Checkpoint description for replay | -| **Fan-Out Policy** | | | | -| `ALL_MUST_SUCCEED` | — | ✓ | All branches must complete | -| `MAJORITY_MUST_SUCCEED` | — | — | >50% of branches must complete | -| `ANY_MUST_SUCCEED` | — | — | At least one branch must complete | | **Rate Limits** (tokens/sec, burst) | | | | | Ring 0 (Root) | `(float, float)` | `(100.0, 200.0)` | Highest throughput for admin ops | | Ring 1 (Privileged) | `(float, float)` | `(50.0, 100.0)` | High throughput for trusted agents | | Ring 2 (Standard) | `(float, float)` | `(20.0, 40.0)` | Moderate throughput | | Ring 3 (Sandbox) | `(float, float)` | `(5.0, 10.0)` | Restricted throughput | | **Kill Switch** | | | | -| `reason` | `KillReason` | — | `behavioral_drift`, `rate_limit`, `ring_breach`, `manual`, `quarantine_timeout`, `session_timeout` | +| `reason` | `KillReason` | required | `behavioral_drift`, `rate_limit`, `ring_breach`, `manual`, 
`quarantine_timeout`, `session_timeout` | | **Breach Detection** | | | | -| `severity` | `BreachSeverity` | — | `NONE`, `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` | +| `severity` | `BreachSeverity` | | `NONE`, `LOW`, `MEDIUM`, `HIGH`, `CRITICAL` | -## Architecture Diagrams +## Architecture ### Execution Ring Hierarchy ```mermaid graph TD - R0["🔴 Ring 0 — Root
Hypervisor config & penalty
Requires SRE Witness"] - R1["🟠 Ring 1 — Privileged
Non-reversible actions
eff_score > 0.95 + consensus"] - R2["🟡 Ring 2 — Standard
Reversible actions
eff_score > 0.60"] - R3["🟢 Ring 3 — Sandbox
Read-only / research
Default for unknown agents"] + R0["🔴 Ring 0, Root
Hypervisor config and penalty
Requires SRE Witness"] + R1["🟠 Ring 1, Privileged
Non-reversible actions
eff_score > 0.95 + consensus"] + R2["🟡 Ring 2, Standard
Reversible actions
eff_score > 0.60"] + R3["🟢 Ring 3, Sandbox
Read-only / research
Default for unknown agents"] R0 -->|"supervises"| R1 R1 -->|"supervises"| R2 @@ -489,51 +369,7 @@ flowchart LR Success -- No --> Compensate["Compensate\n(reverse order)"] Compensate --> CompOk{"Compensation\nsucceeds?"} CompOk -- Yes --> Rolled["↩️ Saga Rolled Back"] - CompOk -- No --> Escalate["⚠️ Escalate\nLiability Penalty"] -``` - -### Joint Liability Vouch Chain - -```mermaid -flowchart TD - Sponsor["🛡️ Sponsor Agent
eff_score: 0.92
Bonds reputation"] - Sponsored["🤖 Sponsored Agent
eff_score: 0.45
Gains Ring 2 access"] - Action["Agent performs action"] - Check{"Intent\nviolation?"} - Safe["✅ No penalty"] - Penalty["🔻 Both penalized
Sponsor collateral slashed
Sponsored demoted"] - - Sponsor -->|"vouches for"| Sponsored - Sponsored --> Action - Action --> Check - Check -- No --> Safe - Check -- Yes --> Penalty - Penalty -->|"collateral slash"| Sponsor - Penalty -->|"demotion + quarantine"| Sponsored -``` - -### Slash Cascade Propagation - -```mermaid -flowchart TD - Violation["🚨 Violation Detected"] - Attr["Fault Attribution
Identify responsible agent"] - Primary["Primary Agent
Full penalty applied"] - Sponsor1["Sponsor A
Collateral slashed"] - Sponsor2["Sponsor B
Collateral slashed"] - Quarantine["Quarantine Agent
Before termination"] - Demote["Demote to Ring 3"] - Ledger["Record in
Liability Ledger"] - - Violation --> Attr - Attr --> Primary - Primary --> Sponsor1 - Primary --> Sponsor2 - Primary --> Quarantine - Quarantine --> Demote - Sponsor1 --> Ledger - Sponsor2 --> Ledger - Primary --> Ledger + CompOk -- No --> Escalate["⚠️ Saga Failed\n(compensation error)"] ``` ## Key Features @@ -543,7 +379,7 @@ flowchart TD ### 🔐 Execution Rings -Hardware-inspired privilege model (Ring 0–3). Agents earn ring access based on trust score. Real-time demotion on trust drops. Sudo elevation with TTL. Breach detection with circuit breakers. +Hardware-inspired privilege model (Ring 0-3). Agents earn ring access based on trust score. Real-time demotion on trust drops. Sudo elevation with TTL. Breach detection with circuit breakers. @@ -557,27 +393,27 @@ Graceful termination with saga step handoff to substitute agents. Rate limiting ### 🔄 Saga Compensation -Multi-step transactions with timeout enforcement, retry with backoff, reverse-order compensation, and escalation to liability. Parallel execution with ALL/MAJORITY/ANY policies. +Multi-step transactions with timeout enforcement, retry with backoff, and reverse-order compensation of committed steps on failure. -### 🤝 Joint Liability -High-trust agents sponsor low-trust agents by bonding reputation. If the sponsored agent violates intent, **both are penalized**. Fault attribution, quarantine-before-terminate, persistent ledger. +### 📋 Hash-Chained Audit +Forensic-grade delta trails. Semantic diffs, hash-chained entries, and a summary commitment (root hash) returned at session end. -### 📋 Hash-Chained Audit -Forensic-grade delta trails — semantic diffs, hash-chained entries, summary commitment at session end. Garbage collection preserves forensic artifacts. +### 📡 Observability +Structured event bus emits typed events for every action. Causal trace IDs with full delegation-tree encoding. Version counters for causal consistency. **Prometheus metrics collector** for ring transitions and breaches. 
**OpenTelemetry span exporter** for saga-to-span mapping with distributed trace context. -### 📡 Observability -Structured event bus emits typed events for every action. Causal trace IDs with full delegation tree encoding. Version counters for causal consistency. **Prometheus metrics collector** for ring transitions and breaches. **OpenTelemetry span exporter** for saga-to-span mapping with distributed trace context. +### 🧩 Session Isolation +Shared Session Object with a per-session virtual file system, snapshots, and vector-clock causal ordering. DID-bound identity keeps rogue agents from corrupting other sessions. @@ -586,74 +422,61 @@ Structured event bus emits typed events for every action. Causal trace IDs with
📖 Feature details (click to expand) -### 🔐 Execution Rings — Deep Dive +### 🔐 Execution Rings: Deep Dive ``` -Ring 0 (Root) — Hypervisor config & penalty — requires SRE Witness -Ring 1 (Privileged) — Non-reversible actions — requires eff_score > 0.95 + consensus -Ring 2 (Standard) — Reversible actions — requires eff_score > 0.60 -Ring 3 (Sandbox) — Read-only / research — default for unknown agents +Ring 0 (Root) Hypervisor config and penalty, requires SRE Witness +Ring 1 (Privileged) Non-reversible actions, requires eff_score > 0.95 + consensus +Ring 2 (Standard) Reversible actions, requires eff_score > 0.60 +Ring 3 (Sandbox) Read-only / research, default for unknown agents ``` -**v2.0 additions:** Dynamic ring elevation (sudo with TTL), ring breach detection with circuit breakers, ring inheritance for spawned agents, **behavioral anomaly detection** with sliding-window rate analysis and ring-distance amplification. +**Ring controls:** Dynamic ring elevation (sudo with TTL), ring breach detection with circuit breakers, ring inheritance for spawned agents, and behavioral anomaly detection with sliding-window rate analysis and ring-distance amplification. + +**Command denylist enforcement:** `RingEnforcer.check_command()` validates subprocess commands against a global `DENIED_COMMANDS` list (curl, wget, shells, compilers, network tools, alternative interpreters), using case-insensitive matching and shell metacharacter stripping to prevent injection bypasses. 
-### 🔄 Saga Orchestrator — Deep Dive +### 🔄 Saga Orchestrator: Deep Dive -- **Timeout enforcement** — steps that hang are automatically cancelled -- **Retry with backoff** — transient failures retry with exponential delay -- **Reverse-order compensation** — on failure, all committed steps are undone -- **Escalation** — if compensation fails, Joint Liability penalty is triggered -- **Parallel execution** — ALL_MUST_SUCCEED / MAJORITY / ANY policies -- **Execution checkpoints** — partial replay without re-running completed effects -- **Declarative DSL** — define sagas via YAML or dict +- **Timeout enforcement:** steps that hang are automatically cancelled +- **Retry with backoff:** transient failures retry with exponential delay +- **Reverse-order compensation:** on failure, all committed steps are undone ### 🔒 Session Consistency -- **Version counters** — causal consistency for shared VFS state -- **Resource locks** — READ/WRITE/EXCLUSIVE with lock timeout -- **Isolation levels** — SNAPSHOT, READ_COMMITTED, SERIALIZABLE per saga +- **Version counters:** causal consistency for shared VFS state +- **Resource locks:** READ/WRITE/EXCLUSIVE with lock timeout +- **Isolation levels:** SNAPSHOT, READ_COMMITTED, SERIALIZABLE per saga
-## Performance - -| Operation | Mean Latency | Throughput | -|-----------|-------------|------------| -| Ring computation | **0.3μs** | 3.75M ops/s | -| Delta audit capture | **27μs** | 26K ops/s | -| Session lifecycle | **54μs** | 15.7K ops/s | -| 3-step saga | **151μs** | 5.3K ops/s | -| **Full governance pipeline** | **268μs** | **2,983 ops/s** | +## Benchmarks -> Full pipeline = session create + agent join + 3 audit deltas + saga step + terminate with audit log root - -## Installation +Microbenchmarks for ring computation, delta-audit capture, session lifecycle, and saga execution live in the [`benchmarks/`](https://github.com/microsoft/agent-governance-toolkit/tree/main/agent-governance-python/agent-hypervisor/benchmarks) directory. ```bash -pip install agent-hypervisor +cd agent-governance-python/agent-hypervisor +python benchmarks/bench_hypervisor.py ``` ## Modules -| Module | Description | Tests | -|--------|-------------|-------| -| `hypervisor.session` | Shared Session Object lifecycle + VFS | 52 | -| `hypervisor.rings` | 4-ring privilege + elevation + breach detection | 34 | -| `hypervisor.liability` | Sponsorship, penalty, attribution, quarantine, ledger | 39 | -| `hypervisor.reversibility` | Execute/Undo API registry | 4 | -| `hypervisor.saga` | Saga orchestrator + fan-out + checkpoints + DSL | 41 | -| `hypervisor.audit` | Delta engine, audit log, GC, commitment | 10 | -| `hypervisor.verification` | DID transaction history verification | 4 | -| `hypervisor.observability` | Event bus, causal trace IDs | 22 | -| `hypervisor.security` | Rate limiter, kill switch | 16 | -| `hypervisor.integrations` | Nexus, Verification, IATP cross-module adapters | -- | -| **Integration** | End-to-end lifecycle, edge cases, security | **24** | -| **Scenarios** | Cross-module governance pipelines (7 suites) | **18** | -| **Total** | | **644** | +| Module | Description | +|--------|-------------| +| `hypervisor.session` | Shared Session Object lifecycle and VFS | +| 
`hypervisor.rings` | 4-ring privilege, elevation, and breach detection | +| `hypervisor.reversibility` | Execute/Undo API registry | +| `hypervisor.saga` | Saga orchestrator and compensation | +| `hypervisor.audit` | Delta engine and hash-chained audit trail | +| `hypervisor.verification` | DID transaction history verification | +| `hypervisor.observability` | Event bus, causal trace IDs, metrics | +| `hypervisor.security` | Rate limiter and kill switch | +| `hypervisor.integrations` | Nexus, Verification, IATP cross-module adapters | ## Test Suite ```bash +cd agent-governance-python/agent-hypervisor + # Run all tests pytest tests/ -v @@ -661,40 +484,48 @@ pytest tests/ -v pytest tests/integration/ -v # Run benchmarks -python agent-governance-python/benchmarks/bench_hypervisor.py +python benchmarks/bench_hypervisor.py ``` ## Cross-Module Integrations The Hypervisor supports optional integration with external trust scoring, behavioral verification, and capability manifest systems via adapters in `hypervisor.integrations`. See the adapter modules for usage examples. -### REST API +## REST API -Full FastAPI REST API with 22 endpoints and interactive Swagger docs: +Run the FastAPI server and open the interactive Swagger docs: ```bash -pip install agent-hypervisor[api] uvicorn hypervisor.api.server:app # Open http://localhost:8000/docs for Swagger UI ``` -Endpoints: Sessions, Rings, Sagas, Liability, Events, Health. 
+Implemented endpoint groups: + +| Group | Endpoints | +|-------|-----------| +| Health | `GET /health`, `GET /api/v1/stats` | +| Sessions | create, list, inspect, join, activate, terminate | +| Rings | session distribution, agent ring lookup, access check | +| Sagas | create, list, inspect, add step, execute step | +| Events | query events and event statistics | +| Verification | verify history and clear verification cache | -### Visualization Dashboard +## Visualization Dashboard -Interactive Streamlit dashboard with 5 tabs: +Interactive Streamlit dashboard: ```bash -cd examples/dashboard +cd agent-governance-python/agent-hypervisor/examples/dashboard pip install -r requirements.txt streamlit run app.py ``` -Tabs: Session Overview | Execution Rings | Saga Orchestration | Liability & Trust | Event Stream +Tabs: Session Overview | Execution Rings | Saga Orchestration | Event Stream ## Ecosystem -Agent Hypervisor is part of the **Agent Governance Ecosystem** — four specialized repos that work together: +Agent Hypervisor is part of the **Agent Governance Ecosystem**, specialized components that work together: ```mermaid graph TB @@ -713,21 +544,21 @@ graph TB style HV fill:#ff6b6b,stroke:#333,color:#fff ``` -| Repo | Role | Stars | -|------|------|-------| -| [Agent OS](https://github.com/microsoft/agent-governance-toolkit) | Policy enforcement kernel | 1,500+ tests | -| [Agent Mesh](https://github.com/microsoft/agent-governance-toolkit) | Cryptographic trust network | 1,400+ tests | -| [Agent SRE](https://github.com/microsoft/agent-governance-toolkit) | SLO, chaos, cost guardrails | 1,070+ tests | -| **Agent Hypervisor** | Session isolation & governance runtime | 644+ tests | +| Component | Role | +|------|------| +| [Agent OS](https://github.com/microsoft/agent-governance-toolkit) | Policy enforcement kernel | +| [Agent Mesh](https://github.com/microsoft/agent-governance-toolkit) | Cryptographic trust network | +| [Agent 
SRE](https://github.com/microsoft/agent-governance-toolkit) | SLO, chaos, and cost guardrails | +| **Agent Hypervisor** | Session isolation and governance runtime | -## 🗺️ Roadmap +## Roadmap | Quarter | Milestone | |---------|-----------| -| **Q1 2026** | ✅ v2.0 — Execution rings, saga orchestration, joint liability, shared sessions | +| **Q1 2026** | v2.0 with execution rings, saga orchestration, and shared sessions | | **Q2 2026** | Distributed hypervisor (multi-node), WebSocket real-time dashboard, Redis-backed sessions | | **Q3 2026** | Kubernetes operator for auto-scaling ring policies, CNCF Sandbox application | -| **Q4 2026** | v3.0 — Federated hypervisor mesh, cross-org agent governance, SOC2 attestation | +| **Q4 2026** | v3.0 with federated hypervisor mesh, cross-org agent governance, and SOC2 attestation | --- @@ -737,13 +568,10 @@ graph TB Just as OS hypervisors isolate virtual machines and enforce resource boundaries, an agent hypervisor isolates AI agent sessions and enforces governance boundaries. Without isolation, a misbehaving agent in a shared session can corrupt state, escalate privileges, or cascade failures across the entire system. **How do Execution Rings differ from traditional access control?** -Traditional access control is static and binary (allowed/denied). Execution Rings are dynamic and graduated -- agents earn ring privileges based on their trust score, can request temporary elevation with TTL (like `sudo`), and are automatically demoted when trust drops. Ring breach detection catches anomalous behavior before damage occurs. +Traditional access control is static and binary (allowed/denied). Execution Rings are dynamic and graduated. Agents earn ring privileges based on their trust score, can request temporary elevation with TTL (like `sudo`), and are automatically demoted when trust drops. Ring breach detection catches anomalous behavior before damage occurs. 
**What happens when a multi-agent saga fails?** -The Saga Orchestrator triggers reverse-order compensation for all committed steps. For parallel execution sagas, the failure policy determines the response: ALL_MUST_SUCCEED compensates if any branch fails, MAJORITY allows minority failures, and ANY succeeds if at least one branch completes. Execution checkpoints enable partial replay without re-running completed effects. - -**How does fault attribution work?** -When a saga fails, the hypervisor identifies the agent responsible for the failure and triggers appropriate liability consequences. +The Saga Orchestrator triggers reverse-order compensation for all committed steps. Each step can define an `undo_api` compensation endpoint (a step without one cannot be compensated), and steps that time out are cancelled and retried up to `max_retries` times before compensation runs. ## Contributing @@ -756,7 +584,7 @@ We welcome contributions! Please see our [Contributing Guide](../../CONTRIBUTING ## License -MIT -- see [LICENSE](LICENSE). +MIT, see [LICENSE](../../LICENSE). --- @@ -766,6 +594,4 @@ MIT -- see [LICENSE](LICENSE). 
*Built with :heart: for the AI agent governance community* -If Agent Hypervisor helps your work, please consider giving it a :star: - diff --git a/docs/packages/agent-os.md b/docs/packages/agent-os.md index 18af0003e..082dacad5 100644 --- a/docs/packages/agent-os.md +++ b/docs/packages/agent-os.md @@ -414,7 +414,7 @@ agent-governance-python/agent-os/ | [`observability`](modules/observability/) | 3 | `agent-os-observability` | Prometheus metrics + OpenTelemetry tracing | ⚠️ No tests | | [`nexus`](modules/nexus/) | — | *Not published* | Trust exchange network | 🔬 Prototype | | [`mcp-kernel-server`](modules/mcp-kernel-server/) | Int | `mcp-kernel-server` | MCP server for Claude Desktop | ⚠️ No tests | -| [**`runtime`**](https://github.com/microsoft/agent-governance-toolkit) | **⭐** | `agentmesh-runtime` | **Execution supervisor — Execution Rings, Joint Liability, Saga Orchestrator** ([own repo](https://github.com/microsoft/agent-governance-toolkit)) | **✅ 184 tests** | +| [**`runtime`**](https://github.com/microsoft/agent-governance-toolkit) | **⭐** | `agentmesh-runtime` | **Execution supervisor — Execution Rings, Saga Orchestrator, Delta Audit** ([own repo](https://github.com/microsoft/agent-governance-toolkit)) | **✅ 184 tests** | --- @@ -436,9 +436,9 @@ Just as OS runtimes isolate execution environments and enforce resource boundari │ Ring 3 (Sandbox) ← Default for unknown agents │ │ │ │ ┌──────────┐ ┌───────────┐ ┌────────────────────────┐ │ -│ │ Joint │ │ Semantic │ │ Hash-Chained │ │ -│ │ Liability │ │ Saga │ │ Delta Audit Trail │ │ -│ │ Engine │ │ Orchestr. 
│ │ (Tamper-Evident) │ │ +│ │ Execution│ │ Saga │ │ Hash-Chained │ │ +│ │ Rings │ │Orchestrator│ │ Delta Audit Trail │ │ +│ │ │ │ │ │ (Tamper-Evident) │ │ │ └──────────┘ └───────────┘ └────────────────────────┘ │ └────────────────────────────────────────────────────────────┘ ``` @@ -448,9 +448,8 @@ Just as OS runtimes isolate execution environments and enforce resource boundari | Feature | Description | Latency | |---------|-------------|---------| | **Execution Rings** | 4-level privilege model (Ring 0–3) based on trust score | **0.3μs** | -| **Joint Liability** | High-trust agents vouch for low-trust agents with bonded reputation | **7μs** | | **Saga Orchestrator** | Multi-step transactions with timeout, retry, and auto-compensation | **151μs** | -| **Delta Audit** | Hash-chained semantic diffs with blockchain commitment | **27μs** | +| **Delta Audit** | Hash-chained semantic diffs | **27μs** | | **Full Pipeline** | Session + join + audit + saga + terminate | **268μs** | ### Quick Start diff --git a/docs/packages/agent-runtime.md b/docs/packages/agent-runtime.md index ea4c94b31..d277d3857 100644 --- a/docs/packages/agent-runtime.md +++ b/docs/packages/agent-runtime.md @@ -33,7 +33,6 @@ session level: - **Shared Sessions** — Multi-agent session management with consistency modes (strict, eventual, causal) - **Saga Orchestration** — Compensating transactions for multi-step agent workflows - **Kill Switch** — Immediate termination with audit trail and blast radius containment -- **Joint Liability** — Attribution tracking across multi-agent collaborations - **Audit Trails** — Hash-chained, append-only execution logs ## Quick Start diff --git a/docs/reference/contributing.md b/docs/reference/contributing.md index 0a7b7694d..0719d22b8 100644 --- a/docs/reference/contributing.md +++ b/docs/reference/contributing.md @@ -245,7 +245,7 @@ This policy is enforced by: **Why this policy exists:** PRs #357 and #362 were auto-merged without maintainer review and reintroduced a 
command injection vulnerability (`subprocess.run(shell=True)`) that had been fixed for MSRC Case 111178 just days earlier. AI code review agents did not catch the security regression. **What counts as maintainer approval:** -- ✅ A GitHub "Approve" review from a code owner (see [.github/CODEOWNERS](.github/CODEOWNERS)) +- ✅ A GitHub "Approve" review from a code owner (see [.github/CODEOWNERS](../../.github/CODEOWNERS)) - ❌ AI/bot approval (Copilot, Sourcery, etc.) — does not count - ❌ Author self-approval — does not count - ❌ Admin bypass — should not be used for external PRs diff --git a/docs/specs/AGENT-HYPERVISOR-EXECUTION-CONTROL-1.0.md b/docs/specs/AGENT-HYPERVISOR-EXECUTION-CONTROL-1.0.md index 2aca41c74..895515f66 100644 --- a/docs/specs/AGENT-HYPERVISOR-EXECUTION-CONTROL-1.0.md +++ b/docs/specs/AGENT-HYPERVISOR-EXECUTION-CONTROL-1.0.md @@ -6,8 +6,8 @@ > > This specification defines the execution control model for the Agent > Hypervisor, including execution rings, privilege elevation, resource -> constraints, rate limiting, session isolation, kill switch, audit -> integrity, and quarantine. All SDK implementations MUST conform to +> constraints, rate limiting, session isolation, kill switch, and audit +> integrity. All SDK implementations MUST conform to > this specification. The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", @@ -494,7 +494,6 @@ A SessionConfig MUST contain: | `max_duration_seconds` | int | 3600 | [1, 604800] (7 days) | | `min_eff_score` | float | 0.60 | [0.0, 1.0] | | `enable_audit` | bool | true | | -| `enable_blockchain_commitment` | bool | false | | ### 10.3 Session Participant @@ -644,6 +643,10 @@ substitute MUST be unregistered from the kill switch. ## 13. Quarantine +> **Not implemented in this release.** Agent quarantine was a Public Preview stub and has +> been removed from the implementation. This section is retained for historical spec +> reference and does not describe shipped behavior. 
+ ### 13.1 Quarantine Reasons | Reason | Description | @@ -856,7 +859,6 @@ Implementations SHOULD support pluggable providers for: | Provider | Fallback | | --- | --- | | `ring_engine` | `RingEnforcer` | -| `liability` | `LiabilityMatrix` | | `saga_engine` | `SagaOrchestrator` | | `breach_detector` | `RingBreachDetector` | | `session_manager` | (implementation-specific) | diff --git a/docs/specs/AUDIT-COMPLIANCE-1.0.md b/docs/specs/AUDIT-COMPLIANCE-1.0.md index cd86dc912..691117769 100644 --- a/docs/specs/AUDIT-COMPLIANCE-1.0.md +++ b/docs/specs/AUDIT-COMPLIANCE-1.0.md @@ -25,7 +25,7 @@ The specification spans five AGT components: - **Agent OS** -- Core audit logging, governance event processing, and OpenTelemetry integration - **Agent Mesh** -- Merkle-chained audit log, compliance engine, decision BOM reconstruction, and audit collector REST API -- **Agent Hypervisor** -- Event bus, semantic delta engine, and commitment engine +- **Agent Hypervisor** -- Event bus and semantic delta engine - **Agent SRE** -- SRE-specific observability events and OTel conventions - **Agent Lightning** -- Flight recorder emission and RL environment violation tracking @@ -128,7 +128,7 @@ The AGT Audit and Compliance system MUST provide: | | | | | | | - AuditEntry | | - MerkleChain | | - EventBus | | - AuditBackend | | - Compliance | | - DeltaEngine | -| - EventSink SPI | | - DecisionBOM | | - CommitmentEngine | +| - EventSink SPI | | - DecisionBOM | | | | - EventProcessor | | - AuditCollector | | | | - OTel Backend | | | | | +--------+---------+ +--------+---------+ +---------+-----------+ @@ -1132,6 +1132,10 @@ The delta hash MUST be computed as: ## 14. Commitment Engine +> **Not implemented in this release.** The Commitment Engine was a Public Preview stub and +> has been removed from the implementation. This section is retained for historical spec +> reference and does not describe shipped behavior. 
+ ### 14.1 Purpose [Pure Specification] The Commitment Engine produces summary records that anchor a session's delta chain, @@ -1923,7 +1927,6 @@ An implementation at Level 3 MUST satisfy Level 2 AND: - Implement the Compliance Framework Engine (Section 10). - Implement the Decision BOM reconstruction (Section 11). - Implement the Semantic Delta Engine (Section 13). -- Implement the Commitment Engine (Section 14). - Provide the Audit Collector REST API (Section 15). - Support all four compliance frameworks (Section 10.1). diff --git a/docs/tutorials/06-execution-sandboxing.md b/docs/tutorials/06-execution-sandboxing.md index 544a4c6cc..23c37e617 100644 --- a/docs/tutorials/06-execution-sandboxing.md +++ b/docs/tutorials/06-execution-sandboxing.md @@ -1,4 +1,4 @@ -# Tutorial 06 — Execution Sandboxing +# Tutorial 06 Execution Sandboxing > **Package:** `agentmesh-runtime` · **Time:** 30 minutes · **Prerequisites:** Python 3.11+ @@ -36,10 +36,10 @@ See also: [Deployment Guide](../deployment/README.md) | [Agent Runtime README](. AI agents that can read files, call APIs, and execute code need strict boundaries. Without sandboxing, a misbehaving agent can: -- **Exfiltrate data** — read secrets and send them to external endpoints. -- **Corrupt state** — write to databases or files it should never touch. -- **Consume resources** — spin up infinite loops that exhaust CPU and memory. -- **Cascade failures** — a failed step in a multi-agent workflow leaves the system in a broken half-finished state. +- **Exfiltrate data.** Read secrets and send them to external endpoints. +- **Corrupt state.** Write to databases or files it should never touch. +- **Consume resources.** Spin up infinite loops that exhaust CPU and memory. +- **Cascade failures.** A failed step in a multi-agent workflow leaves the system in a broken half-finished state. 
The **Agent Runtime** (`pip install agentmesh-runtime`) solves this with four layers of defense: @@ -56,7 +56,7 @@ layers of defense: │ Multi-step transactions with auto-rollback │ ├─────────────────────────────────────────────────┤ │ Session Isolation │ -│ VFS namespacing, vector clocks, intent locks │ +│ VFS namespacing, snapshots, vector clocks │ ├─────────────────────────────────────────────────┤ │ Emergency Controls │ │ Kill switch, rate limiting, breach detection │ @@ -79,19 +79,21 @@ Get sandboxing running in under 20 lines: from hypervisor import Hypervisor, ExecutionRing from hypervisor.rings.classifier import ActionClassifier from hypervisor.rings.enforcer import RingEnforcer +from hypervisor.models import ActionDescriptor # 1. Create the runtime hv = Hypervisor() # 2. Classify an action — the classifier maps actions to rings classifier = ActionClassifier() -result = classifier.classify_action_id("file.read") -print(result.ring) # ExecutionRing.RING_3_SANDBOX -print(result.risk_weight) # 0.1 -result = classifier.classify_action_id("deploy.k8s") -print(result.ring) # ExecutionRing.RING_1_PRIVILEGED -print(result.risk_weight) # 0.9 +read = ActionDescriptor(action_id="read_dataset", name="Read Dataset", + execute_api="/data/read", is_read_only=True) +print(classifier.classify(read).ring) # ExecutionRing.RING_3_SANDBOX + +delete = ActionDescriptor(action_id="delete_database", name="Delete Database", + execute_api="/db/drop") +print(classifier.classify(delete).ring) # ExecutionRing.RING_1_PRIVILEGED # 3. Enforce the ring — block agents that lack privilege enforcer = RingEnforcer() @@ -116,8 +118,8 @@ trust is required. 
``` ┌───────────────────────┐ │ Ring 0 — Root │ eff_score: N/A (SRE Witness required) - │ Runtime config, │ Penalty/slashing operations - │ penalty ops │ Rate: unlimited + │ Runtime config │ Requires SRE Witness + │ │ Rate: unlimited ├───────────────────────┤ │ Ring 1 — Privileged │ eff_score ≥ 0.95 + consensus │ Non-reversible ops │ Write, deploy, delete @@ -152,7 +154,7 @@ ring = ExecutionRing.from_eff_score(eff_score=0.40) assert ring == ExecutionRing.RING_3_SANDBOX ``` -> **Note:** Ring 0 is never assigned by score alone — it requires an SRE +> **Note:** Ring 0 is never assigned by score alone. It requires an SRE > Witness attestation and is reserved for runtime-level configuration. ### 3.2 Action Classification @@ -161,27 +163,34 @@ Every action is classified by **risk weight** and **reversibility** to determine which ring it requires: ```python -from hypervisor.rings.classifier import ActionClassifier, ClassificationResult -from hypervisor.models import ReversibilityLevel +from hypervisor import ExecutionRing +from hypervisor.rings.classifier import ActionClassifier +from hypervisor.models import ActionDescriptor, ReversibilityLevel classifier = ActionClassifier() -# Read operations → Ring 3 (low risk, fully reversible) -result = classifier.classify_action_id("file.read") -assert result.ring == ExecutionRing.RING_3_SANDBOX -assert result.reversibility == ReversibilityLevel.REVERSIBLE +# Read-only operations → Ring 3 (sandbox) +read = ActionDescriptor(action_id="read_dataset", name="Read Dataset", + execute_api="/data/read", is_read_only=True) +assert classifier.classify(read).ring == ExecutionRing.RING_3_SANDBOX -# Write operations → Ring 2 (medium risk, reversible with effort) -result = classifier.classify_action_id("file.write") +# Reversible writes (with an undo endpoint) → Ring 2 (standard) +write = ActionDescriptor(action_id="write_file", name="Write File", + execute_api="/files/write", undo_api="/files/restore", + reversibility=ReversibilityLevel.FULL) 
+result = classifier.classify(write) assert result.ring == ExecutionRing.RING_2_STANDARD +assert result.reversibility == ReversibilityLevel.FULL -# Deployments → Ring 1 (high risk, non-reversible) -result = classifier.classify_action_id("deploy.k8s") +# Destructive, non-reversible operations → Ring 1 (privileged) +delete = ActionDescriptor(action_id="delete_database", name="Delete Database", + execute_api="/db/drop") +result = classifier.classify(delete) assert result.ring == ExecutionRing.RING_1_PRIVILEGED -assert result.reversibility == ReversibilityLevel.NON_REVERSIBLE +assert result.reversibility == ReversibilityLevel.NONE # Override classification for custom actions -classifier.set_override("my_custom.action", ring=ExecutionRing.RING_2_STANDARD, risk_weight=0.5) +classifier.set_override("my_custom_action", ring=ExecutionRing.RING_2_STANDARD, risk_weight=0.5) ``` ### 3.3 Ring Elevation (Privilege Escalation) @@ -432,76 +441,43 @@ Saga-level states: | `FAILED` | All compensation finished (or some compensation failed) | | `ESCALATED` | Compensation itself failed; human intervention required | -### 5.4 Declarative Sagas with the DSL +### 5.4 Programmatic saga orchestration -For complex workflows, define sagas declaratively: +Define saga steps directly with `SagaOrchestrator` and pair each forward action with an undo endpoint: ```python -from hypervisor.saga.dsl import SagaDSLParser, SagaDefinition - -saga_yaml = """ -saga: - id: deploy-pipeline - steps: - - id: create-pr - action_id: pr.create - agent: did:example:dev-agent - execute_api: /api/pr/create - undo_api: /api/pr/close - timeout: 60 - retries: 2 - - - id: run-tests - action_id: tests.run - agent: did:example:ci-agent - execute_api: /api/tests/run - undo_api: /api/tests/cancel - timeout: 300 - depends_on: [create-pr] - - - id: deploy-staging - action_id: deploy.staging - agent: did:example:deploy-agent - execute_api: /api/deploy/staging - undo_api: /api/deploy/rollback - timeout: 600 - depends_on: 
[run-tests] - checkpoint_goal: "Staging deployment matches PR diff" -""" - -parser = SagaDSLParser() -definition: SagaDefinition = parser.parse(saga_yaml) -``` - -### 5.5 Semantic Checkpoints - -Checkpoints verify that each step actually achieved its goal, not just that -it returned HTTP 200: +from hypervisor.saga.orchestrator import SagaOrchestrator -```python -from hypervisor.saga.checkpoint import CheckpointManager, SemanticCheckpoint +orchestrator = SagaOrchestrator() +saga = orchestrator.create_saga("session-deploy-42") -checkpoint_mgr = CheckpointManager() +create_pr = orchestrator.add_step( + saga_id=saga.saga_id, + action_id="pr.create", + agent_did="did:example:dev-agent", + execute_api="/api/pr/create", + undo_api="/api/pr/close", + timeout_seconds=60, + max_retries=2, +) -# After a deploy step, verify the deployment actually happened -checkpoint = SemanticCheckpoint( - step_id="deploy-staging", - goal="Staging deployment matches PR diff", +run_tests = orchestrator.add_step( + saga_id=saga.saga_id, + action_id="tests.run", + agent_did="did:example:ci-agent", + execute_api="/api/tests/run", + undo_api="/api/tests/cancel", + timeout_seconds=300, ) -# The checkpoint manager evaluates whether the goal was met ``` -### 5.6 Fan-Out Orchestration - -For parallel step execution (e.g., deploy to multiple regions simultaneously): +If a step fails, call `compensate()` to roll back committed steps in reverse order. 
```python -from hypervisor.saga.fan_out import FanOutOrchestrator, FanOutPolicy +async def compensator(step): + return await call_undo_endpoint(step.undo_api) -fan_out = FanOutOrchestrator() - -# Execute the same action across multiple agents in parallel -# with configurable failure policies (fail-fast, best-effort, quorum) +failed = await orchestrator.compensate(saga.saga_id, compensator) ``` --- @@ -552,22 +528,19 @@ Choose the right isolation level based on your consistency requirements: ```python from hypervisor.session.isolation import IsolationLevel -# Snapshot — each agent sees a consistent snapshot (cheapest) +# Snapshot gives each agent a stable view for the operation. level = IsolationLevel.SNAPSHOT assert not level.requires_vector_clocks -assert not level.requires_intent_locks assert level.allows_concurrent_writes assert level.coordination_cost == "low" -# Read Committed — agents see committed writes from others +# Read Committed makes committed writes visible across agents. level = IsolationLevel.READ_COMMITTED assert level.requires_vector_clocks -assert not level.requires_intent_locks -# Serializable — strongest consistency (most expensive) +# Serializable is the strongest consistency level. level = IsolationLevel.SERIALIZABLE assert level.requires_vector_clocks -assert level.requires_intent_locks assert not level.allows_concurrent_writes assert level.coordination_cost == "high" ``` @@ -577,52 +550,21 @@ assert level.coordination_cost == "high" When agents produce concurrent writes, vector clocks establish a causal order: ```python -from hypervisor.session.vector_clock import VectorClockManager, CausalViolationError - -clock_mgr = VectorClockManager() - -# Each agent gets its own logical clock -clock_a = clock_mgr.create_clock("did:agent-a") -clock_b = clock_mgr.create_clock("did:agent-b") - -# Agent A performs an action -clock_mgr.increment("did:agent-a") - -# Check causal ordering — did A's action happen before B's? 
-happened_before = clock_mgr.happens_before(clock_a, clock_b) -``` - -### 6.4 Intent Locks for Concurrency Control - -Prevent conflicting concurrent operations with intent locks: - -```python -from hypervisor.session.intent_locks import IntentLockManager, LockIntent, DeadlockError +from hypervisor.session.vector_clock import VectorClock -lock_mgr = IntentLockManager() +# Each agent maintains its own logical clock +clock_a = VectorClock() +clock_b = VectorClock() -# Agent A acquires a write lock on the session -lock_mgr.acquire_lock( - session_id="session-001", - agent_did="did:agent-a", - intent=LockIntent.WRITE, -) +# Agent A performs an action, advancing its clock +clock_a.tick("did:agent-a") -# Agent B tries an exclusive lock — blocked until A releases -try: - lock_mgr.acquire_lock( - session_id="session-001", - agent_did="did:agent-b", - intent=LockIntent.EXCLUSIVE, - ) -except DeadlockError: - print("Deadlock detected — aborting Agent B's operation") - -# Release when done -lock_mgr.release_lock(session_id="session-001", agent_did="did:agent-a") +# Did A's action causally precede B's current state? 
+happened_before = clock_a.happens_before(clock_b) +concurrent = clock_a.is_concurrent(clock_b) ``` -### 6.5 Full Session Configuration +### 6.4 Full Session Configuration Bring it all together with a `SharedSessionObject`: @@ -631,10 +573,10 @@ from hypervisor.session import SharedSessionObject from hypervisor.models import SessionConfig, ConsistencyMode config = SessionConfig( - consistency_mode=ConsistencyMode.SERIALIZABLE, + consistency_mode=ConsistencyMode.STRONG, max_participants=5, - max_duration_seconds=3600, # 1 hour - min_eff_score=0.60, # minimum trust to join + max_duration_seconds=3600, + min_eff_score=0.60, ) session = SharedSessionObject( @@ -643,16 +585,14 @@ session = SharedSessionObject( ) # Session provides: -# session.vfs — SessionVFS (isolated file views) -# session.vector_clocks — VectorClockManager (causal ordering) -# session.intent_locks — IntentLockManager (concurrency control) +# session.vfs # SessionVFS isolated file views ``` --- ## 7. Emergency Controls -When an agent goes rogue, you need to stop it *immediately* — not after the +When an agent goes rogue, you need to stop it *immediately*, not after the next polling interval. 
### 7.1 Kill Switch @@ -687,7 +627,6 @@ Available kill reasons: | `RATE_LIMIT` | Agent exceeded its rate limit repeatedly | | `RING_BREACH` | Agent attempted actions above its ring level | | `MANUAL` | Human operator triggered the kill | -| `QUARANTINE_TIMEOUT` | Agent was quarantined and didn't recover | | `SESSION_TIMEOUT` | Session exceeded its `max_duration_seconds` | ### 7.2 Graceful Shutdown with Handoff @@ -750,20 +689,7 @@ if not status.allowed: sandbox_limiter.reset(agent_did="did:example:new-agent") ``` -### 7.4 Quarantine - -Quarantine isolates an agent without killing it — useful for investigation: - -```python -from hypervisor.liability.quarantine import QuarantineManager, QuarantineReason - -quarantine = QuarantineManager() - -# Quarantine a suspect agent — it can't take new actions but existing -# saga steps are preserved for forensic analysis -``` - -### 7.5 Breach Detection Pipeline +### 7.4 Breach Detection Pipeline Wire breach detection into your kill switch for automated response: @@ -1002,17 +928,14 @@ trace_id = CausalTraceId.generate() | **Tools** | `CapabilityGuardMiddleware` | Per-agent tool allow/deny lists | | **Transactions** | `SagaOrchestrator` | Multi-step workflows with auto-rollback | | **Isolation** | `SessionVFS` | Per-agent virtual file system namespacing | -| **Isolation** | `IntentLockManager` | Concurrency control with intent locks | -| **Isolation** | `VectorClockManager` | Causal ordering of concurrent operations | +| **Isolation** | `VectorClock` | Causal ordering of concurrent operations | | **Emergency** | `KillSwitch` | Immediate agent termination | | **Emergency** | `AgentRateLimiter` | Per-agent call rate enforcement | -| **Emergency** | `QuarantineManager` | Agent isolation for investigation | | **Observability** | `HypervisorEventBus` | Real-time event streaming | --- ## Next Steps -- **Audit trails:** Explore `CommitmentEngine` and `DeltaEngine` for hash-chained, tamper-evident logging. 
-- **Liability:** See `LiabilityMatrix`, `CausalAttributor`, and `SlashingEngine` for agent accountability. +- **Audit trails:** Use `DeltaEngine` for hash-chained, tamper-evident delta logging. - **Deployment:** Read the [Azure Container Apps guide](../deployment/azure-container-apps.md) for cloud-native deployment patterns. diff --git a/docs/tutorials/11-saga-orchestration.md b/docs/tutorials/11-saga-orchestration.md index b010cedaa..5b942f9b2 100644 --- a/docs/tutorials/11-saga-orchestration.md +++ b/docs/tutorials/11-saga-orchestration.md @@ -1,19 +1,20 @@ -# Tutorial 11 — Saga Orchestration +# Tutorial 11 Saga Orchestration -> **Package:** `agentmesh-runtime` · **Time:** 30 minutes · **Prerequisites:** Python 3.11+ +> **Package:** `agent-hypervisor` · **Time:** 30 minutes · **Prerequisites:** Python 3.11+ --- ## What You'll Learn - Multi-step transactions with compensating actions -- Saga DSL for declarative pipeline definitions -- Fan-out for parallel step execution -- Compensating actions and rollback strategies +- Saga and step state machines with validated transitions +- Timeout and retry handling for individual steps +- Reverse-order compensation and rollback strategies +- Integrating saga steps with execution rings --- -**Multi-step agent transactions with compensating actions, parallel fan-out, and semantic checkpoints.** +**Multi-step agent transactions with compensating actions and reverse-order rollback.** See also: [Execution Sandboxing (Tutorial 06)](./06-execution-sandboxing.md) | [Observability & Tracing (Tutorial 13)](./13-observability-and-tracing.md) | [Agent Runtime README](../../agent-governance-python/agent-runtime/README.md) @@ -26,15 +27,11 @@ See also: [Execution Sandboxing (Tutorial 06)](./06-execution-sandboxing.md) | [ 3. [Quick Start: A 3-Step Saga with Compensation](#3-quick-start-a-3-step-saga-with-compensation) 4. [SagaOrchestrator](#4-sagaorchestrator) 5. [Saga & Step State Machines](#5-saga--step-state-machines) -6. 
[SagaDSLParser — Declarative Saga Definitions](#6-sagadslparser--declarative-saga-definitions) -7. [Schema Validation](#7-schema-validation) -8. [Compensating Transactions](#8-compensating-transactions) -9. [FanOutOrchestrator — Parallel Step Execution](#9-fanoutorchestrator--parallel-step-execution) -10. [CheckpointManager — Save & Restore Saga State](#10-checkpointmanager--save--restore-saga-state) -11. [Error Handling](#11-error-handling) -12. [Integration with Execution Rings](#12-integration-with-execution-rings) -13. [Real-World Example: Multi-Agent Data Pipeline](#13-real-world-example-multi-agent-data-pipeline) -14. [Next Steps](#14-next-steps) +6. [Compensating Transactions](#6-compensating-transactions) +7. [Error Handling](#7-error-handling) +8. [Integration with Execution Rings](#8-integration-with-execution-rings) +9. [Real-World Example: Multi-Agent Data Pipeline](#9-real-world-example-multi-agent-data-pipeline) +10. [Next Steps](#10-next-steps) --- @@ -63,46 +60,34 @@ If Step 3 fails: | Component | Purpose | |-----------|---------| | `SagaOrchestrator` | Sequential step execution with retry and compensation | -| `SagaDSLParser` | Declarative saga definitions from structured dictionaries | -| `SagaSchemaValidator` | JSON schema validation for saga definitions | -| `FanOutOrchestrator` | Parallel step execution with success policies | -| `CheckpointManager` | Semantic checkpoints for replay and skip-ahead | --- ## 2. 
Installation ```bash -pip install agentmesh-runtime +pip install agent-governance-toolkit-core ``` Import from either package: ```python # From runtime (convenience re-exports) -from agent_runtime import ( - SagaOrchestrator, SagaState, StepState, - FanOutOrchestrator, FanOutPolicy, - CheckpointManager, SagaDSLParser, SagaDefinition, -) +from agent_runtime import SagaOrchestrator, SagaState, StepState # Or directly from hypervisor -from hypervisor.saga.orchestrator import SagaOrchestrator -from hypervisor.saga.state_machine import Saga, SagaStep, SagaState, StepState -from hypervisor.saga.dsl import SagaDSLParser, SagaDefinition -from hypervisor.saga.fan_out import FanOutOrchestrator, FanOutPolicy, FanOutGroup -from hypervisor.saga.checkpoint import CheckpointManager, SemanticCheckpoint -from hypervisor.saga.schema import SagaSchemaValidator, SagaSchemaError +from hypervisor.saga.orchestrator import SagaOrchestrator, SagaTimeoutError +from hypervisor.saga.state_machine import Saga, SagaStep, SagaState, StepState, SagaStateError ``` -**Requirements:** Python ≥ 3.11, `agentmesh-runtime` v2.0.2+ +**Requirements:** Python 3.11+ --- ## 3. Quick Start: A 3-Step Saga with Compensation -A complete example — define a 3-step deployment saga, execute it, and -handle failure with automatic compensation: +A complete example that defines a 3-step deployment saga, executes it, and +handles failure with automatic compensation: ```python import asyncio @@ -116,7 +101,7 @@ async def main(): # 1. Create a saga bound to a session saga = orchestrator.create_saga(session_id="session-deploy-42") - # 2. Add steps — each pairs a forward action with a compensation + # 2. Add steps, each pairing a forward action with a compensation step_pr = orchestrator.add_step( saga_id=saga.saga_id, action_id="data.create_pr", @@ -193,7 +178,7 @@ asyncio.run(main()) Saga state: SagaState.COMPLETED ``` -Compensation runs in **reverse order** — tests cancelled before PR closed. 
+Compensation runs in **reverse order**, so tests are cancelled before the PR is closed. --- @@ -221,9 +206,9 @@ class SagaOrchestrator: | Parameter | Default | Description | |-----------|---------|-------------| -| `action_id` | — | Action type (dot-notation: `model.`, `data.`, `deploy.`, etc.) | -| `agent_did` | — | Decentralized identifier of the executing agent | -| `execute_api` | — | Forward execution endpoint | +| `action_id` | required | Action type (dot-notation: `model.`, `data.`, `deploy.`, etc.) | +| `agent_did` | required | Decentralized identifier of the executing agent | +| `execute_api` | required | Forward execution endpoint | | `undo_api` | `None` | Compensation endpoint (if `None`, step can't be compensated) | | `timeout_seconds` | `300` | Max wall-clock time for execution | | `max_retries` | `0` | Number of retry attempts on failure | @@ -315,7 +300,7 @@ The seven step states and their meanings: | `FAILED` | Step failed after exhausting retries | | `COMPENSATING` | Compensation is in progress for this step | | `COMPENSATED` | Compensation completed successfully | -| `COMPENSATION_FAILED` | Compensation itself failed — requires escalation | +| `COMPENSATION_FAILED` | Compensation itself failed; escalation is required | ### 5.2 Saga States @@ -335,9 +320,9 @@ assert saga.completed_at is not None |-------|---------|----------------| | `RUNNING` | Steps are being executed | `COMPENSATING`, `COMPLETED`, `FAILED` | | `COMPENSATING` | Compensation is running in reverse | `COMPLETED`, `ESCALATED` | -| `COMPLETED` | All steps committed or all compensations succeeded | — (terminal) | -| `FAILED` | Execution failed (before compensation) | — (terminal) | -| `ESCALATED` | Compensation itself failed; human intervention required | — (terminal) | +| `COMPLETED` | All steps committed or all compensations succeeded | terminal | +| `FAILED` | Execution failed (before compensation) | terminal | +| `ESCALATED` | Compensation itself failed; human intervention required | 
terminal | ### 5.3 Serialization and Inspection @@ -356,172 +341,13 @@ for step in saga.committed_steps_reversed: --- -## 6. SagaDSLParser — Declarative Saga Definitions - -Instead of building sagas imperatively with `add_step()`, you can define them -declaratively using a structured dictionary format. This is especially useful -for saga definitions stored in configuration files or databases. - -### 6.1 Basic Usage - -```python -from hypervisor.saga.dsl import SagaDSLParser, SagaDefinition - -parser = SagaDSLParser() - -definition = parser.parse({ - "name": "deploy-model", - "session_id": "sess-deploy-42", - "steps": [ - { - "id": "validate", - "action_id": "model.validate", - "agent": "did:mesh:validator", - "execute_api": "/api/validate", - "undo_api": "/api/rollback", - }, - { - "id": "deploy", - "action_id": "deploy.push", - "agent": "did:mesh:deployer", - "execute_api": "/api/deploy", - "undo_api": "/api/deploy/rollback", - "timeout": 600, - "retries": 2, - }, - { - "id": "notify", - "action_id": "notify.team", - "agent": "did:mesh:notifier", - "execute_api": "/api/notify", - # No undo_api — notifications can't be unsent - }, - ], -}) - -print(definition.name) # "deploy-model" -print(definition.session_id) # "sess-deploy-42" -print(definition.saga_id) # "saga:" -print(len(definition.steps)) # 3 -print(definition.step_ids) # ["validate", "deploy", "notify"] -``` - -### 6.2 Definition Schema - -**Required top-level:** `name` (str), `session_id` (str), `steps` (non-empty list). -**Optional top-level:** `saga_id` (str, auto-generated), `metadata` (dict). - -**Required per step:** `id` (str), `action_id` (str), `agent` (str). -**Optional per step:** `execute_api` (str), `undo_api` (str|None), `timeout` (int, default 300), `retries` (int, default 0), `checkpoint_goal` (str|None). 
- -### 6.3 Converting to SagaSteps - -A `SagaDefinition` can be converted into `SagaStep` objects for use with `SagaOrchestrator`: - -```python -saga_steps = parser.to_saga_steps(definition) -for step in saga_steps: - print(f"{step.step_id}: {step.execute_api} (timeout={step.timeout_seconds}s)") -``` - -### 6.4 Validation - -`validate()` returns errors without raising — useful for pre-flight checks: - -```python -errors = parser.validate({}) -# ["Missing 'name'", "Missing 'session_id'", "Missing 'steps'"] - -errors = parser.validate({ - "name": "valid", "session_id": "s1", - "steps": [{"id": "s1", "action_id": "data.run", "agent": "did:mesh:a"}], -}) -# [] -``` - -`parse()` raises `SagaDSLError` for missing `name`, missing `session_id`, -empty/missing `steps`, or duplicate step IDs. - ---- - -## 7. Schema Validation - -For production use, enable JSON schema validation to catch definition -errors early — invalid timeouts, unknown action prefixes, circular -dependencies, and more. - -### 7.1 SagaSchemaValidator - -```python -from hypervisor.saga.schema import SagaSchemaValidator, SagaSchemaError - -validator = SagaSchemaValidator() - -# Returns a list of error strings (empty = valid) -errors = validator.validate({ - "name": "test-saga", - "session_id": "sess-1", - "steps": [ - { - "id": "step-1", - "action_id": "model.validate", - "agent": "did:mesh:validator", - "execute_api": "/api/validate", - "undo_api": "/api/rollback", - "timeout": 300, - "retries": 0, - }, - ], -}) -assert errors == [] -``` - -### 7.2 What Gets Validated - -| Rule | Example | -|------|---------| -| **Action ID prefixes** | Must start with `model.`, `data.`, `deploy.`, `validate.`, `notify.`, `infra.`, `security.`, `monitor.`, `config.`, or `test.` | -| **Timeout range** | 1–86400 seconds | -| **Retry range** | 0–10 | -| **Compensation** | Steps without `undo_api` generate warnings | -| **Dependencies** | Unknown refs and circular dependencies are caught | -| **Duplicate IDs** | Duplicate step 
IDs are rejected | - -```python -from hypervisor.saga.schema import VALID_ACTION_PREFIXES - -# All recognized action prefixes -print(VALID_ACTION_PREFIXES) -# ("model.", "data.", "deploy.", "validate.", "notify.", -# "infra.", "security.", "monitor.", "config.", "test.") -``` - -### 7.3 Strict Mode and Parser Integration - -```python -# Fail-fast: throws SagaSchemaError with all errors -try: - validator.validate_or_raise({}) -except SagaSchemaError as e: - print(e.errors) # ["Missing 'name'", "Missing 'session_id'", ...] - -# Enable schema validation in the parser -parser = SagaDSLParser(schema_validation=True) -try: - parser.parse({"name": "", "session_id": "s", "steps": []}) -except SagaSchemaError: - print("Schema validation failed before parsing") -``` - ---- - -## 8. Compensating Transactions +## 6. Compensating Transactions Compensation is the core safety mechanism. When a step fails, the orchestrator walks backward through committed steps, calling a compensator for each. -### 8.1 Compensation Flow +### 6.1 Compensation Flow ```python async def compensator(step: SagaStep) -> Any: @@ -541,14 +367,14 @@ The flow: 5. All compensations succeeded → saga `COMPLETED`. Any failed → saga `ESCALATED`. 6. Returns list of steps whose compensation failed. -### 8.2 Steps Without Compensation +### 6.2 Steps Without Compensation Steps with `undo_api=None` cannot be compensated. Place irreversible actions (notifications, emails) as the **last** step so they're never compensated. -### 8.3 Escalation +### 6.3 Escalation -When compensation itself fails, the saga enters `ESCALATED` — human +When compensation itself fails, the saga enters `ESCALATED` and human intervention is required: ```python @@ -563,195 +389,28 @@ assert failed[0].state == StepState.COMPENSATION_FAILED > **Important:** An `ESCALATED` saga means inconsistent state. Wire up > alerting for this scenario. 
See -> [Tutorial 13 — Observability & Tracing](./13-observability-and-tracing.md) +> [Tutorial 13: Observability & Tracing](./13-observability-and-tracing.md) > for OpenTelemetry integration. --- -## 9. FanOutOrchestrator — Parallel Step Execution - -Some saga steps are independent and can run in parallel — for example, -deploying to multiple regions or validating data with multiple agents. - -### 9.1 Core Concepts - -The `FanOutOrchestrator` groups saga steps into **branches** within a -**fan-out group** and executes them with a configurable success policy: - -``` - ┌────────────┐ - │ Fan-Out │ - │ Group │ - └──┬────┬──┬─┘ - │ │ │ - ┌────────▼┐ ┌▼──┴────┐ ┌────────┐ - │Branch 1 │ │Branch 2│ │Branch 3│ - │(step s1)│ │(step s2)│ │(step s3)│ - └─────────┘ └────────┘ └────────┘ - │ │ │ - ▼ ▼ ▼ - Check policy: ALL_MUST_SUCCEED? -``` +## 7. Error Handling -### 9.2 Fan-Out Policies - -```python -from hypervisor.saga.fan_out import FanOutPolicy - -FanOutPolicy.ALL_MUST_SUCCEED # Every branch must succeed -FanOutPolicy.MAJORITY_MUST_SUCCEED # > 50% of branches must succeed -FanOutPolicy.ANY_MUST_SUCCEED # At least one branch must succeed -``` - -### 9.3 Creating and Executing a Fan-Out Group - -```python -from hypervisor.saga.fan_out import FanOutOrchestrator, FanOutPolicy -from hypervisor.saga.state_machine import SagaStep - -fan_out = FanOutOrchestrator() - -# Create a group within a saga -group = fan_out.create_group("saga:deploy-multi-region", FanOutPolicy.ALL_MUST_SUCCEED) - -# Add branches — each wraps a SagaStep -steps = [ - SagaStep(step_id="us-east", action_id="deploy.region", - agent_did="did:mesh:deployer", execute_api="/api/deploy/us-east"), - SagaStep(step_id="eu-west", action_id="deploy.region", - agent_did="did:mesh:deployer", execute_api="/api/deploy/eu-west"), -] -for step in steps: - fan_out.add_branch(group.group_id, step) - -# Define executors keyed by step_id -async def deploy_us(): - return {"region": "us-east-1", "status": "deployed"} - -async def 
deploy_eu(): - return {"region": "eu-west-1", "status": "deployed"} - -result = await fan_out.execute(group.group_id, executors={ - "us-east": deploy_us, "eu-west": deploy_eu, -}) - -print(result.resolved) # True -print(result.policy_satisfied) # True — all succeeded -print(result.success_count) # 2 -print(result.compensation_needed) # [] -``` - -### 9.4 Handling Partial Failures - -When a branch fails, `compensation_needed` lists step IDs of branches that -succeeded and now need rollback: - -```python -async def deploy_fails(): - raise RuntimeError("Region unavailable") - -result = await fan_out.execute(group.group_id, executors={ - "us-east": deploy_us, "eu-west": deploy_fails, -}) -print(result.policy_satisfied) # False -print(result.compensation_needed) # ["us-east"] -``` - -### 9.5 Managing Groups - -```python -active = fan_out.active_groups # Unresolved groups -group = fan_out.get_group("fanout:abc123") # Look up by ID - -# FanOutGroup properties -group.success_count # Branches that succeeded -group.failure_count # Branches that failed -group.total_branches # Total branches -group.check_policy() # Re-evaluate success policy -``` - ---- - -## 10. CheckpointManager — Save & Restore Saga State - -The `CheckpointManager` creates **semantic checkpoints** — snapshots that -record "this goal was achieved," enabling smarter replay where completed -steps can be skipped. 
- -### 10.1 Saving and Querying Checkpoints - -```python -from hypervisor.saga.checkpoint import CheckpointManager, SemanticCheckpoint - -checkpoint_mgr = CheckpointManager() - -# Save a checkpoint after a step achieves its goal -ckpt = checkpoint_mgr.save( - saga_id="saga:pipeline-7", - step_id="migrate-db", - goal_description="Database schema migrated to v5", - state_snapshot={"schema_version": 5, "tables_added": ["users_v2"]}, -) -print(ckpt.checkpoint_id) # "ckpt:" -print(ckpt.is_valid) # True - -# Check if a goal was achieved -achieved = checkpoint_mgr.is_achieved("saga:pipeline-7", - "Database schema migrated to v5", "migrate-db") - -# Get all checkpoints for a saga -for ckpt in checkpoint_mgr.get_saga_checkpoints("saga:pipeline-7"): - print(f" {ckpt.step_id}: {ckpt.goal_description}") -``` - -### 10.2 Invalidation and Replay - -```python -# Invalidate when underlying data changes -checkpoint_mgr.invalidate("saga:pipeline-7", "migrate-db", - reason="Schema manually altered") - -# Replay plan — returns only steps needing re-execution -replay = checkpoint_mgr.get_replay_plan("saga:pipeline-7", - ["extract", "transform", "validate", "load"]) -``` - -### 10.3 Goal Hashes - -```python -h1 = SemanticCheckpoint.compute_goal_hash("Deploy to staging", "step-deploy") -h2 = SemanticCheckpoint.compute_goal_hash("Deploy to staging", "step-deploy") -assert h1 == h2 # Same goal + step → same hash -``` - -> **Note:** In the Public Preview, `is_achieved()` returns `False` by -> default and `get_replay_plan()` returns all steps unchanged. Checkpoints -> are stored but not used for skip-ahead logic. The Enterprise Edition -> includes full semantic checkpoint evaluation. - ---- - -## 11. 
Error Handling - -### 11.1 Exception Types +### 7.1 Exception Types The saga system defines several exception types: ```python from hypervisor.saga.state_machine import SagaStateError from hypervisor.saga.orchestrator import SagaTimeoutError -from hypervisor.saga.dsl import SagaDSLError -from hypervisor.saga.schema import SagaSchemaError ``` | Exception | Raised when | |-----------|-------------| | `SagaStateError` | An invalid state transition is attempted | | `SagaTimeoutError` | A step exceeds its `timeout_seconds` | -| `SagaDSLError` | A saga definition has structural problems (missing fields, duplicates) | -| `SagaSchemaError` | Schema validation fails (invalid values, bad prefixes, circular deps) | -### 11.2 Timeout Handling +### 7.2 Timeout Handling Steps that exceed their `timeout_seconds` are failed automatically: @@ -770,11 +429,11 @@ async def slow_executor(): try: await orchestrator.execute_step(saga.saga_id, step.step_id, executor=slow_executor) -except asyncio.TimeoutError: +except SagaTimeoutError: print(f"Step state: {step.state}") # StepState.FAILED ``` -### 11.3 Retry Semantics +### 7.3 Retry Semantics Steps with `max_retries > 0` are retried automatically with a 1-second delay between attempts: @@ -783,7 +442,7 @@ delay between attempts: attempt_count = 0 async def flaky_executor(): - nonlocal attempt_count + global attempt_count attempt_count += 1 if attempt_count < 3: raise ConnectionError("Temporarily unavailable") @@ -804,7 +463,7 @@ assert step.state == StepState.COMMITTED assert step.retry_count == 2 ``` -### 11.4 Error Propagation Pattern +### 7.4 Error Propagation Pattern ```python async def run_saga_safely(orchestrator, saga, steps_and_executors, compensator): @@ -830,7 +489,7 @@ async def run_saga_safely(orchestrator, saga, steps_and_executors, compensator): --- -## 12. Integration with Execution Rings +## 8. 
Integration with Execution Rings Sagas work with the [Execution Ring Model](./06-execution-sandboxing.md) to enforce privilege boundaries on each step. An agent can only execute a @@ -838,7 +497,8 @@ saga step if its effective score grants access to the ring required by that action. ```python -from hypervisor import ExecutionRing +from hypervisor import ExecutionRing, ReversibilityLevel +from hypervisor.models import ActionDescriptor from hypervisor.rings.classifier import ActionClassifier from hypervisor.saga.orchestrator import SagaOrchestrator @@ -854,8 +514,15 @@ step = orchestrator.add_step( undo_api="/api/deploy/rollback", ) -# Check ring requirements before execution -classification = classifier.classify_action_id("deploy.production") +# Classify the action, then compare its required ring to the agent ring +action = ActionDescriptor( + action_id="deploy.production", + name="Deploy to production", + execute_api="/api/deploy/prod", + undo_api="/api/deploy/rollback", + reversibility=ReversibilityLevel.PARTIAL, +) +classification = classifier.classify(action) agent_ring = ExecutionRing.from_eff_score(eff_score=0.72) if classification.ring.value < agent_ring.value: @@ -870,146 +537,118 @@ For steps needing temporary privilege escalation, combine sagas with --- -## 13. Real-World Example: Multi-Agent Data Pipeline +## 9. Real-World Example: Multi-Agent Data Pipeline -Bringing together DSL, fan-out, checkpoints, and compensation: +Bringing together ordered execution, timeouts, retries, and reverse-order +compensation in a single pipeline: ```python import asyncio from hypervisor.saga.orchestrator import SagaOrchestrator -from hypervisor.saga.dsl import SagaDSLParser -from hypervisor.saga.fan_out import FanOutOrchestrator, FanOutPolicy -from hypervisor.saga.checkpoint import CheckpointManager - -# ── 1. 
Define pipeline declaratively ───────────────────────────── - -parser = SagaDSLParser(schema_validation=True) -definition = parser.parse({ - "name": "weekly-ml-pipeline", - "session_id": "pipeline-2025-w03", - "steps": [ - {"id": "extract-sales", "action_id": "data.extract", - "agent": "did:mesh:extractor", "execute_api": "/api/extract/sales", - "undo_api": "/api/extract/cleanup", "timeout": 120, "retries": 2}, - {"id": "extract-inventory", "action_id": "data.extract", - "agent": "did:mesh:extractor", "execute_api": "/api/extract/inventory", - "undo_api": "/api/extract/cleanup", "timeout": 120, "retries": 2}, - {"id": "transform", "action_id": "data.transform", - "agent": "did:mesh:transformer", "execute_api": "/api/transform", - "undo_api": "/api/transform/rollback", "timeout": 600}, - {"id": "validate", "action_id": "validate.quality", - "agent": "did:mesh:validator", "execute_api": "/api/validate", - "undo_api": "/api/validate/reset"}, - {"id": "load", "action_id": "data.load", - "agent": "did:mesh:loader", "execute_api": "/api/load/warehouse", - "undo_api": "/api/load/rollback", "timeout": 900}, - {"id": "notify", "action_id": "notify.team", - "agent": "did:mesh:notifier", "execute_api": "/api/notify/slack"}, - ], -}) - -# ── 2. Create orchestrators and saga ───────────────────────────── +from hypervisor.saga.state_machine import SagaState + +# ── 1. 
Create the orchestrator and a saga for the pipeline ─────── orchestrator = SagaOrchestrator() -fan_out = FanOutOrchestrator() -checkpoint_mgr = CheckpointManager() -saga = orchestrator.create_saga(session_id=definition.session_id) - -saga_steps = parser.to_saga_steps(definition) -step_map = {} -for dsl_step in saga_steps: - step = orchestrator.add_step( - saga_id=saga.saga_id, action_id=dsl_step.action_id, - agent_did=dsl_step.agent_did, execute_api=dsl_step.execute_api, - undo_api=dsl_step.undo_api, timeout_seconds=dsl_step.timeout_seconds, - max_retries=dsl_step.max_retries, +saga = orchestrator.create_saga(session_id="pipeline-2025-w03") + +# ── 2. Add the pipeline steps in execution order ───────────────── + +pipeline = [ + # action_id, agent_did, execute_api, undo_api, timeout, retries + ("data.extract", "did:mesh:extractor", "/api/extract/sales", + "/api/extract/cleanup", 120, 2), + ("data.transform", "did:mesh:transformer", "/api/transform", + "/api/transform/rollback", 600, 0), + ("validate.quality", "did:mesh:validator", "/api/validate", + "/api/validate/reset", 300, 0), + ("data.load", "did:mesh:loader", "/api/load/warehouse", + "/api/load/rollback", 900, 0), + ("notify.team", "did:mesh:notifier", "/api/notify/slack", + None, 60, 0), +] + +steps = [ + orchestrator.add_step( + saga_id=saga.saga_id, + action_id=action_id, + agent_did=agent_did, + execute_api=execute_api, + undo_api=undo_api, + timeout_seconds=timeout, + max_retries=retries, ) - step_map[dsl_step.step_id] = step + for action_id, agent_did, execute_api, undo_api, timeout, retries in pipeline +] -# ── 3. Execute: fan-out extraction, then sequential steps ──────── +# ── 3. 
Provide an async executor for each step ─────────────────── -async def run_pipeline(): - # Parallel extraction via fan-out - group = fan_out.create_group(saga.saga_id, FanOutPolicy.ALL_MUST_SUCCEED) - for key in ["extract-sales", "extract-inventory"]: - fan_out.add_branch(group.group_id, step_map[key]) - - async def extract_sales(): - return {"records": 15_420} - async def extract_inventory(): - return {"records": 8_300} - - result = await fan_out.execute(group.group_id, executors={ - step_map["extract-sales"].step_id: extract_sales, - step_map["extract-inventory"].step_id: extract_inventory, - }) - if not result.policy_satisfied: - await orchestrator.compensate(saga.saga_id, compensator) - return - - checkpoint_mgr.save(saga.saga_id, "extract-phase", - "All sources extracted", {"total": 23_720}) - - # Sequential: transform → validate → load → notify - async def transform(): return {"records": 23_720} - async def validate(): return {"score": 0.97} - async def load(): return {"rows_inserted": 23_720} - async def notify(): return {"sent": True} - - for name, fn in [("transform", transform), ("validate", validate), - ("load", load), ("notify", notify)]: - try: - r = await orchestrator.execute_step( - saga.saga_id, step_map[name].step_id, executor=fn) - print(f" ✓ {name}: {r}") - except Exception as e: - print(f" ✗ {name} failed: {e}") - await orchestrator.compensate(saga.saga_id, compensator) - return +async def extract(): return {"records": 23_720} +async def transform(): return {"records": 23_720} +async def validate(): return {"score": 0.97} +async def load(): return {"rows_inserted": 23_720} +async def notify(): return {"sent": True} + +executors = [extract, transform, validate, load, notify] - print(f"\n✅ Pipeline complete — saga state: {saga.state}") +# ── 4. Compensator called for each committed step on rollback ──── async def compensator(step): print(f" ↩ Compensating {step.action_id} via {step.undo_api}") return "compensated" +# ── 5. 
Run the pipeline, compensating on the first failure ─────── + +async def run_pipeline(): + for step, executor in zip(steps, executors): + try: + result = await orchestrator.execute_step( + saga.saga_id, step.step_id, executor=executor, + ) + print(f" ✓ {step.action_id}: {result}") + except Exception as e: + print(f" ✗ {step.action_id} failed: {e}") + failed = await orchestrator.compensate(saga.saga_id, compensator) + if saga.state == SagaState.ESCALATED: + raise RuntimeError( + f"{len(failed)} compensation(s) failed; manual repair required" + ) + return + + print(f"Pipeline complete, saga state: {saga.state}") + asyncio.run(run_pipeline()) ``` --- -## 14. Next Steps +## 10. Next Steps You now have a solid understanding of saga orchestration in the Agent Governance Toolkit. Here's where to go next: | Topic | Tutorial | |-------|----------| -| Privilege rings and sandboxing | [Tutorial 06 — Execution Sandboxing](./06-execution-sandboxing.md) | -| OpenTelemetry spans for saga events | [Tutorial 13 — Observability & Tracing](./13-observability-and-tracing.md) | -| Rogue agent detection and circuit breakers | [Tutorial 05 — Agent Reliability](./05-agent-reliability.md) | -| Trust scores and agent identity | [Tutorial 02 — Trust & Identity](./02-trust-and-identity.md) | -| Policy-based governance | [Tutorial 01 — Policy Engine](./01-policy-engine.md) | +| Privilege rings and sandboxing | [Tutorial 06: Execution Sandboxing](./06-execution-sandboxing.md) | +| OpenTelemetry spans for saga events | [Tutorial 13: Observability & Tracing](./13-observability-and-tracing.md) | +| Rogue agent detection and circuit breakers | [Tutorial 05: Agent Reliability](./05-agent-reliability.md) | +| Trust scores and agent identity | [Tutorial 02: Trust & Identity](./02-trust-and-identity.md) | +| Policy-based governance | [Tutorial 01: Policy Engine](./01-policy-engine.md) | ### Key Takeaways -1. **Every forward action needs a compensation** — design your APIs with +1. 
**Every forward action needs a compensation.** Design your APIs with undo endpoints from the start. -2. **Use the DSL for complex pipelines** — declarative definitions are - easier to review, version-control, and share. -3. **Enable schema validation in production** — catch timeout, retry, and - dependency errors before execution. -4. **Fan-out for independent steps** — parallel execution with policy-based - success criteria. -5. **Checkpoints enable smart replay** — skip steps whose goals are already - achieved when restarting a saga. -6. **Plan for ESCALATED state** — wire up alerting for sagas that can't +2. **Steps and sagas follow validated state machines.** Invalid transitions + raise `SagaStateError`. +3. **Use timeouts and retries per step.** `execute_step` enforces + `timeout_seconds` and retries up to `max_retries`. +4. **Plan for ESCALATED state.** Wire up alerting for sagas that can't be compensated automatically. --- ## Next Steps -- **Liability & Attribution:** [Tutorial 12 — Liability & Attribution](12-liability-and-attribution.md) -- **Observability:** [Tutorial 13 — Observability & Distributed Tracing](13-observability-and-tracing.md) -- **Execution Sandboxing:** [Tutorial 06 — Execution Sandboxing](06-execution-sandboxing.md) +- **Observability:** [Tutorial 13: Observability and Distributed Tracing](13-observability-and-tracing.md) +- **Execution Sandboxing:** [Tutorial 06: Execution Sandboxing](06-execution-sandboxing.md) diff --git a/docs/tutorials/12-liability-and-attribution.md b/docs/tutorials/12-liability-and-attribution.md deleted file mode 100644 index 71ce92bf9..000000000 --- a/docs/tutorials/12-liability-and-attribution.md +++ /dev/null @@ -1,1148 +0,0 @@ -# Tutorial 12 — Liability & Attribution - -> **Package:** `agentmesh-runtime` · **Time:** 25 minutes · **Prerequisites:** Python 3.11+ - ---- - -## What You'll Learn - -- Vouching and sponsorship protocols for agent accountability -- Slashing penalties for governance violations -- Causal 
attribution across multi-agent workflows -- Quarantine and isolation for misbehaving agents - ---- - -**Track accountability across multi-agent workflows: who vouched for whom, who caused what, and what penalties apply when things go wrong.** - -See also: [Trust & Identity (Tutorial 02)](02-trust-and-identity.md) | [Execution Sandboxing (Tutorial 06)](06-execution-sandboxing.md) - ---- - -## Table of Contents - -1. [Introduction](#1-introduction) -2. [What You'll Learn](#2-what-youll-learn) -3. [Installation](#3-installation) -4. [Quick Start: Agent A Vouches for Agent B](#4-quick-start-agent-a-vouches-for-agent-b) -5. [VouchingEngine — Sponsorship Protocol](#5-vouchingengine--sponsorship-protocol) -6. [SlashingEngine — Penalty for Misbehavior](#6-slashingengine--penalty-for-misbehavior) -7. [LiabilityMatrix — Joint Liability Graph](#7-liabilitymatrix--joint-liability-graph) -8. [CausalAttributor — Who Caused What](#8-causalattributor--who-caused-what) -9. [QuarantineManager — Isolating Problematic Agents](#9-quarantinemanager--isolating-problematic-agents) -10. [LiabilityLedger — Immutable Audit Trail](#10-liabilityledger--immutable-audit-trail) -11. [Integration with Trust Scoring](#11-integration-with-trust-scoring) -12. [Real-World Example: Multi-Agent Workflow](#12-real-world-example-multi-agent-workflow) -13. [Next Steps](#13-next-steps) - ---- - -## 1. Introduction - -When a single agent acts alone, accountability is straightforward — it either -succeeds or it doesn't. But in multi-agent systems, the picture changes -dramatically: - -- **Agent A** vouches for **Agent B**, who then vouches for **Agent C**. -- **Agent C** deletes a production database. -- Who is liable? Just C? B for sponsoring C? A for trusting B? - -Without a formal liability framework you can't answer these questions. 
The -Agent Governance Toolkit solves this with six composable components: - -``` -┌──────────────────────────────────────────────────────────┐ -│ Liability Framework │ -├──────────────┬───────────────────────────────────────────┤ -│ Vouching │ Agents sponsor each other with bonds │ -│ Engine │ that create skin-in-the-game incentives │ -├──────────────┼───────────────────────────────────────────┤ -│ Slashing │ Penalties cascade through the vouching │ -│ Engine │ graph when a sponsored agent misbehaves │ -├──────────────┼───────────────────────────────────────────┤ -│ Liability │ Directed graph tracking sponsor → │ -│ Matrix │ sponsored relationships and exposure │ -├──────────────┼───────────────────────────────────────────┤ -│ Causal │ Fault attribution via causal chain │ -│ Attributor │ analysis for saga/workflow failures │ -├──────────────┼───────────────────────────────────────────┤ -│ Quarantine │ Isolate agents pending investigation │ -│ Manager │ after severe violations │ -├──────────────┼───────────────────────────────────────────┤ -│ Liability │ Immutable ledger recording every │ -│ Ledger │ liability event for auditing │ -└──────────────┴───────────────────────────────────────────┘ -``` - -### Prerequisites - -- Python ≥ 3.11 -- `pip install agentmesh-runtime` (v2.0.2+) - ---- - -## 2. 
What You'll Learn - -| Topic | Skill | Section | -|-------|-------|---------| -| Vouching / Sponsorship | Create bonds where high-trust agents vouch for newcomers | [§5](#5-vouchingengine--sponsorship-protocol) | -| Penalty Slashing | Cascade penalties through the sponsorship graph | [§6](#6-slashingengine--penalty-for-misbehavior) | -| Liability Graphs | Visualize and query sponsor → sponsored relationships | [§7](#7-liabilitymatrix--joint-liability-graph) | -| Causal Attribution | Determine root cause in multi-step workflow failures | [§8](#8-causalattributor--who-caused-what) | -| Agent Quarantine | Isolate agents that violate policies | [§9](#9-quarantinemanager--isolating-problematic-agents) | -| Audit Ledger | Maintain immutable records of all liability events | [§10](#10-liabilityledger--immutable-audit-trail) | -| Trust Integration | Connect liability outcomes to trust scores | [§11](#11-integration-with-trust-scoring) | -| End-to-End Workflow | Full example combining all six components | [§12](#12-real-world-example-multi-agent-workflow) | - ---- - -## 3. Installation - -Install the runtime package which re-exports all liability classes from the -hypervisor: - -```bash -pip install agentmesh-runtime -``` - -Verify the installation: - -```python -from agent_runtime import ( - VouchingEngine, - SlashingEngine, - LiabilityMatrix, - CausalAttributor, - QuarantineManager, - LiabilityLedger, -) - -print("Liability framework ready ✓") -``` - -You can also import directly from the hypervisor package: - -```python -from hypervisor import ( - VouchingEngine, - VouchRecord, - SlashingEngine, - LiabilityMatrix, - CausalAttributor, - AttributionResult, - QuarantineManager, - QuarantineReason, - LiabilityLedger, - LedgerEntryType, -) -``` - ---- - -## 4. Quick Start: Agent A Vouches for Agent B - -Get vouching running in under 15 lines: - -```python -from hypervisor import VouchingEngine - -# 1. Create the vouching engine -engine = VouchingEngine() - -# 2. 
Agent A (high trust, σ=0.85) vouches for Agent B (newcomer) -record = engine.vouch( - voucher_did="did:mesh:agent-a", - vouchee_did="did:mesh:agent-b", - session_id="session:onboarding-001", - voucher_sigma=0.85, -) - -print(record.vouch_id) # "vouch:a1b2c3d4" -print(record.voucher_did) # "did:mesh:agent-a" -print(record.vouchee_did) # "did:mesh:agent-b" -print(record.is_active) # True -print(record.is_expired) # False - -# 3. Check who vouches for Agent B -sponsors = engine.get_vouchers_for("did:mesh:agent-b", "session:onboarding-001") -print(f"Agent B has {len(sponsors)} sponsor(s)") # 1 -``` - -That's it — Agent A has staked its reputation on Agent B. If Agent B -misbehaves, the slashing engine can propagate penalties back to Agent A. - ---- - -## 5. VouchingEngine — Sponsorship Protocol - -The `VouchingEngine` implements a sponsorship model inspired by proof-of-stake -systems. High-trust agents "vouch" for lower-trust agents by bonding a -percentage of their own trust score. - -### 5.1 Creating a Vouch - -```python -from hypervisor import VouchingEngine - -engine = VouchingEngine(max_exposure=0.80) - -# Vouch with explicit bond percentage and expiry -from datetime import datetime, timedelta, UTC - -record = engine.vouch( - voucher_did="did:mesh:senior-agent", - vouchee_did="did:mesh:junior-agent", - session_id="session:data-pipeline", - voucher_sigma=0.90, - bond_pct=0.15, # 15% of σ bonded - expiry=datetime.now(UTC) + timedelta(hours=2), # Auto-expire after 2h -) - -print(record.bonded_sigma_pct) # 0.15 -print(record.bonded_amount) # 0.135 (0.90 × 0.15) -``` - -### 5.3 Effective Score Computation - -The vouching engine computes an effective score for the vouchee that considers -its own trust score and the sponsorship context: - -```python -eff_score = engine.compute_eff_score( - vouchee_did="did:mesh:junior-agent", - session_id="session:data-pipeline", - vouchee_sigma=0.45, - risk_weight=0.5, -) -print(f"Effective score: {eff_score}") -``` - -> **Public 
Preview:** In the Public Preview, `compute_eff_score` returns -> the vouchee's own sigma directly — sponsor boost is an enterprise feature. -> The vouching graph is still tracked for auditing and liability analysis. - -### 5.4 Exposure Tracking - -Track how much total reputation a voucher has at risk: - -```python -# Agent vouches for multiple agents in the same session -engine.vouch("did:mesh:lead", "did:mesh:worker-1", "session:batch", 0.90) -engine.vouch("did:mesh:lead", "did:mesh:worker-2", "session:batch", 0.90) -engine.vouch("did:mesh:lead", "did:mesh:worker-3", "session:batch", 0.90) - -exposure = engine.get_total_exposure("did:mesh:lead", "session:batch") -print(f"Total exposure: {exposure}") # Sum of bonded amounts -``` - -### 5.5 Releasing Bonds - -Bonds can be released individually or in bulk when a session ends: - -```python -# Release a single bond -engine.release_bond(record.vouch_id) -print(record.is_active) # False - -# Release all bonds for a session (call at session cleanup) -count = engine.release_session_bonds("session:batch") -print(f"Released {count} bonds") -``` - -### 5.6 Configuration Constants - -| Constant | Default | Description | -|----------|---------|-------------| -| `SCORE_SCALE` | `1000.0` | Score normalization factor | -| `MIN_VOUCHER_SCORE` | `0.50` | Minimum σ required to vouch for others | -| `DEFAULT_BOND_PCT` | `0.20` | Default bond percentage (20% of σ) | -| `DEFAULT_MAX_EXPOSURE` | `0.80` | Max total exposure per voucher (80% of σ) | - ---- - -## 6. SlashingEngine — Penalty for Misbehavior - -When a sponsored agent misbehaves, the `SlashingEngine` applies penalties that -cascade through the vouching graph — penalizing both the offender and its -sponsors. - -### 6.1 How Slashing Works - -``` -Agent A (voucher) Agent B (vouchee) - σ = 0.90 σ = 0.70 - │ │ - └───── vouches for ─────────────┘ - │ - B misbehaves - │ - ┌────────┴────────┐ - │ SlashingEngine │ - ├─────────────────┤ - │ B: σ 0.70 → ? 
│ ← direct penalty - │ A: σ 0.90 → ? │ ← cascade penalty - └─────────────────┘ -``` - -### 6.2 Triggering a Slash - -```python -from hypervisor import VouchingEngine, SlashingEngine - -# Set up the engines -vouching = VouchingEngine() -slashing = SlashingEngine(vouching) - -# Agent A vouches for Agent B -vouching.vouch( - voucher_did="did:mesh:agent-a", - vouchee_did="did:mesh:agent-b", - session_id="session:prod-deploy", - voucher_sigma=0.90, -) - -# Agent B performs an unauthorized action → slash -result = slashing.slash( - vouchee_did="did:mesh:agent-b", - session_id="session:prod-deploy", - vouchee_sigma=0.70, - risk_weight=0.95, - reason="Unauthorized write to production database", - agent_scores={"did:mesh:agent-a": 0.90, "did:mesh:agent-b": 0.70}, -) - -print(result.slash_id) # "slash:abc123" -print(result.vouchee_did) # "did:mesh:agent-b" -print(result.vouchee_sigma_before) # 0.70 -print(result.vouchee_sigma_after) # Reduced score -print(result.reason) # "Unauthorized write to production database" -print(result.cascade_depth) # 0 (direct slash) -``` - -### 6.3 Voucher Clips (Cascade Penalties) - -Each slash result includes `voucher_clips` — the penalties applied to sponsors: - -```python -for clip in result.voucher_clips: - print(f"Voucher: {clip.voucher_did}") - print(f" σ before: {clip.sigma_before}") - print(f" σ after: {clip.sigma_after}") - print(f" Risk weight: {clip.risk_weight}") - print(f" Vouch ID: {clip.vouch_id}") -``` - -> **Public Preview:** In the Public Preview, slashing is logged but -> scores are not actually modified. The `SlashResult` records are still created -> for auditing. Enterprise editions enforce real score deductions. 
- -### 6.4 Cascade Depth - -The slashing engine limits cascade depth to prevent runaway penalty chains: - -```python -# MAX_CASCADE_DEPTH = 2 -# Penalties propagate at most 2 levels up the sponsorship graph: -# Agent C (offender) → Agent B (direct sponsor) → Agent A (indirect sponsor) - -# SIGMA_FLOOR = 0.05 -# No agent's score is slashed below 0.05 — prevents permanent exclusion -``` - -### 6.5 Slash History - -Review all past slashing events: - -```python -for entry in slashing.history: - print(f"[{entry.timestamp}] {entry.vouchee_did}: {entry.reason}") - print(f" σ {entry.vouchee_sigma_before:.2f} → {entry.vouchee_sigma_after:.2f}") - print(f" Vouchers affected: {len(entry.voucher_clips)}") -``` - ---- - -## 7. LiabilityMatrix — Joint Liability Graph - -The `LiabilityMatrix` models the sponsor → sponsored relationships as a -directed graph. It's the data structure that answers "if this agent fails, who -else is liable?" - -### 7.1 Building the Graph - -```python -from hypervisor import LiabilityMatrix - -matrix = LiabilityMatrix(session_id="session:data-pipeline") - -# Agent A sponsors Agent B and Agent C -matrix.add_edge("did:mesh:agent-a", "did:mesh:agent-b", bonded_amount=0.18, vouch_id="v1") -matrix.add_edge("did:mesh:agent-a", "did:mesh:agent-c", bonded_amount=0.15, vouch_id="v2") - -# Agent B sponsors Agent D -matrix.add_edge("did:mesh:agent-b", "did:mesh:agent-d", bonded_amount=0.10, vouch_id="v3") -``` - -This creates the following liability graph: - -``` - Agent A (σ bonded: 0.33) - ╱ ╲ - ↓ ↓ - Agent B Agent C - (0.18) (0.15) - │ - ↓ - Agent D - (0.10) -``` - -### 7.2 Querying the Graph - -```python -# Who sponsors Agent D? -sponsors = matrix.who_vouches_for("did:mesh:agent-d") -for edge in sponsors: - print(f"{edge.voucher_did} → {edge.vouchee_did} ({edge.bonded_amount})") -# Output: did:mesh:agent-b → did:mesh:agent-d (0.10) - -# Who does Agent A sponsor? 
-sponsored = matrix.who_is_vouched_by("did:mesh:agent-a") -for edge in sponsored: - print(f"{edge.voucher_did} → {edge.vouchee_did}") -# Output: -# did:mesh:agent-a → did:mesh:agent-b -# did:mesh:agent-a → did:mesh:agent-c - -# Total exposure for Agent A -exposure = matrix.total_exposure("did:mesh:agent-a") -print(f"Agent A total exposure: {exposure}") # 0.33 -``` - -### 7.3 Cascade Path Analysis - -Find all paths through which penalties would propagate if an agent is slashed: - -```python -# Trace cascade paths starting from Agent B (follows sponsor → sponsored edges) -paths = matrix.cascade_path("did:mesh:agent-b", max_depth=2) -print(paths) -# Returns all DFS paths from agent-b through sponsored agents -# e.g., [["did:mesh:agent-b", "did:mesh:agent-d"]] -``` - -### 7.4 Cycle Detection - -Circular vouching (A sponsors B, B sponsors A) creates infinite cascade risk. -The matrix detects this: - -```python -# Detect circular dependencies -matrix_risky = LiabilityMatrix(session_id="session:test") -matrix_risky.add_edge("did:a", "did:b", 0.2, "v1") -matrix_risky.add_edge("did:b", "did:a", 0.2, "v2") # Creates a cycle! - -assert matrix_risky.has_cycle() is True - -# Safe graph — no cycles -matrix_safe = LiabilityMatrix(session_id="session:test-safe") -matrix_safe.add_edge("did:a", "did:b", 0.2, "v1") -matrix_safe.add_edge("did:b", "did:c", 0.2, "v2") - -assert matrix_safe.has_cycle() is False -``` - -### 7.5 Session Cleanup - -Release all bonds when a session ends: - -```python -# View current edges -print(f"Active edges: {len(matrix.edges)}") # 3 - -# Clear everything -matrix.clear() -print(f"Active edges: {len(matrix.edges)}") # 0 -``` - ---- - -## 8. CausalAttributor — Who Caused What - -When a multi-step saga fails, the `CausalAttributor` traces the causal chain -to determine which agent is responsible — and how much liability each -participant bears. - -### 8.1 How Causal Attribution Works - -In a saga (multi-step workflow), each agent performs actions. When a step fails, -the attributor: - -1. 
Identifies the **failure step** and the **failure agent** (direct cause). -2. Traces the **causal chain** — which preceding actions contributed. -3. Assigns a **liability score** to each involved agent. - -``` -Saga: data-pipeline-001 - Step 1: Agent A → fetch data ✓ - Step 2: Agent B → transform data ✓ - Step 3: Agent C → write to DB ✗ FAILED - Step 4: Agent D → notify (skipped) - -Attribution: - Agent C → liability: 1.0 (direct cause) - Agent B → liability: 0.0 (not at fault) - Agent A → liability: 0.0 (not at fault) -``` - -### 8.2 Running an Attribution - -```python -from hypervisor import CausalAttributor - -attributor = CausalAttributor() - -# Define what each agent did in the saga -agent_actions = { - "did:mesh:fetcher": [ - {"step_id": "step-1", "action": "fetch_data", "status": "success"}, - ], - "did:mesh:transformer": [ - {"step_id": "step-2", "action": "transform", "status": "success"}, - ], - "did:mesh:writer": [ - {"step_id": "step-3", "action": "write_db", "status": "failed"}, - ], -} - -result = attributor.attribute( - saga_id="saga:pipeline-001", - session_id="session:nightly-run", - agent_actions=agent_actions, - failure_step_id="step-3", - failure_agent_did="did:mesh:writer", - risk_weights={"did:mesh:writer": 0.95, "did:mesh:transformer": 0.5}, -) - -print(result.attribution_id) # "attr:a1b2c3d4" -print(result.saga_id) # "saga:pipeline-001" -print(result.root_cause_agent) # "did:mesh:writer" -print(result.causal_chain_length) -print(result.agents_involved) # ["did:mesh:fetcher", "did:mesh:transformer", "did:mesh:writer"] -``` - -### 8.3 Reading Fault Attributions - -Each `AttributionResult` contains a list of `FaultAttribution` objects: - -```python -for attr in result.attributions: - print(f"Agent: {attr.agent_did}") - print(f" Liability score: {attr.liability_score}") - print(f" Causal contribution: {attr.causal_contribution}") - print(f" Direct cause: {attr.is_direct_cause}") - print(f" Reason: {attr.reason}") - -# Get liability for a 
specific agent -writer_liability = result.get_liability("did:mesh:writer") -print(f"Writer liability: {writer_liability}") # 1.0 (full liability) -``` - -### 8.4 Attribution History - -Review all past attributions: - -```python -for past in attributor.attribution_history: - print(f"[{past.timestamp}] Saga: {past.saga_id}") - print(f" Root cause: {past.root_cause_agent}") - print(f" Agents involved: {', '.join(past.agents_involved)}") -``` - ---- - -## 9. QuarantineManager — Isolating Problematic Agents - -When an agent's behavior becomes dangerous — repeated slashing, ring breaches, -or rate-limit abuse — the `QuarantineManager` isolates it from the system. - -### 9.1 Quarantine Reasons - -```python -from hypervisor import QuarantineReason - -# All possible quarantine reasons: -QuarantineReason.BEHAVIORAL_DRIFT # Agent deviated from expected behavior -QuarantineReason.LIABILITY_VIOLATION # Exceeded liability thresholds -QuarantineReason.RING_BREACH # Attempted action above privilege level -QuarantineReason.RATE_LIMIT_EXCEEDED # Too many calls in time window -QuarantineReason.MANUAL # Human operator decision -QuarantineReason.CASCADE_SLASH # Quarantined as part of slash cascade -``` - -### 9.2 Quarantining an Agent - -```python -from hypervisor import QuarantineManager, QuarantineReason - -qm = QuarantineManager() - -# Quarantine Agent C for a ring breach -record = qm.quarantine( - agent_did="did:mesh:agent-c", - session_id="session:prod-deploy", - reason=QuarantineReason.RING_BREACH, - details="Attempted Ring 1 action with Ring 3 credentials", - duration_seconds=600, # 10-minute quarantine - forensic_data={ - "attempted_action": "deploy.k8s", - "agent_ring": 3, - "required_ring": 1, - }, -) - -print(record.quarantine_id) # "quar:a1b2c3d4" -print(record.reason) # QuarantineReason.RING_BREACH -print(record.is_active) # True -print(record.expires_at) # ~10 minutes from now -print(record.forensic_data) # The evidence dict -``` - -### 9.3 Checking Quarantine Status - 
-```python -# Is this agent quarantined? -is_quarantined = qm.is_quarantined("did:mesh:agent-c", "session:prod-deploy") -print(is_quarantined) # True or False - -# Get the active quarantine record -active = qm.get_active_quarantine("did:mesh:agent-c", "session:prod-deploy") -if active: - print(f"Quarantined since: {active.entered_at}") - print(f"Expires at: {active.expires_at}") - print(f"Duration: {active.duration_seconds}s") -``` - -> **Public Preview:** In the Public Preview, `is_quarantined()` always -> returns `False` and `active_quarantines` is always empty. Quarantine records -> are still created for auditing. Enterprise editions enforce actual isolation. - -### 9.4 Releasing from Quarantine - -```python -# Manual release (e.g., after investigation) -released = qm.release("did:mesh:agent-c", "session:prod-deploy") -if released: - print(f"Released at: {released.released_at}") - -# Automatic expiry — call tick() periodically to process expirations -expired_records = qm.tick() -for record in expired_records: - print(f"Auto-released: {record.agent_did}") -``` - -### 9.5 Quarantine History - -```python -# Get all quarantine records (active + expired + released) -all_history = qm.get_history() -print(f"Total quarantine events: {len(all_history)}") - -# Filter by agent -agent_history = qm.get_history(agent_did="did:mesh:agent-c") - -# Filter by session -session_history = qm.get_history(session_id="session:prod-deploy") - -# Current quarantine stats -print(f"Active quarantines: {qm.quarantine_count}") -print(f"Active records: {qm.active_quarantines}") -``` - -### 9.6 Default Quarantine Duration - -If no `duration_seconds` is specified, the default is **300 seconds** (5 minutes): - -```python -# QuarantineManager.DEFAULT_QUARANTINE_SECONDS = 300 - -record = qm.quarantine( - agent_did="did:mesh:agent-x", - session_id="session:test", - reason=QuarantineReason.MANUAL, - details="Under investigation", - # duration_seconds omitted → defaults to 300s -) -``` - ---- - -## 
10. LiabilityLedger — Immutable Audit Trail - -The `LiabilityLedger` records every liability event — vouches given, slashes -received, quarantine entries — into an append-only log. This is the -authoritative source for an agent's liability history. - -### 10.1 Event Types - -```python -from hypervisor import LedgerEntryType - -# All event types recorded in the ledger: -LedgerEntryType.VOUCH_GIVEN # Agent vouched for another -LedgerEntryType.VOUCH_RECEIVED # Agent received a vouch -LedgerEntryType.VOUCH_RELEASED # Vouch bond was released -LedgerEntryType.SLASH_RECEIVED # Agent was directly slashed -LedgerEntryType.SLASH_CASCADED # Agent penalized via cascade -LedgerEntryType.QUARANTINE_ENTERED # Agent entered quarantine -LedgerEntryType.QUARANTINE_RELEASED # Agent released from quarantine -LedgerEntryType.FAULT_ATTRIBUTED # Agent received fault attribution -LedgerEntryType.CLEAN_SESSION # Agent completed a session cleanly -``` - -### 10.2 Recording Events - -```python -from hypervisor import LiabilityLedger, LedgerEntryType - -ledger = LiabilityLedger() - -# Record a vouch event -entry = ledger.record( - agent_did="did:mesh:agent-a", - entry_type=LedgerEntryType.VOUCH_GIVEN, - session_id="session:pipeline", - severity=0.0, - details="Vouched for did:mesh:agent-b with 20% bond", - related_agent="did:mesh:agent-b", -) -print(entry.entry_id) # "a1b2c3d4e5f6" -print(entry.timestamp) # datetime - -# Record a slash event -ledger.record( - agent_did="did:mesh:agent-b", - entry_type=LedgerEntryType.SLASH_RECEIVED, - session_id="session:pipeline", - severity=0.8, - details="Unauthorized database write", -) - -# Record a clean session (positive signal) -ledger.record( - agent_did="did:mesh:agent-a", - entry_type=LedgerEntryType.CLEAN_SESSION, - session_id="session:pipeline", - severity=0.0, - details="Completed session without incidents", -) -``` - -### 10.3 Agent History - -```python -# Get full history for an agent -history = ledger.get_agent_history("did:mesh:agent-a") 
-for entry in history: - print(f"[{entry.timestamp}] {entry.entry_type.value}: {entry.details}") - -# Ledger statistics -print(f"Total entries: {ledger.total_entries}") -print(f"Tracked agents: {ledger.tracked_agents}") -``` - -### 10.4 Risk Profiles - -The ledger computes an `AgentRiskProfile` — a summary of an agent's liability -track record: - -```python -profile = ledger.compute_risk_profile("did:mesh:agent-b") - -print(f"Agent: {profile.agent_did}") -print(f"Total entries: {profile.total_entries}") -print(f"Slash count: {profile.slash_count}") -print(f"Quarantine count: {profile.quarantine_count}") -print(f"Clean sessions: {profile.clean_session_count}") -print(f"Avg fault score: {profile.fault_score_avg:.2f}") -print(f"Risk score: {profile.risk_score:.2f}") -print(f"Recommendation: {profile.recommendation}") # "admit", "probation", or "deny" -``` - -### 10.5 Admission Decisions - -The ledger can recommend whether an agent should be admitted to new sessions -based on its track record: - -```python -should_admit, reason = ledger.should_admit("did:mesh:agent-b") -print(f"Admit: {should_admit}") # True/False -print(f"Reason: {reason}") # "admit" / "probation" / "deny" -``` - -> **Public Preview:** `should_admit()` always returns `(True, "admit")`. -> The risk profile is still computed for visibility. Enterprise editions enforce -> admission gates. - -### 10.6 Thresholds - -| Constant | Value | Description | -|----------|-------|-------------| -| `PROBATION_THRESHOLD` | `0.3` | Risk score ≥ 0.3 triggers probation recommendation | -| `DENY_THRESHOLD` | `0.6` | Risk score ≥ 0.6 triggers deny recommendation | - ---- - -## 11. Integration with Trust Scoring - -The liability framework connects directly to the trust and identity system -described in [Tutorial 02 — Trust & Identity](02-trust-and-identity.md). 
Here's -how the pieces fit together: - -### 11.1 Trust → Liability Flow - -``` -┌───────────────┐ ┌──────────────┐ ┌───────────────────┐ -│ Trust Score │────▶│ Vouching │────▶│ Liability Matrix │ -│ (σ = 0.85) │ │ Engine │ │ (graph edges) │ -└───────────────┘ └──────────────┘ └───────────────────┘ - │ - ▼ -┌───────────────┐ ┌──────────────┐ ┌───────────────────┐ -│ Effective │◀────│ Bond Amount │ │ Slash Cascade │ -│ Score │ │ (σ × bond%) │ │ (depth ≤ 2) │ -└───────────────┘ └──────────────┘ └───────────────────┘ -``` - -### 11.2 Vouching Requires Minimum Trust - -Only agents above the minimum voucher score threshold can sponsor others: - -```python -from hypervisor import VouchingEngine - -engine = VouchingEngine() - -# MIN_VOUCHER_SCORE = 0.50 -# Agent with σ = 0.85 → can vouch ✓ -record = engine.vouch( - voucher_did="did:mesh:trusted", - vouchee_did="did:mesh:newcomer", - session_id="session:test", - voucher_sigma=0.85, -) -print(f"Vouch created: {record.is_active}") -``` - -### 11.3 Slashing Affects Trust Scores - -When the hypervisor detects behavioral drift, it automatically slashes the -offending agent — which feeds back into the trust system: - -```python -from hypervisor import Hypervisor - -hv = Hypervisor() - -# The verify_behavior() method checks for drift and auto-slashes: -# result = await hv.verify_behavior( -# session_id="session:prod", -# agent_did="did:mesh:agent-b", -# claimed_embedding=claimed, -# observed_embedding=observed, -# ) -# If drift_score exceeds the threshold, the hypervisor calls: -# hv.slashing.slash(...) -# which reduces the agent's score and cascades to its sponsors. 
-``` - -### 11.4 Ledger → Ring Assignment - -An agent's liability history influences its trust score, which determines its -execution ring (see [Tutorial 06 — Execution Sandboxing](06-execution-sandboxing.md)): - -``` -Liability Ledger → Risk Profile → Trust Score (σ) → Ring - 3 clean sessions risk: 0.1 σ = 0.82 Ring 2 - 0 slashes recommend: admit -``` - -```python -# Check if an agent's liability record supports admission -admit, reason = ledger.should_admit("did:mesh:agent-b") - -if admit and reason == "admit": - # Full access — score maps to Ring 2 or above - pass -elif admit and reason == "probation": - # Limited access — restrict to Ring 3 sandbox - pass -else: - # Deny — agent has too many violations - pass -``` - ---- - -## 12. Real-World Example: Multi-Agent Workflow - -Let's combine all six components in a realistic scenario: a **data pipeline** -where three agents collaborate, one of them fails, and the system traces -liability end-to-end. - -### Scenario - -- **Fetcher** (high trust, σ=0.90) — retrieves data from external API -- **Transformer** (medium trust, σ=0.65) — processes and cleans data -- **Writer** (lower trust, σ=0.50) — writes results to database - -Fetcher vouches for Transformer, Transformer vouches for Writer. During -execution, Writer attempts an unauthorized schema migration and fails. - -### Full Implementation - -```python -from datetime import datetime, UTC -from hypervisor import ( - VouchingEngine, - SlashingEngine, - LiabilityMatrix, - CausalAttributor, - QuarantineManager, - QuarantineReason, - LiabilityLedger, - LedgerEntryType, -) - -SESSION = "session:nightly-pipeline-2025-07-22" - -# ── 1. Initialize all engines ──────────────────────────────────────────── - -vouching = VouchingEngine(max_exposure=0.80) -slashing = SlashingEngine(vouching) -matrix = LiabilityMatrix(session_id=SESSION) -attributor = CausalAttributor() -quarantine = QuarantineManager() -ledger = LiabilityLedger() - -# ── 2. 
Establish vouching chain ────────────────────────────────────────── - -# Fetcher (σ=0.90) vouches for Transformer -v1 = vouching.vouch( - voucher_did="did:mesh:fetcher", - vouchee_did="did:mesh:transformer", - session_id=SESSION, - voucher_sigma=0.90, - bond_pct=0.20, -) -matrix.add_edge("did:mesh:fetcher", "did:mesh:transformer", v1.bonded_amount, v1.vouch_id) - -ledger.record("did:mesh:fetcher", LedgerEntryType.VOUCH_GIVEN, SESSION, - details="Vouched for transformer", related_agent="did:mesh:transformer") -ledger.record("did:mesh:transformer", LedgerEntryType.VOUCH_RECEIVED, SESSION, - details="Vouched by fetcher", related_agent="did:mesh:fetcher") - -# Transformer (σ=0.65) vouches for Writer -v2 = vouching.vouch( - voucher_did="did:mesh:transformer", - vouchee_did="did:mesh:writer", - session_id=SESSION, - voucher_sigma=0.65, - bond_pct=0.15, -) -matrix.add_edge("did:mesh:transformer", "did:mesh:writer", v2.bonded_amount, v2.vouch_id) - -ledger.record("did:mesh:transformer", LedgerEntryType.VOUCH_GIVEN, SESSION, - details="Vouched for writer", related_agent="did:mesh:writer") -ledger.record("did:mesh:writer", LedgerEntryType.VOUCH_RECEIVED, SESSION, - details="Vouched by transformer", related_agent="did:mesh:transformer") - -print("Vouching chain established:") -print(f" Fetcher → Transformer (bond: {v1.bonded_amount:.3f})") -print(f" Transformer → Writer (bond: {v2.bonded_amount:.3f})") -print(f" Cycle detected: {matrix.has_cycle()}") # False - -# ── 3. 
Simulate pipeline execution ────────────────────────────────────── - -agent_actions = { - "did:mesh:fetcher": [ - {"step_id": "step-1", "action": "fetch_api", "status": "success"}, - ], - "did:mesh:transformer": [ - {"step_id": "step-2", "action": "clean_data", "status": "success"}, - {"step_id": "step-3", "action": "validate_schema", "status": "success"}, - ], - "did:mesh:writer": [ - {"step_id": "step-4", "action": "write_results", "status": "success"}, - {"step_id": "step-5", "action": "migrate_schema", "status": "failed"}, - ], -} - -# ── 4. Writer fails at step 5 → Run causal attribution ────────────────── - -attribution = attributor.attribute( - saga_id="saga:nightly-pipeline", - session_id=SESSION, - agent_actions=agent_actions, - failure_step_id="step-5", - failure_agent_did="did:mesh:writer", - risk_weights={ - "did:mesh:fetcher": 0.3, - "did:mesh:transformer": 0.5, - "did:mesh:writer": 0.95, - }, -) - -print(f"\nCausal attribution:") -print(f" Root cause: {attribution.root_cause_agent}") -for attr in attribution.attributions: - marker = "← DIRECT CAUSE" if attr.is_direct_cause else "" - print(f" {attr.agent_did}: liability={attr.liability_score:.2f} {marker}") - -# Record attribution in ledger -ledger.record("did:mesh:writer", LedgerEntryType.FAULT_ATTRIBUTED, SESSION, - severity=attribution.get_liability("did:mesh:writer"), - details="Root cause of schema migration failure") - -# ── 5. 
Slash the offending agent ───────────────────────────────────────── - -slash_result = slashing.slash( - vouchee_did="did:mesh:writer", - session_id=SESSION, - vouchee_sigma=0.50, - risk_weight=0.95, - reason="Unauthorized schema migration in production", - agent_scores={ - "did:mesh:fetcher": 0.90, - "did:mesh:transformer": 0.65, - "did:mesh:writer": 0.50, - }, -) - -print(f"\nSlashing result:") -print(f" Writer σ: {slash_result.vouchee_sigma_before:.2f} → {slash_result.vouchee_sigma_after:.2f}") -for clip in slash_result.voucher_clips: - print(f" Cascade → {clip.voucher_did}: σ {clip.sigma_before:.2f} → {clip.sigma_after:.2f}") - -ledger.record("did:mesh:writer", LedgerEntryType.SLASH_RECEIVED, SESSION, - severity=0.95, details="Unauthorized schema migration") -for clip in slash_result.voucher_clips: - ledger.record(clip.voucher_did, LedgerEntryType.SLASH_CASCADED, SESSION, - severity=clip.risk_weight, - details=f"Cascade from writer slash", - related_agent="did:mesh:writer") - -# ── 6. Quarantine the offender ─────────────────────────────────────────── - -q_record = quarantine.quarantine( - agent_did="did:mesh:writer", - session_id=SESSION, - reason=QuarantineReason.LIABILITY_VIOLATION, - details="Unauthorized schema migration caused pipeline failure", - duration_seconds=3600, # 1-hour quarantine - forensic_data={ - "saga_id": "saga:nightly-pipeline", - "failed_step": "step-5", - "attribution_id": attribution.attribution_id, - "slash_id": slash_result.slash_id, - }, -) - -ledger.record("did:mesh:writer", LedgerEntryType.QUARANTINE_ENTERED, SESSION, - severity=1.0, details="Quarantined for liability violation") - -print(f"\nQuarantine:") -print(f" Agent: {q_record.agent_did}") -print(f" Reason: {q_record.reason.value}") -print(f" Forensic data: {q_record.forensic_data}") - -# ── 7. 
Record clean sessions for well-behaved agents ──────────────────── - -for good_agent in ["did:mesh:fetcher", "did:mesh:transformer"]: - ledger.record(good_agent, LedgerEntryType.CLEAN_SESSION, SESSION, - details="Completed pipeline steps without incidents") - -# ── 8. Review risk profiles ───────────────────────────────────────────── - -print(f"\n{'='*60}") -print("Risk Profiles") -print(f"{'='*60}") - -for agent in ["did:mesh:fetcher", "did:mesh:transformer", "did:mesh:writer"]: - profile = ledger.compute_risk_profile(agent) - admit, reason = ledger.should_admit(agent) - print(f"\n {agent}:") - print(f" Slashes: {profile.slash_count} | Quarantines: {profile.quarantine_count}") - print(f" Clean sessions: {profile.clean_session_count}") - print(f" Risk score: {profile.risk_score:.2f} | Recommendation: {profile.recommendation}") - print(f" Admit to next session: {admit} ({reason})") - -# ── 9. Session cleanup ────────────────────────────────────────────────── - -released = vouching.release_session_bonds(SESSION) -matrix.clear() -print(f"\nSession cleanup: released {released} bonds, cleared liability matrix") -``` - -### Expected Output - -``` -Vouching chain established: - Fetcher → Transformer (bond: 0.180) - Transformer → Writer (bond: 0.098) - Cycle detected: False - -Causal attribution: - Root cause: did:mesh:writer - did:mesh:writer: liability=1.00 ← DIRECT CAUSE - ... - -Slashing result: - Writer σ: 0.50 → ... - ... - -Quarantine: - Agent: did:mesh:writer - Reason: liability_violation - ... 
- -============================================================ -Risk Profiles -============================================================ - did:mesh:fetcher: - Slashes: 0 | Quarantines: 0 - Clean sessions: 1 - Risk score: 0.00 | Recommendation: admit - Admit to next session: True (admit) - - did:mesh:transformer: - Slashes: 0 | Quarantines: 0 - Clean sessions: 1 - Risk score: 0.00 | Recommendation: admit - Admit to next session: True (admit) - - did:mesh:writer: - Slashes: 1 | Quarantines: 1 - Clean sessions: 0 - Risk score: ... | Recommendation: ... - Admit to next session: ... - -Session cleanup: released 2 bonds, cleared liability matrix -``` - ---- - -## 13. Next Steps - -- **Trust & Identity:** Deepen your understanding of trust scores and DIDs - in [Tutorial 02 — Trust & Identity](02-trust-and-identity.md). -- **Execution Sandboxing:** Learn how trust scores map to privilege rings and - capability guards in [Tutorial 06 — Execution Sandboxing](06-execution-sandboxing.md). -- **Audit & Compliance:** Explore how liability ledger entries integrate with - `CommitmentEngine` and `DeltaEngine` for tamper-evident audit logs in - [Tutorial 04 — Audit & Compliance](04-audit-and-compliance.md). -- **REST API:** Use the `/api/v1/sessions/{session_id}/sponsor` endpoint to - create vouches via HTTP — see the API reference documentation. -- **Enterprise Features:** Upgrade to the enterprise edition for enforced - bonding, real slashing penalties, quarantine enforcement, and admission - gates based on `LiabilityLedger` risk profiles. 
diff --git a/docs/tutorials/13-observability-and-tracing.md b/docs/tutorials/13-observability-and-tracing.md index d9d8fd730..625dd0222 100644 --- a/docs/tutorials/13-observability-and-tracing.md +++ b/docs/tutorials/13-observability-and-tracing.md @@ -55,7 +55,7 @@ primitives that solve all of these: ┌────────────────────────────────────────────────────────────┐ │ HypervisorEventBus │ │ Append-only structured event store with pub/sub │ -│ 40+ typed events · session/agent/time indexes │ +│ 30 typed events · session/agent/time indexes │ ├──────────────────────┬─────────────────────────────────────┤ │ RingMetricsCollector│ SagaSpanExporter │ │ Subscribes to ring │ Subscribes to saga │ @@ -263,17 +263,16 @@ transitions, saga steps, security incidents, audit records, and more. ### Event types -The bus supports 40+ typed events organized into categories: +The bus supports 30 typed events organized into categories: | Category | Event Types | Examples | |---|---|---| | **Session** | 5 | `SESSION_CREATED`, `SESSION_TERMINATED`, `SESSION_ARCHIVED` | | **Ring** | 5 | `RING_ASSIGNED`, `RING_ELEVATED`, `RING_BREACH_DETECTED` | -| **Liability** | 6 | `VOUCH_CREATED`, `SLASH_EXECUTED`, `QUARANTINE_ENTERED` | -| **Saga** | 10 | `SAGA_CREATED`, `SAGA_STEP_COMMITTED`, `SAGA_ESCALATED` | +| **Saga** | 7 | `SAGA_CREATED`, `SAGA_STEP_COMMITTED`, `SAGA_ESCALATED` | | **VFS** | 5 | `VFS_WRITE`, `VFS_SNAPSHOT`, `VFS_CONFLICT` | | **Security** | 4 | `RATE_LIMITED`, `AGENT_KILLED`, `IDENTITY_VERIFIED` | -| **Audit** | 3 | `AUDIT_DELTA_CAPTURED`, `AUDIT_COMMITTED` | +| **Audit** | 2 | `AUDIT_DELTA_CAPTURED`, `AUDIT_COMMITTED` | | **Verification** | 2 | `BEHAVIOR_DRIFT`, `HISTORY_VERIFIED` | ### The HypervisorEvent dataclass @@ -283,11 +282,11 @@ timestamp: ```python event = HypervisorEvent( - event_type=EventType.SLASH_EXECUTED, + event_type=EventType.RING_BREACH_DETECTED, session_id="session-042", - agent_did="did:mesh:rogue-agent", + agent_did="did:mesh:agent", 
causal_trace_id="abc123/def456", - payload={"severity": "critical", "stake_slashed": 150}, + payload={"severity": "critical", "required_ring": 1, "agent_ring": 3}, ) print(event.event_id) # Auto-generated 16-char UUID hex @@ -295,7 +294,7 @@ print(event.timestamp) # datetime.now(UTC) # Serialize to JSON-compatible dict d = event.to_dict() -print(d["event_type"]) # "liability.slash_executed" +print(d["event_type"]) # "ring.breach_detected" print(d["timestamp"]) # "2025-01-15T10:30:00+00:00" (ISO format) ``` @@ -312,7 +311,7 @@ bus.emit(HypervisorEvent( bus.emit(HypervisorEvent( event_type=EventType.RING_ELEVATED, session_id="s1", agent_did="did:mesh:agent-alpha", - payload={"from_ring": 3, "to_ring": 1, "reason": "admin vouch"}, + payload={"from_ring": 3, "to_ring": 1, "reason": "operator approval"}, )) print(f"Total events: {bus.event_count}") # 2 diff --git a/docs/tutorials/14-kill-switch-and-rate-limiting.md b/docs/tutorials/14-kill-switch-and-rate-limiting.md index 52b887d09..aa69b8c8e 100644 --- a/docs/tutorials/14-kill-switch-and-rate-limiting.md +++ b/docs/tutorials/14-kill-switch-and-rate-limiting.md @@ -136,7 +136,6 @@ print(list(KillReason)) | `RATE_LIMIT` | `"rate_limit"` | Agent exceeded rate limits repeatedly | | `RING_BREACH` | `"ring_breach"` | Agent attempted actions above its ring level | | `MANUAL` | `"manual"` | Human operator triggered the kill | -| `QUARANTINE_TIMEOUT` | `"quarantine_timeout"` | Agent was quarantined and didn't recover | | `SESSION_TIMEOUT` | `"session_timeout"` | Session exceeded its `max_duration_seconds` | ### 3.2 Kill with In-Flight Saga Steps @@ -830,7 +829,7 @@ Every elevation denial includes a structured reason and remediation guidance: | `INVALID_TARGET` | Target ring is not higher-privilege | Request a lower-numbered ring | | `RING_0_FORBIDDEN` | Ring 0 cannot be requested via API | Requires SRE Witness attestation | | `INSUFFICIENT_TRUST` | Agent's trust score too low | Improve trust through successful operations | -| 
`NO_SPONSORSHIP` | No Ring 0/1 agent vouched for this | Get sponsorship from privileged agent | +| `DUPLICATE` | An active elevation already exists | Reuse or revoke the existing elevation | | `EXPIRED_TTL` | TTL exceeded maximum (3600s) | Submit new request with valid TTL | ### 7.4 Validation Rules diff --git a/docs/tutorials/23-delegation-chains.md b/docs/tutorials/23-delegation-chains.md index 9c7d5684d..2d7af2df3 100644 --- a/docs/tutorials/23-delegation-chains.md +++ b/docs/tutorials/23-delegation-chains.md @@ -498,7 +498,6 @@ with delegated authority. | TypeScript package overview | [Tutorial 20 — TypeScript package](./20-typescript-sdk.md) | | Rust crate delegation | [Tutorial 21 — Rust crate](./21-rust-sdk.md) | | Policy evaluation | [Tutorial 01 — Policy Engine](./01-policy-engine.md) | -| Liability & attribution | [Tutorial 12 — Liability & Attribution](./12-liability-and-attribution.md) | --- @@ -521,5 +520,3 @@ with delegated authority. - **Implement time-limited delegation** with `expiresAt` for temporary access - **Use the `IdentityRegistry`** to track all agents and enable cascade revocation -- **Read Tutorial 12** ([Liability & Attribution](./12-liability-and-attribution.md)) - to understand how delegated actions are attributed diff --git a/docs/tutorials/45-shift-left-governance.md b/docs/tutorials/45-shift-left-governance.md index 7ad14a28e..82669234b 100644 --- a/docs/tutorials/45-shift-left-governance.md +++ b/docs/tutorials/45-shift-left-governance.md @@ -7,8 +7,8 @@ Catch governance violations before they reach production. This tutorial walks through every layer of AGT's shift-left story: from pre-commit hooks that validate policy files on your laptop, through PR-time gates that enforce -attestation and dependency review, to CI/CD checks that run security scans, -binary analysis, and supply chain verification on every build. 
+dependency review and secret scanning, to CI/CD checks that run governance +verification, binary analysis, and supply chain verification on every build. > **Scope:** commit-time, PR-time, CI/build-time, and release-time governance > **Tools:** pre-commit hooks, GitHub Actions, GitHub CI workflows @@ -23,8 +23,8 @@ binary analysis, and supply chain verification on every build. | [Why Shift-Left?](#why-shift-left) | The case for catching violations before runtime | | [Commit-Time](#commit-time-pre-commit-hooks) | Pre-commit hooks for policy and plugin validation | | [PR-Time: Contributor Reputation](#pr-time-contributor-reputation) | Automated screening for coordinated inauthentic behavior | -| [PR-Time](#pr-time-gates) | Governance attestation, dependency review, secret scanning | -| [CI/Build-Time](#cibuild-time-checks) | Governance verify, policy validation, security scans, binary analysis | +| [PR-Time](#pr-time-gates) | Dependency review, secret scanning, supply chain checks | +| [CI/Build-Time](#cibuild-time-checks) | Governance verify, policy validation, static analysis, binary analysis | | [Language-Specific Build Checks](#language-specific-build-time-enforcement) | .NET, TypeScript, Python build-time enforcement | | [Release-Time](#release-time-gates) | SBOM generation, artifact signing, attestation | | [Reference Architecture](#reference-architecture) | How all the pieces fit together | @@ -44,15 +44,14 @@ Shift-left governance moves checks earlier in the development lifecycle: Commit PR CI/Build Release Runtime │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ -``` Contributor Commit PR CI/Build Release Runtime │ │ │ │ │ │ ▼ ▼ ▼ ▼ ▼ ▼ ┌──────┐ ┌──────┐ ┌─────────┐ ┌────────────┐ ┌───────────┐ ┌──────────┐ - │reputa│ │ pre- │ │ attest │ │ governance │ │ SBOM + │ │ policy │ - │tion │ │commit│ │ + dep │ │ verify + │ │ signing + │ │ engine + │ - │check │ │hooks │ │ review │ │ CodeQL + │ │ provenance│ │ trust + │ - │ │ │ │ │ + scans │ │ BinSkim │ │ │ │ audit │ + │author│ │ pre- │ │ dep + │ 
│ governance │ │ SBOM + │ │ policy │ + │screen│ │commit│ │ secret │ │ verify + │ │ signing + │ │ engine + │ + │check │ │hooks │ │ scans │ │ CodeQL + │ │ provenance│ │ trust + │ + │ │ │ │ │ │ │ BinSkim │ │ │ │ audit │ └──────┘ └──────┘ └─────────┘ └────────────┘ └───────────┘ └──────────┘ Earliest feedback Most comprehensive ``` @@ -216,48 +215,13 @@ jobs: ## PR-Time Gates -When code reaches a pull request, three independent workflows enforce -governance before merge. - -### §2.1 Governance Attestation - -The **Governance Attestation** action (`action/governance-attestation/`) -validates that PR authors have completed a 7-section attestation checklist -covering security, privacy, CELA, responsible AI, accessibility, release -readiness, and org-specific launch gates. - -```yaml -# .github/workflows/pr-governance.yml -name: PR Governance -on: - pull_request: - types: [opened, edited, synchronize] - -jobs: - attestation: - runs-on: ubuntu-latest - steps: - - uses: microsoft/agent-governance-toolkit/action/governance-attestation@main - with: - required-sections: | - 1) Security review - 2) Privacy review - 3) CELA review - 4) Responsible AI review - 5) Accessibility review - 6) Release Readiness / Safe Deployment - 7) Org-specific Launch Gates -``` - -The action outputs: -- `status`: pass or fail -- `errors`: list of missing sections -- `sections-found`: JSON mapping of sections to checkbox counts +When code reaches a pull request, independent workflows enforce governance +before merge. 
-### §2.2 Dependency Review +### §2.1 Dependency Review AGT's dependency review workflow blocks PRs that introduce dependencies with -known CVEs or disallowed licences: +known CVEs or disallowed licenses: ```yaml # From .github/workflows/dependency-review.yml @@ -273,9 +237,9 @@ known CVEs or disallowed licences: This runs on every PR that touches dependency manifests and flags: - Dependencies with moderate+ CVEs -- Dependencies with licences not on the allow list +- Dependencies with licenses not on the allow list -### §2.3 Secret Scanning +### §2.2 Secret Scanning The secret scanning workflow (`secret-scanning.yml`) runs on every PR to `main` and weekly on schedule. It combines: @@ -284,7 +248,7 @@ and weekly on schedule. It combines: 2. **High-entropy string scanning** for API keys, GitHub tokens, AWS keys, and Slack tokens using regex patterns -### §2.4 Supply Chain Checks +### §2.3 Supply Chain Checks The supply chain check workflow (`supply-chain-check.yml`) runs when dependency manifests change and enforces: @@ -292,7 +256,7 @@ manifests change and enforces: - **Exact version pinning**: no `^` or `~` version ranges in `package.json` - **Lockfile presence**: every package with dependencies must have a lockfile -### §2.5 Quality Gates +### §2.4 Quality Gates The quality gates workflow (`quality-gates.yml`) runs on every PR and blocks merge if: @@ -348,23 +312,7 @@ The `command` input supports four modes: | `policy-evaluate` | Evaluates a specific policy against a context | | `all` | Runs governance-verify, then marketplace-verify and policy-evaluate if paths are provided | -### §3.2 Security Scan Action - -The **Security Scan** action (`action/security-scan/`) scans directories for -secrets, CVEs, and dangerous code patterns: - -```yaml -- uses: microsoft/agent-governance-toolkit/action/security-scan@main - with: - paths: 'plugins/ scripts/' - min-severity: high # block on critical or high findings - exemptions-file: .security-exemptions.json -``` - -Outputs 
include `findings-count`, `blocking-count`, and full `findings` in JSON -format, making it easy to integrate with dashboards or notification systems. - -### §3.3 Policy Validation Workflow +### §3.2 Policy Validation Workflow The policy validation workflow (`policy-validation.yml`) triggers when any YAML file or the policy engine source changes. It: @@ -375,13 +323,13 @@ file or the policy engine source changes. It: This ensures that policy file changes don't break the policy engine. -### §3.4 CodeQL and Static Analysis +### §3.3 CodeQL and Static Analysis AGT uses CodeQL for semantic static analysis of Python and TypeScript code. The CodeQL workflow (`codeql.yml`) runs on pushes and PRs, uploading SARIF results to GitHub's security tab. -### §3.5 Dependency Confusion Scan +### §3.4 Dependency Confusion Scan A dedicated CI job runs `scripts/check_dependency_confusion.py --strict` on every build. This checks that: @@ -389,7 +337,7 @@ every build. This checks that: - Internal package names don't collide with public PyPI/npm packages - Notebook `pip install` commands only reference registered packages -### §3.6 Workflow Security Audit +### §3.5 Workflow Security Audit When GitHub Actions workflow files change, a workflow security job scans for: @@ -397,7 +345,7 @@ When GitHub Actions workflow files change, a workflow security job scans for: - Overly permissive permissions - Unpinned action references -### §3.7 .NET Binary Analysis (BinSkim) +### §3.6 .NET Binary Analysis (BinSkim) For the .NET SDK, the CI pipeline runs Microsoft BinSkim binary analysis on compiled assemblies: @@ -494,26 +442,22 @@ Here is how all the shift-left governance layers compose into a single pipeline: ``` Developer Machine GitHub PR CI Pipeline Release ───────────────── ───────── ─────────── ─────── -pre-commit hooks Governance Main CI SBOM -├─ validate-policy attestation ├─ lint (ruff, ESLint) ├─ SPDX -├─ validate-plugin ├─ 7-section ├─ build (.NET, TS, ├─ CycloneDX -│ -manifest │ checklist 
│ Rust, Go, Python) │ -├─ evaluate-plugin │ ├─ test (all SDKs) Signing -│ -policy Dependency review ├─ governance-verify ├─ Sigstore -├─ agt-validate ├─ CVE check ├─ policy-validation ├─ provenance -├─ agt-doctor (pre-push) ├─ licence check ├─ CodeQL / SAST │ +pre-commit hooks Dependency review Main CI SBOM +├─ validate-policy ├─ CVE check ├─ lint (ruff, ESLint) ├─ SPDX +├─ validate-plugin ├─ license check ├─ build (.NET, TS, ├─ CycloneDX +│ -manifest │ │ Rust, Go, Python) │ +├─ evaluate-plugin Secret scanning ├─ test (all SDKs) Signing +│ -policy ├─ Gitleaks ├─ governance-verify ├─ Sigstore +├─ agt-validate ├─ entropy scan ├─ policy-validation ├─ provenance +├─ agt-doctor (pre-push) │ ├─ CodeQL / SAST │ ├─ detect-secrets │ ├─ BinSkim (.NET) Provenance -├─ no-stubs Secret scanning ├─ dependency-scan ├─ SLSA -├─ no-custom-crypto ├─ Gitleaks ├─ workflow-security ├─ SBOM - ├─ entropy scan │ │ attestation +├─ no-stubs Supply chain check ├─ dependency-scan ├─ SLSA +├─ no-custom-crypto ├─ version pinning ├─ workflow-security ├─ SBOM + ├─ lockfile presence │ │ attestation │ ├─ ci-complete gate │ - Supply chain check │ (required status Scorecard - ├─ version pinning │ check) └─ OpenSSF - ├─ lockfile presence │ - │ Security scan action - Quality gates ├─ secrets - ├─ no stubs ├─ CVEs - ├─ no crypto ├─ dangerous patterns + Quality gates │ (required status Scorecard + ├─ no stubs │ check) └─ OpenSSF + ├─ no crypto ├─ security audit ├─ dep audit trail ``` @@ -556,8 +500,6 @@ enforcing that nothing that ran has failed. 
|-----------|----------| | Pre-commit hooks | `.pre-commit-hooks.yaml` | | Governance Verify action | `action/action.yml` | -| Security Scan action | `action/security-scan/action.yml` | -| Governance Attestation action | `action/governance-attestation/action.yml` | | Policy validation workflow | `.github/workflows/policy-validation.yml` | | Secret scanning workflow | `.github/workflows/secret-scanning.yml` | | Dependency review workflow | `.github/workflows/dependency-review.yml` | @@ -580,7 +522,7 @@ enforcing that nothing that ran has failed. [rollout template](../operations/pre-commit-hook-template.md) - **Add the Governance Verify action** to your CI pipeline for automated compliance checks -- **Enable dependency review** to catch CVE and licence issues at PR time +- **Enable dependency review** to catch CVE and license issues at PR time - **Read Tutorial 25** ([Security Hardening](25-security-hardening.md)) for deeper coverage of CodeQL, fuzzing, and Scorecard - **Read Tutorial 26** ([SBOM & Signing](26-sbom-and-signing.md)) for diff --git a/docs/tutorials/README.md b/docs/tutorials/README.md index 209715695..3e7531ada 100644 --- a/docs/tutorials/README.md +++ b/docs/tutorials/README.md @@ -44,8 +44,7 @@ guides. 
|---|----------|-------------------|---------| | 05 | [Agent Reliability (SRE)](05-agent-reliability.md) | SLOs, error budgets, circuit breakers, chaos testing | `agent-governance-toolkit-sre` | | 06 | [Execution Sandboxing](06-execution-sandboxing.md) | 4-tier privilege rings, resource limits, termination control | `agent-governance-toolkit-runtime` | -| 11 | [Saga Orchestration](11-saga-orchestration.md) | Multi-step transactions, DSL, fan-out, compensating actions | `agent-governance-toolkit-runtime` | -| 12 | [Liability & Attribution](12-liability-and-attribution.md) | Vouching, slashing, causal attribution, quarantine | `agent-governance-toolkit-runtime` | +| 11 | [Saga Orchestration](11-saga-orchestration.md) | Multi-step transactions with compensating actions | `agent-governance-toolkit-runtime` | | 14 | [Kill Switch & Rate Limiting](14-kill-switch-and-rate-limiting.md) | Emergency termination, rate limiting, ring elevation | `agent-governance-toolkit-runtime` | | 51 | [Cost Governance](51-cost-governance.md) | Tiered budgets, auto-throttle, kill switch, anomaly detection, cost optimization | `agent-governance-toolkit-sre` | @@ -194,9 +193,8 @@ A self-contained sub-series progressing from basic allow/deny rules to productio 3. [23 — Delegation Chains](23-delegation-chains.md) → scope narrowing and delegation 4. [16 — Protocol Bridges](16-protocol-bridges.md) → cross-protocol communication 5. [11 — Saga Orchestration](11-saga-orchestration.md) → multi-step workflows -6. [12 — Liability & Attribution](12-liability-and-attribution.md) → who's responsible -7. [17 — Advanced Trust & Behavior](17-advanced-trust-and-behavior.md) → dynamic trust -8. [24 — Cost & Token Budgets](24-cost-and-token-budgets.md) → control agent spend +6. [17 — Advanced Trust & Behavior](17-advanced-trust-and-behavior.md) → dynamic trust +7. 
[24 — Cost & Token Budgets](24-cost-and-token-budgets.md) → control agent spend ### 🔎 "I need to find all agents in my org" diff --git a/examples/github-actions-governance/.agents/security.yaml b/examples/github-actions-governance/.agents/security.yaml deleted file mode 100644 index eb7a0a6a8..000000000 --- a/examples/github-actions-governance/.agents/security.yaml +++ /dev/null @@ -1,18 +0,0 @@ -audit: - enabled: true - log_level: info - retention_days: 90 - -pii_scanning: - enabled: true - patterns: [ssn, email, credit_card] - -allowed_tools: - - web_search - - read_file - - query_database - -max_tool_calls: 20 - -require_human_approval: false -confidence_threshold: 0.8 diff --git a/examples/github-actions-governance/README.md b/examples/github-actions-governance/README.md deleted file mode 100644 index 3c2519e22..000000000 --- a/examples/github-actions-governance/README.md +++ /dev/null @@ -1,71 +0,0 @@ -# GitHub Actions Governance Gate — Example - -This example shows how to wire the AGT governance gate into a deployment -workflow so that every agent deployment is policy-checked and receipted -before it reaches production. 
- -## Files - -``` -.agents/security.yaml # Agent governance policy -agents.yaml # Agent manifest (versions, tools, models) -``` - -## Quick start - -```bash -pip install pyyaml cryptography -python ../../scripts/governance_gate.py \ - --policy .agents/security.yaml \ - --manifest agents.yaml \ - --commit abc1234 \ - --deployer octocat -``` - -## Using the reusable workflow - -In your own repository's deployment workflow: - -```yaml -jobs: - governance: - uses: microsoft/agent-governance-toolkit/.github/workflows/agent-governance-gate.yml@main - with: - policy_file: .agents/security.yaml - agent_manifest: agents.yaml - require_receipt: true - secrets: - signing_key: ${{ secrets.GOVERNANCE_SIGNING_KEY }} - - deploy: - needs: governance - if: needs.governance.outputs.gate_result == 'passed' - runs-on: ubuntu-latest - steps: - - run: echo "Deploying with receipt ${{ needs.governance.outputs.receipt_id }}" -``` - -## Generating a signing keypair - -```bash -python - <<'EOF' -from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey -from cryptography.hazmat.primitives.serialization import ( - Encoding, PrivateFormat, PublicFormat, NoEncryption -) -key = Ed25519PrivateKey.generate() -print(key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()).decode()) -print(key.public_key().public_bytes(Encoding.PEM, PublicFormat.SubjectPublicKeyInfo).decode()) -EOF -``` - -Store the private key as the `GOVERNANCE_SIGNING_KEY` secret in your repository. 
- -## Policy fields checked - -| Field | Required value | -|---|---| -| `audit.enabled` | `true` | -| `pii_scanning.enabled` | `true` | -| `allowed_tools` | a non-empty list | -| `max_tool_calls` | an integer | diff --git a/examples/github-actions-governance/agents.yaml b/examples/github-actions-governance/agents.yaml deleted file mode 100644 index 1c37990ad..000000000 --- a/examples/github-actions-governance/agents.yaml +++ /dev/null @@ -1,18 +0,0 @@ -agents: - - name: sales-summarizer - version: "1.2.0" - model: gpt-4o - policy: .agents/security.yaml - tools: - - web_search - - query_database - description: Summarizes sales data from the data warehouse. - - - name: support-triage - version: "0.9.1" - model: claude-sonnet-4-6 - policy: .agents/security.yaml - tools: - - read_file - - web_search - description: Triages incoming support tickets. diff --git a/mkdocs.yml b/mkdocs.yml index b9a5022ea..f97e235eb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -141,7 +141,6 @@ nav: - Prompt Injection Detection: tutorials/09-prompt-injection-detection.md - Plugin Marketplace: tutorials/10-plugin-marketplace.md - Saga Orchestration: tutorials/11-saga-orchestration.md - - Liability & Attribution: tutorials/12-liability-and-attribution.md - Observability & Tracing: tutorials/13-observability-and-tracing.md - Kill Switch & Rate Limiting: tutorials/14-kill-switch-and-rate-limiting.md - RL Training Governance: tutorials/15-rl-training-governance.md diff --git a/scripts/check_dependency_confusion.py b/scripts/check_dependency_confusion.py index da11ff852..468e2693a 100644 --- a/scripts/check_dependency_confusion.py +++ b/scripts/check_dependency_confusion.py @@ -107,7 +107,7 @@ "sql", "async", "nexus", "caas-core", "message-bus", "ai-agents", "amb", "eval_type_backport", # Integration packages / real PyPI packages used as deps - # Optional integration deps in agt-integrations/pyproject.toml (all real PyPI packages): + # Optional integration backend deps (all real PyPI packages): # 
flowise (Flowise SDK), boto3 (AWS, for AVP), nostr-sdk (Nostr WoT), oso (Oso authz). "flowise", "boto3", "nostr-sdk", "oso", # fastembed: Qdrant's embedding library, optional dep for the default-off diff --git a/scripts/governance_gate.py b/scripts/governance_gate.py deleted file mode 100644 index 6dd5b08f1..000000000 --- a/scripts/governance_gate.py +++ /dev/null @@ -1,291 +0,0 @@ -#!/usr/bin/env python3 -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -""" -GitHub Actions Governance Gate for Agent Deployments - -Validates an agent's policy configuration, generates a signed Ed25519 -deployment receipt, and writes an entry to the audit trail. Exits non-zero -on any policy violation so GitHub Actions can block the deployment. - -Usage (standalone): - python scripts/governance_gate.py \ - --policy .agents/security.yaml \ - --manifest agents.yaml \ - --commit abc1234 \ - --deployer octocat - -Usage (from a GitHub Actions workflow): - - name: Governance Gate - run: python scripts/governance_gate.py - env: - GOVERNANCE_POLICY: .agents/security.yaml - GOVERNANCE_MANIFEST: agents.yaml - GITHUB_SHA: ${{ github.sha }} - GITHUB_ACTOR: ${{ github.actor }} - -Exit codes: - 0 All checks passed, receipt generated. - 1 One or more policy checks failed. - 2 Bad arguments or missing required files. 
-""" - -from __future__ import annotations - -import argparse -import hashlib -import json -import os -import sys -import time -import uuid -from pathlib import Path -from typing import Any - -try: - import yaml - _HAS_YAML = True -except ImportError: - _HAS_YAML = False - -try: - from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey - from cryptography.hazmat.primitives.serialization import Encoding, PublicFormat - import base64 - _HAS_CRYPTO = True -except ImportError: - _HAS_CRYPTO = False - - -# --------------------------------------------------------------------------- -# Required policy fields and their validation rules -# --------------------------------------------------------------------------- - -_REQUIRED_CHECKS: list[tuple[str, str, Any]] = [ - # (field_path, display_name, expected_value_or_type) - ("audit.enabled", "audit_enabled", True), - ("pii_scanning.enabled","pii_scanning", True), - ("allowed_tools", "allowed_tools", list), - ("max_tool_calls", "max_tool_calls", int), -] - - -def _get_nested(data: dict, dotted_key: str) -> tuple[bool, Any]: - """Traverse nested dict with a dotted key. 
Returns (found, value).""" - keys = dotted_key.split(".") - node: Any = data - for k in keys: - if not isinstance(node, dict) or k not in node: - return False, None - node = node[k] - return True, node - - -def _validate_policy(policy_data: dict) -> list[str]: - """Return a list of failure messages; empty list means all passed.""" - failures: list[str] = [] - for field_path, display, expected in _REQUIRED_CHECKS: - found, value = _get_nested(policy_data, field_path) - if not found: - failures.append(f"{display}: MISSING (field '{field_path}' not found)") - continue - if expected is True and not value: - failures.append(f"{display}: FAIL (expected true, got {value!r})") - elif expected is list and not isinstance(value, list): - failures.append(f"{display}: FAIL (expected a list, got {type(value).__name__})") - elif expected is int and not isinstance(value, int): - failures.append(f"{display}: FAIL (expected an integer, got {type(value).__name__})") - return failures - - -# --------------------------------------------------------------------------- -# Receipt generation -# --------------------------------------------------------------------------- - -def _sha256(text: str) -> str: - return hashlib.sha256(text.encode()).hexdigest() - - -def _canonical_payload(receipt: dict) -> str: - """RFC 8785-style canonical JSON (signature excluded).""" - payload = {k: v for k, v in receipt.items() if k not in ("signature", "signer_public_key")} - return json.dumps(payload, sort_keys=True, separators=(",", ":")) - - -def _generate_receipt( - commit: str, - deployer: str, - policy_hash: str, - manifest_hash: str, - decision: str, - private_key_pem: str | None, -) -> dict: - receipt: dict[str, Any] = { - "receipt_id": f"rec_{uuid.uuid4().hex[:12]}", - "action": "agent_deployment", - "principal": deployer, - "decision": decision, - "commit_sha": commit, - "policy_hash": f"sha256:{policy_hash}", - "manifest_hash": f"sha256:{manifest_hash}", - "nonce": uuid.uuid4().hex, - "timestamp": 
time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), - "signature": None, - "signer_public_key": None, - } - - if _HAS_CRYPTO and private_key_pem: - try: - from cryptography.hazmat.primitives.serialization import load_pem_private_key - key = load_pem_private_key(private_key_pem.encode(), password=None) - if isinstance(key, Ed25519PrivateKey): - payload_bytes = _canonical_payload(receipt).encode() - sig = key.sign(payload_bytes) - pub = key.public_key().public_bytes(Encoding.Raw, PublicFormat.Raw) - receipt["signature"] = base64.b64encode(sig).decode() - receipt["signer_public_key"] = base64.b64encode(pub).decode() - except Exception as exc: - print(f" WARNING: signing failed: {exc}", file=sys.stderr) - receipt["signature_error"] = str(exc) - - return receipt - - -# --------------------------------------------------------------------------- -# Audit trail -# --------------------------------------------------------------------------- - -def _write_audit_entry(receipt: dict, audit_path: Path) -> None: - audit_path.parent.mkdir(parents=True, exist_ok=True) - with audit_path.open("a", encoding="utf-8") as f: - f.write(json.dumps(receipt) + "\n") - - -# --------------------------------------------------------------------------- -# Output helpers -# --------------------------------------------------------------------------- - -def _ok(msg: str) -> None: - print(f" {msg:<28s} PASS") - - -def _fail(msg: str) -> None: - print(f" {msg:<28s} FAIL", file=sys.stderr) - - -def _banner(title: str) -> None: - print(f"\n{title}") - print("-" * (len(title))) - - -# --------------------------------------------------------------------------- -# Main -# --------------------------------------------------------------------------- - -def run( - policy_file: Path, - manifest_file: Path, - commit: str, - deployer: str, - audit_log: Path, - private_key_pem: str | None, - require_receipt: bool, -) -> int: - print("=" * 52) - print(" Governance Gate: agent-deployment-check") - print("=" * 52) - 
print(f" Policy file: {policy_file}") - print(f" Agent manifest: {manifest_file}") - print(f" Commit: {commit[:12]}") - print(f" Deployer: {deployer}") - - # --- Load files --- - if not _HAS_YAML: - print("\nERROR: PyYAML is required. Install with: pip install pyyaml", file=sys.stderr) - return 2 - - for path in (policy_file, manifest_file): - if not path.exists(): - print(f"\nERROR: File not found: {path}", file=sys.stderr) - return 2 - - policy_raw = policy_file.read_text(encoding="utf-8") - manifest_raw = manifest_file.read_text(encoding="utf-8") - policy_data: dict = yaml.safe_load(policy_raw) or {} - policy_hash = _sha256(policy_raw) - manifest_hash = _sha256(manifest_raw) - - # --- Policy checks --- - _banner("Checking policy configuration...") - failures = _validate_policy(policy_data) - - all_display = [d for _, d, _ in _REQUIRED_CHECKS] - failing_display = {f.split(":")[0].strip() for f in failures} - for display in all_display: - if display in failing_display: - _fail(display) - else: - _ok(display) - - # --- Receipt --- - decision = "allow" if not failures else "deny" - _banner("Generating deployment receipt...") - receipt = _generate_receipt( - commit=commit, - deployer=deployer, - policy_hash=policy_hash, - manifest_hash=manifest_hash, - decision=decision, - private_key_pem=private_key_pem, - ) - signed = receipt.get("signature") is not None - print(f" Receipt ID: {receipt['receipt_id']}") - print(f" Signed: {'yes (Ed25519)' if signed else 'no (cryptography not available)'}") - print(f" Policy hash: {receipt['policy_hash'][:20]}...") - - if require_receipt and not signed and _HAS_CRYPTO and not private_key_pem: - failures.append("receipt: FAIL (require_receipt=true but no signing key provided)") - - # --- Audit trail --- - _write_audit_entry(receipt, audit_log) - - # --- Result --- - print() - if failures: - print("Governance gate: FAILED", file=sys.stderr) - print("\nFailures:", file=sys.stderr) - for f in failures: - print(f" - {f}", file=sys.stderr) 
- return 1 - - print("Governance gate: PASSED") - return 0 - - -def _parse_args(argv: list[str] | None = None) -> argparse.Namespace: - p = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter) - p.add_argument("--policy", default=os.environ.get("GOVERNANCE_POLICY", ".agents/security.yaml"), type=Path) - p.add_argument("--manifest", default=os.environ.get("GOVERNANCE_MANIFEST", "agents.yaml"), type=Path) - p.add_argument("--commit", default=os.environ.get("GITHUB_SHA", os.environ.get("COMMIT", "unknown"))) - p.add_argument("--deployer", default=os.environ.get("GITHUB_ACTOR", os.environ.get("DEPLOYER", "unknown"))) - p.add_argument("--audit-log", default=".governance/audit.jsonl", type=Path) - p.add_argument("--signing-key", default=os.environ.get("GOVERNANCE_SIGNING_KEY"), help="Ed25519 private key PEM (or env GOVERNANCE_SIGNING_KEY)") - p.add_argument("--require-receipt", action="store_true", default=os.environ.get("GOVERNANCE_REQUIRE_RECEIPT", "").lower() == "true") - return p.parse_args(argv) - - -def main(argv: list[str] | None = None) -> int: - args = _parse_args(argv) - return run( - policy_file=args.policy, - manifest_file=args.manifest, - commit=args.commit, - deployer=args.deployer, - audit_log=args.audit_log, - private_key_pem=args.signing_key, - require_receipt=args.require_receipt, - ) - - -if __name__ == "__main__": - sys.exit(main()) diff --git a/scripts/tests/test_governance_gate.py b/scripts/tests/test_governance_gate.py deleted file mode 100644 index 88c3e074f..000000000 --- a/scripts/tests/test_governance_gate.py +++ /dev/null @@ -1,318 +0,0 @@ -# Copyright (c) Microsoft Corporation. -# Licensed under the MIT License. -"""Tests for scripts/governance_gate.py. - -All tests are fully offline — no GitHub Actions environment needed. -Uses tmp_path for file I/O; no network calls are made. 
- -Coverage: -- Policy validation: all required fields present and correct -- Policy validation: each field missing or wrong type/value -- Receipt generation: structure and fields -- Receipt signing when cryptography is available -- require_receipt with no signing key -- Audit log written as JSONL -- run() exit codes: 0 on pass, 1 on fail, 2 on missing file -- CLI arg parsing: env var fallbacks -- _get_nested: dotted key traversal -- _sha256 determinism -""" - -import json -import sys -from pathlib import Path - -import pytest - -sys.path.insert(0, str(Path(__file__).parent.parent.parent / "scripts")) -import governance_gate as gg - - -# Helpers - -def _write_policy(tmp_path: Path, overrides: dict | None = None) -> Path: - """Write a valid policy YAML, optionally overriding top-level keys.""" - import yaml - data: dict = { - "audit": {"enabled": True}, - "pii_scanning": {"enabled": True}, - "allowed_tools": ["web_search", "read_file"], - "max_tool_calls": 10, - } - if overrides: - data.update(overrides) - p = tmp_path / "security.yaml" - p.write_text(yaml.dump(data)) - return p - - -def _write_manifest(tmp_path: Path) -> Path: - p = tmp_path / "agents.yaml" - p.write_text("agents:\n - name: test-agent\n version: 1.0.0\n") - return p - - -# _get_nested - -class TestGetNested: - def test_top_level_key(self): - found, val = gg._get_nested({"a": 1}, "a") - assert found and val == 1 - - def test_nested_key(self): - found, val = gg._get_nested({"a": {"b": True}}, "a.b") - assert found and val is True - - def test_missing_key(self): - found, val = gg._get_nested({"a": 1}, "b") - assert not found and val is None - - def test_missing_nested_key(self): - found, val = gg._get_nested({"a": {}}, "a.b") - assert not found - - def test_empty_dict(self): - found, _ = gg._get_nested({}, "a") - assert not found - - -# _validate_policy - -class TestValidatePolicy: - def test_valid_policy_no_failures(self, tmp_path): - import yaml - p = _write_policy(tmp_path) - data = 
yaml.safe_load(p.read_text()) - assert gg._validate_policy(data) == [] - - def test_missing_audit_enabled(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"audit": {}}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert any("audit_enabled" in f for f in failures) - - def test_audit_enabled_false(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"audit": {"enabled": False}}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert any("audit_enabled" in f for f in failures) - - def test_missing_pii_scanning(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"pii_scanning": {}}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert any("pii_scanning" in f for f in failures) - - def test_allowed_tools_not_a_list(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"allowed_tools": "web_search"}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert any("allowed_tools" in f for f in failures) - - def test_max_tool_calls_not_int(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"max_tool_calls": "ten"}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert any("max_tool_calls" in f for f in failures) - - def test_multiple_failures_reported(self, tmp_path): - import yaml - p = _write_policy(tmp_path, {"audit": {}, "pii_scanning": {}}) - data = yaml.safe_load(p.read_text()) - failures = gg._validate_policy(data) - assert len(failures) >= 2 - - -# _sha256 - -class TestSha256: - def test_deterministic(self): - assert gg._sha256("hello") == gg._sha256("hello") - - def test_different_inputs_differ(self): - assert gg._sha256("a") != gg._sha256("b") - - def test_returns_hex_string(self): - result = gg._sha256("test") - assert len(result) == 64 - int(result, 16) # raises if not hex - - -# _generate_receipt - -class TestGenerateReceipt: - def 
test_receipt_has_required_fields(self): - r = gg._generate_receipt("abc", "alice", "phash", "mhash", "allow", None) - for field in ("receipt_id", "action", "principal", "decision", - "commit_sha", "policy_hash", "manifest_hash", "timestamp", "nonce"): - assert field in r - - def test_receipt_id_prefixed_rec(self): - r = gg._generate_receipt("abc", "alice", "phash", "mhash", "allow", None) - assert r["receipt_id"].startswith("rec_") - - def test_policy_hash_prefixed_sha256(self): - r = gg._generate_receipt("abc", "alice", "phash", "mhash", "allow", None) - assert r["policy_hash"].startswith("sha256:") - - def test_decision_allow(self): - r = gg._generate_receipt("abc", "alice", "p", "m", "allow", None) - assert r["decision"] == "allow" - - def test_decision_deny(self): - r = gg._generate_receipt("abc", "alice", "p", "m", "deny", None) - assert r["decision"] == "deny" - - def test_no_signature_without_key(self): - r = gg._generate_receipt("abc", "alice", "p", "m", "allow", None) - assert r["signature"] is None - - def test_unique_nonces(self): - r1 = gg._generate_receipt("abc", "alice", "p", "m", "allow", None) - r2 = gg._generate_receipt("abc", "alice", "p", "m", "allow", None) - assert r1["nonce"] != r2["nonce"] - - @pytest.mark.skipif(not gg._HAS_CRYPTO, reason="cryptography not installed") - def test_signed_receipt_with_real_key(self): - from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PrivateKey - from cryptography.hazmat.primitives.serialization import ( - Encoding, PrivateFormat, PublicFormat, NoEncryption - ) - key = Ed25519PrivateKey.generate() - pem = key.private_bytes(Encoding.PEM, PrivateFormat.PKCS8, NoEncryption()).decode() - r = gg._generate_receipt("abc", "alice", "p", "m", "allow", pem) - assert r["signature"] is not None - assert r["signer_public_key"] is not None - - -# _write_audit_entry - -class TestWriteAuditEntry: - def test_creates_file_and_appends(self, tmp_path): - log = tmp_path / "sub" / "audit.jsonl" - entry = 
{"receipt_id": "rec_001", "decision": "allow"} - gg._write_audit_entry(entry, log) - lines = log.read_text().strip().splitlines() - assert len(lines) == 1 - assert json.loads(lines[0])["receipt_id"] == "rec_001" - - def test_appends_multiple_entries(self, tmp_path): - log = tmp_path / "audit.jsonl" - gg._write_audit_entry({"n": 1}, log) - gg._write_audit_entry({"n": 2}, log) - lines = log.read_text().strip().splitlines() - assert len(lines) == 2 - assert json.loads(lines[1])["n"] == 2 - - -# run() — integration-level - -class TestRun: - def test_valid_policy_exits_0(self, tmp_path): - policy = _write_policy(tmp_path) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(policy, manifest, "abc1234", "alice", audit, None, False) - assert code == 0 - - def test_invalid_policy_exits_1(self, tmp_path): - import yaml - policy = tmp_path / "bad.yaml" - policy.write_text(yaml.dump({"audit": {"enabled": False}, "pii_scanning": {"enabled": True}, "allowed_tools": [], "max_tool_calls": 5})) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(policy, manifest, "abc", "alice", audit, None, False) - assert code == 1 - - def test_missing_policy_file_exits_2(self, tmp_path): - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(tmp_path / "nonexistent.yaml", manifest, "abc", "alice", audit, None, False) - assert code == 2 - - def test_missing_manifest_file_exits_2(self, tmp_path): - policy = _write_policy(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(policy, tmp_path / "nonexistent.yaml", "abc", "alice", audit, None, False) - assert code == 2 - - def test_audit_log_written_on_pass(self, tmp_path): - policy = _write_policy(tmp_path) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - gg.run(policy, manifest, "abc1234", "alice", audit, None, False) - assert audit.exists() - entry = json.loads(audit.read_text().strip()) - assert 
entry["decision"] == "allow" - assert entry["commit_sha"] == "abc1234" - assert entry["principal"] == "alice" - - def test_audit_log_written_on_fail(self, tmp_path): - import yaml - policy = tmp_path / "bad.yaml" - policy.write_text(yaml.dump({"audit": {}, "pii_scanning": {"enabled": True}, "allowed_tools": [], "max_tool_calls": 5})) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - gg.run(policy, manifest, "abc", "alice", audit, None, False) - entry = json.loads(audit.read_text().strip()) - assert entry["decision"] == "deny" - - def test_receipt_id_in_audit_log(self, tmp_path): - policy = _write_policy(tmp_path) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - gg.run(policy, manifest, "abc", "alice", audit, None, False) - entry = json.loads(audit.read_text().strip()) - assert entry["receipt_id"].startswith("rec_") - - def test_require_receipt_without_key_fails(self, tmp_path): - if not gg._HAS_CRYPTO: - pytest.skip("cryptography not installed") - policy = _write_policy(tmp_path) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(policy, manifest, "abc", "alice", audit, None, require_receipt=True) - assert code == 1 - - def test_each_policy_failure_reported_individually(self, tmp_path): - import yaml - policy = tmp_path / "p.yaml" - policy.write_text(yaml.dump({"audit": {}, "pii_scanning": {}, "allowed_tools": "bad", "max_tool_calls": "x"})) - manifest = _write_manifest(tmp_path) - audit = tmp_path / "audit.jsonl" - code = gg.run(policy, manifest, "abc", "alice", audit, None, False) - assert code == 1 - - -# _parse_args — env var fallbacks - -class TestParseArgs: - def test_defaults_from_env(self, monkeypatch): - monkeypatch.setenv("GOVERNANCE_POLICY", "custom/policy.yaml") - monkeypatch.setenv("GOVERNANCE_MANIFEST", "custom/agents.yaml") - monkeypatch.setenv("GITHUB_SHA", "deadbeef") - monkeypatch.setenv("GITHUB_ACTOR", "octocat") - args = gg._parse_args([]) - assert 
args.policy == Path("custom/policy.yaml") - assert args.manifest == Path("custom/agents.yaml") - assert args.commit == "deadbeef" - assert args.deployer == "octocat" - - def test_explicit_args_override_env(self, monkeypatch): - monkeypatch.setenv("GITHUB_SHA", "envsha") - args = gg._parse_args(["--commit", "clisha"]) - assert args.commit == "clisha" - - def test_require_receipt_flag(self): - args = gg._parse_args(["--require-receipt"]) - assert args.require_receipt is True - - def test_require_receipt_from_env(self, monkeypatch): - monkeypatch.setenv("GOVERNANCE_REQUIRE_RECEIPT", "true") - args = gg._parse_args([]) - assert args.require_receipt is True diff --git a/scripts/verify_tutorials_01_34.py b/scripts/verify_tutorials_01_34.py index 11afb8f68..d0071c2a5 100644 --- a/scripts/verify_tutorials_01_34.py +++ b/scripts/verify_tutorials_01_34.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# Copyright (c) Microsoft Corporation. Licensed under the MIT License. """Verify tutorials 01-34 code examples.""" import sys, os PASS = FAIL = 0 @@ -178,8 +179,8 @@ def t11(): from agent_os import supervisor check("T11 supervisor import", t11) -# === T12: Liability & Attribution === -print("\n=== T12: Liability ===") +# === T12: Removed tutorial slot === +print("\n=== T12: Removed tutorial slot ===") def t12(): from agentmesh.governance import AuditLog, AuditEntry diff --git a/tests/ci/test_regression_a2_toolkit_regex.py b/tests/ci/test_regression_a2_toolkit_regex.py index 0806fd10d..76849bed9 100644 --- a/tests/ci/test_regression_a2_toolkit_regex.py +++ b/tests/ci/test_regression_a2_toolkit_regex.py @@ -24,14 +24,10 @@ ACTIONS = [ REPO_ROOT / "action" / "action.yml", - REPO_ROOT / "action" / "security-scan" / "action.yml", - REPO_ROOT / "action" / "governance-attestation" / "action.yml", ] READMES = [ REPO_ROOT / "action" / "README.md", - REPO_ROOT / "action" / "security-scan" / "README.md", - REPO_ROOT / "action" / "governance-attestation" / "README.md", ] EXPECTED_REGEX = 
"^[0-9]+\\.[0-9]+\\.[0-9]+((a|b|rc)[0-9]+)?$"