diff --git a/README.md b/README.md index 31841e59..a2953c93 100644 --- a/README.md +++ b/README.md @@ -148,6 +148,27 @@ scripts/ conways-rules.txt # Core rules for the automaton ``` +## Audit event fields (risk/policy) + +Tool-call results now support optional structured audit metadata to make policy outcomes easier to analyze downstream: + +- `riskLevel`: tool risk classification (`safe|caution|dangerous|forbidden`) +- `policyDecision`: normalized policy outcome (`allow|block|require_confirmation|dry_run`) +- `policyReason`: short human-readable reason +- `capability`: capability namespace for the action (e.g. `network.expose_port`, `finance.transfer_credits`) + +Example: + +```json +{ + "name": "transfer_credits", + "riskLevel": "dangerous", + "policyDecision": "allow", + "policyReason": "ALLOWED: All policy checks passed", + "capability": "finance.transfer_credits" +} +``` + ## License MIT diff --git a/src/agent/tools.ts b/src/agent/tools.ts index f32a069e..6a1acff3 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -3261,6 +3261,35 @@ export function toolsToInferenceFormat( * Execute a tool call and return the result. * Optionally evaluates against the policy engine before execution. 
*/ +/** Maps a tool to the capability namespace recorded in audit metadata: a fixed label for the finance, network, domain and self-modification tool names listed in the switch, otherwise category.name built from the tool itself. NOTE(review): this insertion lands directly after the closing of the JSDoc that describes executing a tool call, so that JSDoc now sits above this helper rather than above executeTool; consider moving these helpers above that comment. */ function mapCapability(tool: AutomatonTool): string { + switch (tool.name) { + case "transfer_credits": + return "finance.transfer_credits"; + case "x402_fetch": + return "finance.x402_payment"; + case "expose_port": + return "network.expose_port"; + case "remove_port": + return "network.remove_port"; + case "register_domain": + case "add_dns_record": + case "delete_dns_record": + return "domain.manage"; + case "self_modify_code": + case "install_skill": + case "uninstall_skill": + return "self_mod.modify"; + default: + return `${tool.category}.${tool.name}`; + } +} + +/** Converts a policy action into the audit decision label: deny becomes block, quarantine becomes require_confirmation, and any other action becomes allow. The dry_run member of the return type is never produced by this function. */ function toPolicyDecisionLabel(action: "allow" | "deny" | "quarantine"): "allow" | "block" | "require_confirmation" | "dry_run" { + if (action === "deny") return "block"; + if (action === "quarantine") return "require_confirmation"; + return "allow"; +} + export async function executeTool( toolName: string, args: Record, @@ -3284,9 +3313,16 @@ export async function executeTool( result: "", durationMs: 0, error: `Unknown tool: ${toolName}`, + policyDecision: "block", + policyReason: "TOOL_UNKNOWN: not found in registered tool set", + capability: `unknown.${toolName}`, }; } + const capability = mapCapability(tool); + let policyDecisionLabel: "allow" | "block" | "require_confirmation" | "dry_run" = "allow"; + let policyReason = "No policy engine provided; executed legacy allow path"; + // Policy evaluation (if engine is provided) if (policyEngine && turnContext) { const request: PolicyRequest = { @@ -3297,6 +3333,8 @@ export async function executeTool( }; const decision = policyEngine.evaluate(request); policyEngine.logDecision(decision); + policyDecisionLabel = toPolicyDecisionLabel(decision.action); + policyReason = `${decision.reasonCode}: ${decision.humanMessage}`; if (decision.action !== "allow") { return { @@ -3306,6 +3344,10 @@ export async function executeTool( result: "", durationMs: Date.now() - startTime, error: `Policy denied: ${decision.reasonCode} — ${decision.humanMessage}`, + 
riskLevel: tool.riskLevel, + policyDecision: policyDecisionLabel, + policyReason, + capability, }; } } @@ -3368,6 +3410,10 @@ export async function executeTool( arguments: args, result, durationMs: Date.now() - startTime, + riskLevel: tool.riskLevel, + policyDecision: policyDecisionLabel, + policyReason, + capability, }; } catch (err: any) { return { @@ -3377,6 +3423,10 @@ export async function executeTool( result: "", durationMs: Date.now() - startTime, error: err.message || String(err), + riskLevel: tool.riskLevel, + policyDecision: policyDecisionLabel, + policyReason, + capability, }; } } diff --git a/src/types.ts b/src/types.ts index 4e967561..5cc87158 100644 --- a/src/types.ts +++ b/src/types.ts @@ -118,6 +118,11 @@ export interface ToolCallResult { result: string; durationMs: number; error?: string; + /** Structured policy/audit metadata (optional for backward compatibility) */ + riskLevel?: RiskLevel; + policyDecision?: "allow" | "block" | "require_confirmation" | "dry_run"; + policyReason?: string; + capability?: string; } export interface TokenUsage {