Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
e891460
docs: add unified PAI + IronClaw architecture design
curtitoo Feb 17, 2026
5d4850e
docs: add unified architecture implementation plan (14 tasks, Phases …
curtitoo Feb 17, 2026
7d27397
feat: add PAI heartbeat triage script and documentation
curtitoo Feb 17, 2026
da24999
feat: add daemon-state library and heartbeat API route
curtitoo Feb 17, 2026
a0f785b
feat: add Radix Tabs UI component for bridged dashboard views
curtitoo Feb 17, 2026
6fd1637
feat: add GET /api/pai/memory for PAI native memory data
curtitoo Feb 17, 2026
f7cb507
feat: add GET /api/pai/settings for safe PAI config display
curtitoo Feb 17, 2026
3877cd5
feat: add GET /api/pai/logs for PAI debug log entries
curtitoo Feb 17, 2026
688cc47
feat: bridge Memory page with PAI + IronClaw tabs
curtitoo Feb 17, 2026
255df3c
feat: bridge Logs page with PAI + IronClaw tabs
curtitoo Feb 17, 2026
46012c5
feat: bridge Settings page with PAI + IronClaw tabs
curtitoo Feb 17, 2026
4303195
fix: add exponential backoff to SSE reconnect (RedTeam MEDIUM fix)
curtitoo Feb 17, 2026
4ac9508
fix: address code review findings across Phases 0-2
curtitoo Feb 17, 2026
2bdeabf
docs: add Anti-Corruption Layer (Phase 3) design
curtitoo Feb 17, 2026
37121d8
docs: add Anti-Corruption Layer (Phase 3) implementation plan
curtitoo Feb 17, 2026
01357b6
feat(acl): add PAI-owned type definitions for all 8 domains
curtitoo Feb 17, 2026
5f6df6b
feat(acl): add IronClaw-to-PAI mapper functions
curtitoo Feb 17, 2026
7d7322c
feat(acl): add /api/pai/agents and /api/pai/agents/[id] routes
curtitoo Feb 17, 2026
d732740
feat(acl): add /api/pai/routines and /api/pai/routines/[id] routes
curtitoo Feb 17, 2026
e3e2d55
feat(acl): add /api/pai/extensions and /api/pai/summary routes
curtitoo Feb 17, 2026
3f97cc1
feat(acl): migrate agents pages to PAI types
curtitoo Feb 17, 2026
adddf93
feat(acl): migrate routines pages to PAI types
curtitoo Feb 17, 2026
ed2d2a1
feat(acl): migrate extensions page to PAI types
curtitoo Feb 17, 2026
cc465ea
feat(acl): migrate overview, ask, and SSE hook to PAI types
curtitoo Feb 17, 2026
0ba9103
docs(acl): add Phase 3 deferral comments for IronClaw proxy routes
curtitoo Feb 17, 2026
3e5dfa8
feat(governance): add settings classification library
curtitoo Feb 17, 2026
857d7c1
feat(governance): add security events API route
curtitoo Feb 17, 2026
0cf439d
feat(governance): add security status API route
curtitoo Feb 17, 2026
369359d
feat(governance): add settings audit API route
curtitoo Feb 17, 2026
65e0497
feat(governance): add governance dashboard page
curtitoo Feb 17, 2026
5ec7967
feat(governance): add Governance link to sidebar
curtitoo Feb 17, 2026
13f5769
feat(governance): add critical guard and tier badges to settings
curtitoo Feb 17, 2026
130a5a7
fix: strip session_id from security-events API + fix prefix matching
curtitoo Feb 17, 2026
eeece28
feat(inference): add CC-Mirror model configuration page
curtitoo Feb 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions Packs/pai-core-install/src/skills/CORE/Tools/heartbeat-triage.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env bash
set -euo pipefail

# PAI Heartbeat Triage — invoked by IronClaw on a cron schedule.
# Lightweight: uses haiku with no tools/hooks to decide if escalation is needed.
#
# Flow:
#   1. Load the previous state (falling back to defaults if it is unreadable).
#   2. Gather signals: tails of *.error logs and names of *.md memory files
#      that are newer than the state file.
#   3. Ask a tool-less Claude session for an idle/escalate verdict.
#   4. Persist the new state.
#   5. On "escalate", start a full Claude session in the background.
#
# jq is required. If the claude triage call fails, the verdict falls back to
# "idle" so a broken CLI never triggers an escalation.

STATE_DIR="$HOME/.claude/MEMORY/STATE"
STATE_FILE="$STATE_DIR/daemon-state.json"
LOG_DIR="$HOME/.claude/logs"
DEFAULT_STATE='{"last_check":0,"status":"idle","escalation_count":0}'

# jq filter: true only when the input is exactly one JSON object.
SINGLE_OBJECT_FILTER='length == 1 and (.[0] | type == "object")'

if ! command -v jq >/dev/null 2>&1; then
  printf 'heartbeat-triage: jq is required but was not found on PATH\n' >&2
  exit 127
fi

mkdir -p "$STATE_DIR" "$LOG_DIR"

# Bootstrap state file on first run
if [[ ! -f "$STATE_FILE" ]]; then
  printf '%s\n' "$DEFAULT_STATE" > "$STATE_FILE"
fi

prev_state=$(cat "$STATE_FILE")

# An empty or corrupt state file must not wedge every future run: use the
# defaults in memory. The file itself is left alone so its mtime still scopes
# the `find -newer` calls below.
if ! jq -es "$SINGLE_OBJECT_FILTER" <<<"$prev_state" >/dev/null 2>&1; then
  printf 'heartbeat-triage: state file is not a JSON object; using defaults\n' >&2
  prev_state=$DEFAULT_STATE
fi

# Coerce to an integer so the arithmetic below cannot fail on a bad value.
last_check=$(jq -r '.last_check | if type == "number" then floor else 0 end' <<<"$prev_state")
now=$(date +%s)

# -- Gather signals since last check --
# Last line of each error log modified since the previous heartbeat (the state
# file's mtime is the reference point). `|| true` is intentional: `head` closing
# the pipe early, or find reporting unreadable paths, must not abort under
# pipefail.
recent_failures=$(find "$LOG_DIR" -name '*.error' -newer "$STATE_FILE" -exec tail -n 1 {} + 2>/dev/null | head -n 20 || true)

# New learnings captured since last heartbeat
learnings=""
if [[ -d "$HOME/.claude/MEMORY" ]]; then
  learnings=$(find "$HOME/.claude/MEMORY" -name '*.md' -newer "$STATE_FILE" -exec basename {} \; 2>/dev/null | head -n 10 || true)
fi

# -- Triage via lightweight Claude session (no tools, no hooks) --
triage_prompt="Previous state: $prev_state
Time since last check: $(( now - last_check ))s
Recent failures: ${recent_failures:-none}
New learnings: ${learnings:-none}

Respond with ONLY valid JSON: {\"action\":\"idle|escalate\",\"reason\":\"...\",\"priority\":\"low|medium|high\"}"

triage_result=$(claude --print \
  --model haiku \
  --tools '' \
  --output-format text \
  --setting-sources '' \
  --system-prompt "You are a PAI system health triage agent. Analyze signals and decide: idle (nothing actionable) or escalate (needs a full session). Be conservative — only escalate for real issues." \
  "$triage_prompt" 2>/dev/null) || triage_result='{"action":"idle","reason":"triage call failed","priority":"low"}'

# The model is asked for bare JSON but may still wrap it in prose or code
# fences. Treat anything that is not a single JSON object as "idle" instead of
# letting jq abort the script before the state is persisted.
if ! jq -es "$SINGLE_OBJECT_FILTER" <<<"$triage_result" >/dev/null 2>&1; then
  triage_result='{"action":"idle","reason":"triage response was not valid JSON","priority":"low"}'
fi

# -- Persist updated state --
action=$(jq -r '.action // "idle"' <<<"$triage_result")
reason=$(jq -r '.reason // "no reason"' <<<"$triage_result")
esc_count=$(jq -r '.escalation_count | if type == "number" then floor else 0 end' <<<"$prev_state")

# Only the two documented verdicts are meaningful; anything else is idle.
case "$action" in
  idle | escalate) ;;
  *) action="idle" ;;
esac

if [[ "$action" == "escalate" ]]; then
  esc_count=$(( esc_count + 1 ))
fi

# Build the JSON first and write it afterwards: redirecting jq straight into
# the state file would truncate the file even when jq fails.
new_state=$(jq -n \
  --argjson now "$now" \
  --arg status "$action" \
  --arg reason "$reason" \
  --argjson esc_count "$esc_count" \
  '{last_check: $now, status: $status, last_reason: $reason, escalation_count: $esc_count}')
printf '%s\n' "$new_state" > "$STATE_FILE"

# -- Escalate: spawn a full Claude session with hooks enabled --
# NOTE(review): $reason is model output derived from log tails and is
# interpolated into the prompts of the escalated session — confirm that this
# is acceptable for content that may appear in error logs.
if [[ "$action" == "escalate" ]]; then
  nohup claude --print \
    --model sonnet \
    --system-prompt "PAI escalation session. Triage reason: $reason. Investigate and resolve." \
    "Heartbeat triage escalated. Reason: $reason. Recent failures: ${recent_failures:-none}" \
    >> "$LOG_DIR/escalation-$(date +%Y%m%d-%H%M%S).log" 2>&1 &
fi
85 changes: 85 additions & 0 deletions Packs/pai-core-install/src/skills/CORE/docs/HEARTBEAT.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# PAI Heartbeat Triage

Periodic health check for the PAI system, designed to be invoked by IronClaw's cron scheduler.

## Overview

The heartbeat pattern separates **triage** (cheap, fast, read-only) from **action** (full session with tools and hooks). This keeps background monitoring costs minimal while preserving the ability to escalate when something actually needs attention.

```
IronClaw cron --> heartbeat-triage.sh --> lightweight Claude (haiku, no tools)
                                               |
                                          idle? done.
                                               |
                                          escalate? --> full Claude session (sonnet, hooks enabled)
```

## IronClaw Routine Configuration

Register the heartbeat as an IronClaw routine:

```json
{
"id": "pai-heartbeat",
"schedule": "*/15 * * * *",
"command": "Packs/pai-core-install/src/skills/CORE/Tools/heartbeat-triage.sh",
"timeout_seconds": 30,
"retry": {
"max_attempts": 1,
"backoff_ms": 0
},
"tags": ["health", "triage"]
}
```

The 15-minute interval balances responsiveness with API cost. Adjust based on system activity.

## Invocation Modes

| Aspect | Lightweight Triage | Full Escalation |
|---|---|---|
| **Model** | haiku | sonnet |
| **Tools** | Disabled (`--tools ''`) | All available |
| **Hooks** | Disabled (`--setting-sources ''`) | All enabled |
| **Output** | JSON decision only | Investigative session |
| **Cost** | Minimal | Standard |
| **Duration** | < 5 seconds | Variable |
| **Trigger** | IronClaw cron | Triage script (background) |

## State Persistence

Triage state lives at `~/.claude/MEMORY/STATE/daemon-state.json`:

```json
{
"last_check": 1708185600,
"status": "idle",
"last_reason": "no actionable signals",
"escalation_count": 0
}
```

| Field | Purpose |
|---|---|
| `last_check` | Unix timestamp of last triage run |
| `status` | Result of last triage (`idle` or `escalate`) |
| `last_reason` | Human-readable explanation from triage model |
| `escalation_count` | Running total of escalations for observability |

The state file also serves as a timestamp reference -- `find -newer` uses it to scope signal gathering to only what's changed since the last check.

## Security Model

**Triage session (haiku):**
- Cannot execute tools (`--tools ''`)
- Cannot trigger hooks (`--setting-sources ''`)
- Read-only analysis of log snippets and file names
- Can only output a JSON verdict

**Escalated session (sonnet):**
- Full tool access for investigation and remediation
- All security hooks active (SecurityValidator, etc.)
- Runs as a background process with output logged
- Standard PAI permission model applies

This two-tier design ensures that the high-frequency triage session has no tool access and no write capability of its own (the wrapper script only updates the state file), while escalated sessions get the full security stack.
1 change: 1 addition & 0 deletions Packs/pai-telos-skill/src/DashboardTemplate/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ coverage

# logs
logs
!app/api/**/logs/
*.log
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json

Expand Down
229 changes: 229 additions & 0 deletions Packs/pai-telos-skill/src/DashboardTemplate/App/agents/[id]/page.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
"use client"

import { useState, useEffect } from "react"
import { useParams, useRouter } from "next/navigation"
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
import { Badge } from "@/components/ui/badge"
import { Button } from "@/components/ui/button"
import { Bot, ArrowLeft, XCircle, RotateCcw } from "lucide-react"
import type { PAIAgentDetail, AgentEvent, AgentStatus } from "@/types/pai"

/** Badge variant names that an agent status can be rendered with. */
type StatusBadgeVariant = "success" | "primary" | "warning" | "destructive" | "secondary"

/** Lookup table: agent status -> variant passed to the status <Badge>. */
const statusVariant: Record<AgentStatus, StatusBadgeVariant> = {
  "completed": "success",
  "in_progress": "primary",
  "pending": "warning",
  "failed": "destructive",
  "stuck": "secondary",
}

/**
 * Formats a timestamp string for display using the viewer's locale.
 *
 * @param iso - Timestamp string passed to the Date constructor; may be omitted.
 * @returns The locale-formatted date/time, or "-" when the value is missing,
 *   empty, or cannot be parsed.
 */
function formatDate(iso?: string): string {
  if (!iso) return "-"
  const parsed = new Date(iso)
  // An unparseable string produces a Date whose time value is NaN; show the
  // same placeholder as a missing value instead of the text "Invalid Date".
  if (Number.isNaN(parsed.getTime())) return "-"
  return parsed.toLocaleString()
}

/**
 * Formats a duration given in seconds as a short human-readable string.
 *
 * The value is rounded to whole seconds once, before it is split into units,
 * so a fractional input can never yield an out-of-range component such as
 * "60s" or "1m 60s".
 *
 * @param secs - Duration in seconds; may be fractional or omitted.
 * @returns "Ns" below one minute, "Nm Ns" below one hour, "Nh Nm" otherwise,
 *   or "-" when the value is missing or not a finite number.
 */
function formatDuration(secs?: number): string {
  if (secs == null || !Number.isFinite(secs)) return "-"
  const total = Math.round(secs)
  if (total < 60) return `${total}s`
  if (total < 3600) return `${Math.floor(total / 60)}m ${total % 60}s`
  return `${Math.floor(total / 3600)}h ${Math.floor((total % 3600) / 60)}m`
}

/**
 * Detail page for a single agent, addressed by the `id` route parameter.
 *
 * Loads the agent and its events from `/api/pai/agents/{id}` and renders a
 * header card (title, status, timestamps, duration), the state-transition
 * timeline, and the event list. While the request is in flight a loading
 * message is shown; if the request fails or returns a non-OK status the
 * "IronClaw is not running" card is shown instead.
 */
export default function AgentDetailPage() {
  const params = useParams()
  const router = useRouter()
  const id = params.id as string

  const [agent, setAgent] = useState<PAIAgentDetail | null>(null)
  const [events, setEvents] = useState<AgentEvent[]>([])
  const [loading, setLoading] = useState(true)
  const [offline, setOffline] = useState(false)

  useEffect(() => {
    // Set by the cleanup function so a response that arrives after `id` has
    // changed (or the page has unmounted) cannot overwrite newer state.
    let cancelled = false

    async function fetchData() {
      try {
        const res = await fetch(`/api/pai/agents/${id}`)
        if (cancelled) return
        if (!res.ok) {
          setOffline(true)
          return
        }
        const data = await res.json() as { agent: PAIAgentDetail; events: AgentEvent[] }
        if (cancelled) return
        setAgent(data.agent)
        // Guard against a payload without `events`; the render below reads
        // `events.length`.
        setEvents(data.events ?? [])
        setOffline(false)
      } catch {
        if (!cancelled) setOffline(true)
      } finally {
        if (!cancelled) setLoading(false)
      }
    }
    fetchData()

    return () => {
      cancelled = true
    }
  }, [id])

  // Asks for confirmation, then sends DELETE for this agent and returns to the
  // agents list once the server has accepted the request.
  const handleCancel = async () => {
    if (!window.confirm("Cancel this agent?")) return
    try {
      const res = await fetch(`/api/pai/agents/${id}`, { method: "DELETE" })
      // fetch only rejects on network failure; an HTTP error status means the
      // cancel was not accepted, so stay on this page instead of navigating.
      if (!res.ok) return
      router.push("/agents")
    } catch {
      // Ignore - user will see the job is still active
    }
  }

  if (loading) {
    return (
      <div className="p-8 flex items-center justify-center">
        <p className="text-gray-500 dark:text-gray-400">Loading agent details...</p>
      </div>
    )
  }

  if (offline || !agent) {
    return (
      <div className="p-8">
        <button
          onClick={() => router.push("/agents")}
          className="flex items-center text-sm text-gray-500 dark:text-gray-400 hover:text-[#2e7de9] mb-6"
        >
          <ArrowLeft className="h-4 w-4 mr-1" />
          Back to Agents
        </button>
        <Card className="border-[#f0a020]/30 max-w-lg mx-auto">
          <CardContent className="flex flex-col items-center justify-center py-16 text-center">
            <Bot className="h-16 w-16 text-[#f0a020] mb-4" />
            <p className="text-lg font-medium text-gray-700 dark:text-gray-300">IronClaw is not running</p>
            <p className="text-sm text-gray-500 dark:text-gray-400 mt-2">Start IronClaw to manage agents</p>
            <code className="mt-4 px-4 py-2 bg-gray-100 dark:bg-gray-700 rounded text-sm text-gray-600 dark:text-gray-400">
              cd ~/ironclaw && cargo run
            </code>
          </CardContent>
        </Card>
      </div>
    )
  }

  return (
    <div className="p-8">
      <button
        onClick={() => router.push("/agents")}
        className="flex items-center text-sm text-gray-500 dark:text-gray-400 hover:text-[#2e7de9] mb-6"
      >
        <ArrowLeft className="h-4 w-4 mr-1" />
        Back to Agents
      </button>

      {/* Agent Header */}
      <Card className="mb-6">
        <CardHeader>
          <div className="flex items-center justify-between">
            <div>
              <CardTitle className="flex items-center gap-3">
                {agent.title}
                {/* Global regex so every underscore in the status is replaced, not just the first. */}
                <Badge variant={statusVariant[agent.status]}>{agent.status.replace(/_/g, " ")}</Badge>
              </CardTitle>
              {agent.description && (
                <p className="text-sm text-gray-500 dark:text-gray-400 mt-2">{agent.description}</p>
              )}
            </div>
            <div className="flex gap-2">
              {(agent.status === "pending" || agent.status === "in_progress") && (
                <Button variant="destructive" size="sm" onClick={handleCancel}>
                  <XCircle className="h-4 w-4 mr-1" />
                  Cancel
                </Button>
              )}
              {/* NOTE(review): this button is labelled "Restart" but only calls router.refresh() — confirm intended behavior. */}
              {(agent.status === "failed" || agent.status === "stuck") && (
                <Button variant="outline" size="sm" onClick={() => router.refresh()}>
                  <RotateCcw className="h-4 w-4 mr-1" />
                  Restart
                </Button>
              )}
            </div>
          </div>
        </CardHeader>
        <CardContent>
          <div className="grid grid-cols-4 gap-4 text-sm">
            <div>
              <p className="text-gray-500 dark:text-gray-400">Created</p>
              <p className="font-medium">{formatDate(agent.createdAt)}</p>
            </div>
            <div>
              <p className="text-gray-500 dark:text-gray-400">Started</p>
              <p className="font-medium">{formatDate(agent.startedAt)}</p>
            </div>
            <div>
              <p className="text-gray-500 dark:text-gray-400">Completed</p>
              <p className="font-medium">{formatDate(agent.completedAt)}</p>
            </div>
            <div>
              <p className="text-gray-500 dark:text-gray-400">Duration</p>
              <p className="font-medium">{formatDuration(agent.elapsedSecs)}</p>
            </div>
          </div>
        </CardContent>
      </Card>

      {/* Transition Timeline */}
      {agent.transitions.length > 0 && (
        <Card className="mb-6">
          <CardHeader>
            <CardTitle className="text-lg">State Transitions</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="space-y-4">
              {agent.transitions.map((t, i) => (
                <div key={i} className="flex items-start gap-3">
                  <div className="flex flex-col items-center">
                    <div className="h-3 w-3 rounded-full bg-[#2e7de9]" />
                    {/* Connector line between dots; omitted after the last transition. */}
                    {i < agent.transitions.length - 1 && (
                      <div className="w-0.5 h-8 bg-gray-200 dark:bg-gray-700" />
                    )}
                  </div>
                  <div>
                    <p className="text-sm font-medium">
                      {t.from} → {t.to}
                    </p>
                    <p className="text-xs text-gray-500 dark:text-gray-400">{formatDate(t.timestamp)}</p>
                    {t.reason && (
                      <p className="text-xs text-gray-400 mt-1">{t.reason}</p>
                    )}
                  </div>
                </div>
              ))}
            </div>
          </CardContent>
        </Card>
      )}

      {/* Event Timeline */}
      {events.length > 0 && (
        <Card>
          <CardHeader>
            <CardTitle className="text-lg">Events</CardTitle>
          </CardHeader>
          <CardContent>
            <div className="space-y-3">
              {events.map((event, i) => (
                <div key={i} className="flex items-start gap-3 border-b dark:border-gray-700 last:border-0 pb-3 last:pb-0">
                  <div className="flex flex-col items-center pt-1">
                    <div className="h-2 w-2 rounded-full bg-gray-400" />
                  </div>
                  <div className="flex-1 min-w-0">
                    <div className="flex items-center justify-between">
                      <Badge variant="secondary" className="text-xs">{event.eventType}</Badge>
                      <span className="text-xs text-gray-400">{formatDate(event.timestamp)}</span>
                    </div>
                    {event.data != null && (
                      <pre className="text-xs text-gray-500 dark:text-gray-400 mt-1 overflow-x-auto whitespace-pre-wrap break-words">
                        {typeof event.data === "string" ? event.data : JSON.stringify(event.data, null, 2)}
                      </pre>
                    )}
                  </div>
                </div>
              ))}
            </div>
          </CardContent>
        </Card>
      )}
    </div>
  )
}
Loading