pideploy/prd.json at main · Crokily/pideploy · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
{
  "project": "piDeploy",
  "branchName": "ralph/agent-orchestrator",
  "description": "Refactor ClawDeploy instance lifecycle management from hardcoded API routes to Pi Agent-driven autonomous Orchestrator service. Preserve frontend/UI, replace execution layer with agentLoop + custom tools + heartbeat self-healing.",
  "qualityChecks": {
    "orchestrator-typecheck": "cd agent/orchestrator && npx tsc --noEmit",
    "frontend-typecheck": "cd agent/frontend && npx tsc --noEmit",
    "frontend-lint": "cd agent/frontend && npm run lint",
    "frontend-build": "cd agent/frontend && npm run build"
  },
  "userStories": [
    {
      "id": "US-001",
      "title": "Monorepo Setup — Rename and Restructure",
      "description": "As a developer, I need the project restructured into a monorepo (piDeploy) with original/ and agent/ folders so both versions coexist safely.",
      "acceptanceCriteria": [
        "Project directory is /home/ubuntu/piDeploy",
        "piDeploy/original/ contains full original clawdeploy code (frontend/, backend/, docs/, scripts/, tasks/)",
        "piDeploy/agent/frontend/ is a copy of original/frontend/ with its own node_modules",
        "piDeploy/agent/orchestrator/ directory exists (empty src/ placeholder)",
        "piDeploy/README.md describes the new monorepo architecture",
        "piDeploy/.git preserved from original (history retained)",
        "/data/clawdeploy/ data directory is NOT moved (still referenced by path in code)",
        "cd piDeploy/agent/frontend && npm install succeeds",
        "cd piDeploy/agent/frontend && npx tsc --noEmit passes"
      ],
      "priority": 1,
      "passes": true,
      "notes": "Completed. Monorepo structure verified. agent/frontend typecheck passes."
    },
    {
      "id": "US-002",
      "title": "Orchestrator Project Scaffolding",
      "description": "As a developer, I need the orchestrator TypeScript project initialized with all dependencies and configurations.",
      "acceptanceCriteria": [
        "agent/orchestrator/package.json created with name @pideploy/orchestrator, type: module",
        "Dependencies: @mariozechner/pi-ai, @mariozechner/pi-agent-core, @sinclair/typebox, dockerode, @types/dockerode, pino, pino-pretty",
        "DevDependencies: typescript, tsx, @types/node",
        "agent/orchestrator/tsconfig.json configured: target ES2022, module NodeNext, moduleResolution NodeNext, strict true, outDir dist",
        "agent/orchestrator/src/index.ts created with placeholder main() function that logs startup",
        "agent/orchestrator/src/config.ts created with: AuthStorage.create() init, model fallback chain (minimax-m2.5-free → glm-5-free → gemini-3-flash), cost limits constants",
        "agent/orchestrator/src/lib/prisma.ts created importing PrismaClient (pointing to agent/frontend generated client via relative path or NODE_PATH)",
        "npm install in agent/orchestrator/ succeeds",
        "npx tsc --noEmit in agent/orchestrator/ passes"
      ],
      "priority": 2,
      "passes": true,
      "notes": "Completed by codex. npm install + tsc --noEmit pass."
    },
    {
      "id": "US-003",
      "title": "Observability Foundation",
      "description": "As a developer, I need comprehensive observability modules (tracing, cost, errors, performance, transcripts) so every subsequent tool can be debugged and monitored from day one.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/observability/tracer.ts created with AgentTrace and AgentSpan interfaces, createTracer() function following observability.md Pattern 1",
        "agent/orchestrator/src/observability/cost-monitor.ts created with CostPolicy interface, enforceCostPolicy() function, per-tool cost attribution following Pattern 2",
        "agent/orchestrator/src/observability/error-classifier.ts created with AgentErrorClass union type and classifyError() function following Pattern 3",
        "agent/orchestrator/src/observability/performance.ts created with PerformanceMetrics interface and trackPerformance() following Pattern 4",
        "agent/orchestrator/src/observability/transcript.ts created with captureDebugTranscript() and save() following Pattern 3 debug transcript",
        "agent/orchestrator/src/observability/index.ts re-exports all modules",
        "Trace output directory: /var/log/pideploy/traces/ (created if not exists)",
        "npx tsc --noEmit passes"
      ],
      "priority": 3,
      "passes": true,
      "notes": "These are pure utility modules with no external runtime dependencies beyond pi-ai types. They subscribe to AgentSession/AgentLoop events. Reference: pi-agent-app-dev/references/observability.md Patterns 1-4. Use crypto.randomUUID() for trace/span IDs. mkdir -p /var/log/pideploy/traces/ as part of setup."
    },
    {
      "id": "US-004",
      "title": "Core Lifecycle Tools — create, start, stop",
      "description": "As a developer, I need instance_create, instance_start, and instance_stop custom tools with hardcoded security checks.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/tools/instance-create.ts implements ToolDefinition with parameters: { name: string, channel?: string, botToken?: string, aiProvider?: string, apiKey?: string, userId: string }",
        "instance_create execute(): (1) DB create status=creating, (2) createInstanceStorage, (3) generateOpenClawConfig + writeInstanceConfig, (4) createContainer with Docker, (5) DB update with containerId+port+gatewayToken, (6) nginx sync. Returns { instanceId, port, status }",
        "instance_create hardcodes userId validation in execute()",
        "agent/orchestrator/src/tools/instance-start.ts: validates ownership → docker start → DB status=running → nginx sync",
        "agent/orchestrator/src/tools/instance-stop.ts: validates ownership → docker stop → DB status=stopped → nginx sync",
        "On failure: instance_create cleans up storage and sets DB status=error",
        "All tools use Type.Object() from @sinclair/typebox for parameter schemas",
        "All tools use onUpdate?.() for streaming progress",
        "Business logic reused from agent/frontend/src/lib/ (docker.ts, instance-config.ts, nginx.ts) — code copied into orchestrator/src/lib/ or imported directly",
        "npx tsc --noEmit passes"
      ],
      "priority": 4,
      "passes": true,
      "notes": "Copy the core library files (docker.ts, instance-config.ts, nginx.ts, prisma.ts, logger.ts) from agent/frontend/src/lib/ to agent/orchestrator/src/lib/ and adapt imports. The tool execute() function signature follows pi-coding-agent ToolDefinition: execute(toolCallId, params, signal, onUpdate, ctx)."
    },
    {
      "id": "US-005",
      "title": "Destructive & Complex Tools — delete, update",
      "description": "As a developer, I need instance_delete and instance_update tools with cleanup logic and rollback capability.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/tools/instance-delete.ts: validates ownership → docker remove → removeInstanceStorage → DB delete → nginx sync",
        "instance_delete handles 'container not found' gracefully (still cleans DB + storage)",
        "agent/orchestrator/src/tools/instance-update.ts: validates ownership → set status=updating → stop container → remove container → rebuild image if needed → recreate container with same config → start → status=running",
        "instance_update uses file lock /tmp/pideploy-rebuild.lock to prevent concurrent image rebuilds",
        "instance_update rollback: if recreate fails, attempts to restart old container; if impossible, sets status=error with diagnostic info",
        "instance_update streams progress via onUpdate (pulling, rebuilding, recreating, etc.)",
        "Both tools hardcode userId validation",
        "npx tsc --noEmit passes"
      ],
      "priority": 5,
      "passes": true,
      "notes": "The file lock can use a simple fs.open with wx flag + cleanup on process exit. Image rebuild uses child_process.execFile (not execSync) with proper timeout. Reuse logic from original update/route.ts but improve error handling."
    },
    {
      "id": "US-006",
      "title": "Infrastructure Tools — nginx_sync, report_result",
      "description": "As a developer, I need nginx_sync (global port map sync with verification) and report_result (structured output) tools.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/tools/nginx-sync.ts: queries all running instances with port → writes /etc/nginx/conf.d/clawdeploy-ports.conf → sudo nginx -s reload → verifies with nginx -t",
        "nginx_sync returns { instanceCount, reloadSuccess, verificationPassed }",
        "agent/orchestrator/src/tools/report-result.ts: accepts { success: boolean, action: string, data?: object, errors?: string[] } → writes to trace log → returns confirmation",
        "agent/orchestrator/src/tools/index.ts exports allTools array containing all 7 tools",
        "npx tsc --noEmit passes"
      ],
      "priority": 6,
      "passes": true,
      "notes": "nginx_sync is modeled on existing agent/frontend/src/lib/nginx.ts. report_result follows pi-agent-app-dev production.md 'Structured Output via Report Tool' pattern. The system prompt will instruct the agent to always call report_result when a task completes."
    },
    {
      "id": "US-007",
      "title": "Agent Loop + System Prompt + Bash Gate",
      "description": "As a developer, I need the core agent loop integrating all tools, with system prompt and bash permission gate.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/agent-loop.ts exports executeTask(task) function that: creates AgentContext with system prompt + tools, runs agentLoop(), collects results via events, returns structured result",
        "agent/orchestrator/src/prompt.ts exports TASK_PROMPT and HEARTBEAT_PROMPT as string constants",
        "TASK_PROMPT defines: agent role (infrastructure execution engine), rules (use custom tools for changes, bash for diagnostics only), safety (never delete user data), structured output (always call report_result)",
        "agent/orchestrator/src/bash-gate.ts exports bashPermissionGate() that blocks: rm -rf /, docker rm -f, docker system prune, mkfs, dd if=, chmod 777 /, wget|sh, curl|sh",
        "Model fallback: tries minimax-m2.5-free first, catches model errors, falls back to glm-5-free then gemini-3-flash",
        "Agent loop integrates observability: createTracer + enforceCostPolicy + trackPerformance + captureDebugTranscript for every run",
        "AbortController with 5-minute timeout on each task",
        "Max 20 turns per task (prevent infinite loops)",
        "npx tsc --noEmit passes"
      ],
      "priority": 7,
      "passes": true,
      "notes": "The bash tool should be created using the patterns from pi-agent-core. Since we're at Layer 2 (agentLoop), bash is a regular Tool definition with execute() that calls child_process.exec. The permission gate runs inside execute() BEFORE the actual command. System prompt is ~500 tokens max."
    },
    {
      "id": "US-008",
      "title": "DB Task Queue + API Route Adaptation",
      "description": "As a developer, I need a DB-based task queue and modified API routes that submit tasks instead of executing directly.",
      "acceptanceCriteria": [
        "agent/frontend/prisma/schema.prisma adds Task model: id (cuid), type (String), params (Json), status (String: pending/processing/completed/failed), instanceId (String?), userId (String), result (Json?), error (String?), traceId (String?), createdAt, updatedAt",
        "prisma db push succeeds (or prisma generate if using migrations)",
        "agent/orchestrator/src/task-queue.ts exports: enqueueTask(), pollAndProcessTasks() functions",
        "pollAndProcessTasks polls every 2 seconds for pending tasks, claims one, calls executeTask()",
        "POST /api/instances modified: creates Instance record + enqueues 'instance_create' task → returns 202 with instance (status: creating)",
        "POST /api/instances/[id]/start modified: enqueues 'instance_start' task → returns 202",
        "POST /api/instances/[id]/stop modified: enqueues 'instance_stop' task → returns 202",
        "DELETE /api/instances/[id] modified: enqueues 'instance_delete' task → returns 202",
        "GET endpoints unchanged (still read directly from DB)",
        "cd agent/frontend && npx tsc --noEmit passes",
        "cd agent/frontend && npm run build passes",
        "cd agent/orchestrator && npx tsc --noEmit passes"
      ],
      "priority": 8,
      "passes": true,
      "notes": "Task queue uses simple Prisma queries: findFirst where status=pending ordered by createdAt, update to processing. On completion, update to completed/failed with result/error. The frontend returns 202 Accepted since the actual work happens async. Frontend polling on GET /api/instances will show updated status as agent completes work."
    },
    {
      "id": "US-009",
      "title": "Heartbeat Autonomous Loop",
      "description": "As a developer, I need a 60-second heartbeat loop that auto-checks all instances and self-heals issues.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/heartbeat.ts exports heartbeatLoop() that runs indefinitely",
        "Every 60 seconds: queries all instances with status running or creating",
        "For each instance: agent uses bash to docker inspect and curl health check",
        "Crashed containers (exited/dead) → agent calls instance_start to restart",
        "Missing containers (not_found) → agent sets status=error with diagnostic note",
        "Consecutive 3 restart failures for same instance → stop retrying, mark as error",
        "DB-Docker status mismatch → agent corrects DB",
        "Nginx port map verified each cycle → calls nginx_sync if stale",
        "Each heartbeat cycle generates a lightweight trace",
        "Cost cap per heartbeat cycle: $0.05 (enforced via cost monitor)",
        "npx tsc --noEmit passes"
      ],
      "priority": 9,
      "passes": true,
      "notes": "Use a separate retry counter stored in-memory (Map<instanceId, failCount>). Reset on successful start. Heartbeat prompt is intentionally simple and short to minimize token usage. Uses the same agentLoop but with HEARTBEAT_PROMPT."
    },
    {
      "id": "US-010",
      "title": "Production Alerting + Eval Dataset",
      "description": "As a developer, I need alert rules and an eval dataset for continuous quality monitoring of agent behavior.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/observability/alerting.ts defines AlertRule[] with: high_cost (>$0.50), high_turn_count (>15), tool_error_rate (>30%), slow_execution (>2min)",
        "evaluateAlerts(trace) checks all rules and writes alerts to /var/log/pideploy/alerts.jsonl",
        "agent/orchestrator/src/observability/eval.ts defines EvalCase[] with at least 5 cases: create_success, create_failure_recovery, heartbeat_normal, heartbeat_recovery, delete_flow",
        "evaluateAgentRun(trace, expectedTools) returns EvalResult with score, passed, checks",
        "Eval checks: completion, expected_tools, cost_reasonable, no_loops",
        "npx tsc --noEmit passes"
      ],
      "priority": 10,
      "passes": true,
      "notes": "These are the Pattern 7 (alerting) and Pattern 9 (eval) from observability.md. Alert actions write to file for now (can be upgraded to Discord/webhook later). Eval dataset is checked programmatically, not run against live LLM in this story."
    },
    {
      "id": "US-011",
      "title": "Orchestrator Service Entry Point + Integration Test",
      "description": "As a developer, I need the orchestrator running as a complete service and verified end-to-end.",
      "acceptanceCriteria": [
        "agent/orchestrator/src/index.ts is the full service entry: initializes Prisma, starts task queue poller, starts heartbeat loop, handles SIGTERM graceful shutdown",
        "npx tsx src/index.ts starts without errors and logs 'piDeploy Orchestrator started'",
        "Service processes a manually inserted task: INSERT INTO Task (type: 'instance_create', params, userId, status: 'pending') → orchestrator picks up, processes, generates trace",
        "Trace JSON file appears in /var/log/pideploy/traces/",
        "Service responds to SIGTERM by stopping loops and disconnecting Prisma",
        "cd agent/orchestrator && npx tsc --noEmit passes",
        "agent/orchestrator/scripts/start.sh created for running the service"
      ],
      "priority": 11,
      "passes": true,
      "notes": "The integration test is manual: start the service, insert a task via psql or prisma studio, watch it get processed. Check that trace file is generated. This story also ensures all modules compose correctly in the real entry point."
    },
    {
      "id": "US-012",
      "title": "E2E Browser Test — Full Lifecycle",
      "description": "As a developer, I need an end-to-end test using agent-browser that verifies the complete instance lifecycle through the UI.",
      "acceptanceCriteria": [
        "agent/scripts/e2e-test.sh created as executable bash script",
        "Script starts: agent/frontend (npm run dev) + agent/orchestrator (npx tsx src/index.ts) in background",
        "Script uses agent-browser to: open dashboard, verify page loads",
        "Script creates a test instance via curl POST /api/instances (with test params)",
        "Script polls GET /api/instances until status=running (timeout 120s)",
        "Script uses agent-browser to verify instance appears on dashboard",
        "Script stops instance via curl POST /api/instances/{id}/stop",
        "Script polls until status=stopped",
        "Script deletes instance via curl DELETE /api/instances/{id}",
        "Script uses agent-browser to verify instance removed from dashboard",
        "Script cleans up background processes on exit (trap)",
        "Script exits 0 on success, non-zero on failure"
      ],
      "priority": 12,
      "passes": true,
      "notes": "The E2E test skips Clerk auth by using a test-mode header or environment variable that the API routes check. If Clerk is enforced, the test can use the admin sync secret as a bypass. agent-browser commands: open, snapshot -i, get text, screenshot for verification."
    }
  ]
}