diff --git a/docs/plans/2026-03-20-jam-agent-engine.md b/docs/plans/2026-03-20-jam-agent-engine.md new file mode 100644 index 0000000..a1064aa --- /dev/null +++ b/docs/plans/2026-03-20-jam-agent-engine.md @@ -0,0 +1,1513 @@ +# Jam Agent Engine — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a shared agent engine powering `jam go` (interactive) and `jam run` (one-shot) with orchestrated parallel workers, tiered permissions, OS sandboxing, multimodal image input, and workspace intelligence. + +**Architecture:** New `src/agent/` module with orchestrator→worker pattern. Orchestrator decomposes tasks, dispatches workers in dependency order (parallel when independent), merges results. Each worker runs a focused agentic loop scoped to one subtask. Reuses existing tools, providers, memory, and planning infrastructure. + +**Tech Stack:** TypeScript, vitest, Zod, Commander.js, existing ProviderAdapter/ToolRegistry/WorkingMemory + +**Spec:** `docs/specs/2026-03-20-jam-agent-engine-design.md` + +--- + +## Dependency Graph + +``` +Task 1 (types) ──┬── Task 4 (permissions) ── Task 5 (sandbox) + ├── Task 6 (multimodal) + ├── Task 7 (file-lock) + ├── Task 8 (provider-pool) + └── Task 10 (workspace-intel profile) +Task 2 (errors) │ +Task 3 (config) │ +Task 9 (conventions)──┘ + │ + Task 11 (planner) + │ + Task 12 (worker) + │ + Task 13 (orchestrator) + │ + ┌────┴────┐ + Task 14 Task 15 + (jam run) (jam go) + │ + Task 16 (barrel + integration) +``` + +Tasks 1-3 must go first. Tasks 4-10 depend on Tasks 1-3 and are parallelizable among themselves (except that Task 5 depends on Task 4 and Task 10 depends on Task 9). Tasks 11+ are sequential. Task 13.5 (progress output, not shown in the graph) can be built alongside Task 13. 
+ +--- + +### Task 1: Agent Types + +**Files:** +- Create: `src/agent/types.ts` +- Test: `src/agent/types.test.ts` + +- [ ] **Step 1: Write the type definitions file** + +```typescript +// src/agent/types.ts +import type { TokenUsage } from '../providers/base.js'; + +// ── Permission Tiers ──────────────────────────────────────────────── + +export type PermissionTier = 'safe' | 'moderate' | 'dangerous'; + +export type AgentMode = 'supervised' | 'auto'; + +// ── Task Planning ─────────────────────────────────────────────────── + +export interface TaskPlan { + goal: string; + subtasks: Subtask[]; + dependencyGraph: Map<string, string[]>; // subtaskId → [blockedBy] +} + +export interface Subtask { + id: string; + description: string; + files: FileOwnership[]; + estimatedRounds: number; + validationCommand?: string; +} + +export interface FileOwnership { + path: string; + mode: 'create' | 'modify' | 'read-only'; +} + +// ── Worker ────────────────────────────────────────────────────────── + +export interface WorkerOptions { + subtask: Subtask; + context: SubtaskContext; + signal: AbortSignal; +} + +export interface SubtaskContext { + priorSummary: string; + filesAvailable: string[]; + planReminder: string; +} + +export interface WorkerResult { + subtaskId: string; + status: 'completed' | 'failed' | 'blocked' | 'cancelled'; + filesChanged: FileChange[]; + summary: string; + tokensUsed: TokenUsage; + error?: string; +} + +export interface FileChange { + path: string; + action: 'created' | 'modified' | 'deleted'; + diff: string; +} + +// ── File Lock ─────────────────────────────────────────────────────── + +export type FileLockRequest = { + workerId: string; + path: string; + reason: string; +}; + +export type FileLockResponse = { + granted: boolean; + waitForWorker?: string; +}; + +// ── Sandbox ───────────────────────────────────────────────────────── + +export interface SandboxConfig { + filesystem: 'workspace-only' | 'unrestricted'; + network: 'blocked' | 'allowed'; + timeout: number; 
+} + +// ── Token Budget ──────────────────────────────────────────────────── + +export interface TokenBudget { + maxPerWorker: number; + maxTotal: number; + spent: number; + remaining: number; +} + +// ── Workspace Profile ─────────────────────────────────────────────── + +export interface WorkspaceProfile { + language: string; + framework?: string; + monorepo: boolean; + srcLayout: string; + entryPoints: string[]; + codeStyle: { + indent: 'tabs' | 'spaces'; + indentSize: number; + quotes: 'single' | 'double'; + semicolons: boolean; + trailingCommas: boolean; + namingConvention: 'camelCase' | 'snake_case' | 'PascalCase'; + }; + fileNaming: string; + exportStyle: 'named' | 'default' | 'barrel'; + importStyle: 'relative' | 'alias'; + errorHandling: string; + logging: string; + configPattern: string; + testFramework: string; + testLocation: string; + testNaming: string; + testStyle: string; + coverageThreshold?: number; + testCommand: string; + commitConvention: string; + branchPattern: string; + packageManager: string; + linter?: string; + formatter?: string; + typeChecker?: string; + buildTool?: string; +} + +// ── Helpers ───────────────────────────────────────────────────────── + +/** Validate a dependency graph is a DAG (no cycles). Returns null if valid, or the cycle path if invalid. */ +export function validateDAG(graph: Map<string, string[]>): string[] | null { + const visited = new Set<string>(); + const stack = new Set<string>(); + const parent = new Map<string, string>(); // tracks DFS parent for cycle reconstruction + + function dfs(node: string): string | null { + if (stack.has(node)) return node; // cycle back-edge found + if (visited.has(node)) return null; + + visited.add(node); + stack.add(node); + + for (const dep of graph.get(node) ?? 
[]) { + parent.set(dep, node); + const cycleNode = dfs(dep); + if (cycleNode !== null) return cycleNode; + } + + stack.delete(node); + return null; + } + + for (const node of graph.keys()) { + if (!visited.has(node)) { + const cycleNode = dfs(node); + if (cycleNode !== null) { + // Reconstruct cycle path from the stack + const cyclePath = [cycleNode]; + for (const n of [...stack].reverse()) { + cyclePath.push(n); + if (n === cycleNode) break; + } + return cyclePath.reverse(); + } + } + } + return null; +} + +// dependencyGraph semantics: subtaskId → list of subtask IDs that must +// complete BEFORE this one can start (i.e., prerequisites, not dependents). + +/** Topological sort of subtask IDs. Throws if graph has cycles. */ +export function topologicalSort(graph: Map<string, string[]>): string[] { + const cycle = validateDAG(graph); + if (cycle) throw new Error(`Cycle detected: ${cycle.join(' → ')}`); + + const sorted: string[] = []; + const visited = new Set<string>(); + + function visit(node: string): void { + if (visited.has(node)) return; + visited.add(node); + for (const dep of graph.get(node) ?? 
[]) { + visit(dep); + } + sorted.push(node); + } + + for (const node of graph.keys()) visit(node); + return sorted; +} +``` + +- [ ] **Step 2: Write tests for DAG validation and topological sort** + +```typescript +// src/agent/types.test.ts +import { describe, it, expect } from 'vitest'; +import { validateDAG, topologicalSort } from './types.js'; + +describe('validateDAG', () => { + it('returns null for valid DAG', () => { + const graph = new Map([ + ['a', []], + ['b', ['a']], + ['c', ['b']], + ]); + expect(validateDAG(graph)).toBeNull(); + }); + + it('returns cycle path for cyclic graph', () => { + const graph = new Map([ + ['a', ['c']], + ['b', ['a']], + ['c', ['b']], + ]); + expect(validateDAG(graph)).not.toBeNull(); + }); + + it('handles empty graph', () => { + expect(validateDAG(new Map())).toBeNull(); + }); + + it('handles self-loop', () => { + const graph = new Map([['a', ['a']]]); + expect(validateDAG(graph)).not.toBeNull(); + }); +}); + +describe('topologicalSort', () => { + it('sorts linear chain', () => { + const graph = new Map([ + ['a', []], + ['b', ['a']], + ['c', ['b']], + ]); + const sorted = topologicalSort(graph); + expect(sorted.indexOf('a')).toBeLessThan(sorted.indexOf('b')); + expect(sorted.indexOf('b')).toBeLessThan(sorted.indexOf('c')); + }); + + it('sorts diamond dependency', () => { + const graph = new Map([ + ['a', []], + ['b', ['a']], + ['c', ['a']], + ['d', ['b', 'c']], + ]); + const sorted = topologicalSort(graph); + expect(sorted.indexOf('a')).toBeLessThan(sorted.indexOf('b')); + expect(sorted.indexOf('a')).toBeLessThan(sorted.indexOf('c')); + expect(sorted.indexOf('b')).toBeLessThan(sorted.indexOf('d')); + expect(sorted.indexOf('c')).toBeLessThan(sorted.indexOf('d')); + }); + + it('throws on cycle', () => { + const graph = new Map([ + ['a', ['b']], + ['b', ['a']], + ]); + expect(() => topologicalSort(graph)).toThrow('Cycle detected'); + }); +}); +``` + +- [ ] **Step 3: Run tests** + +Run: `npx vitest run src/agent/types.test.ts` 
+Expected: All 7 tests pass. + +- [ ] **Step 4: Commit** + +```bash +git add src/agent/types.ts src/agent/types.test.ts +git commit -m "feat(agent): add shared types with DAG validation and topological sort" +``` + +--- + +### Task 2: Agent Error Codes + +**Files:** +- Modify: `src/utils/errors.ts` (ErrorCode type + ERROR_HINTS) + +- [ ] **Step 1: Write failing test** + +```typescript +// src/agent/errors.test.ts +import { describe, it, expect } from 'vitest'; +import { JamError } from '../utils/errors.js'; + +const AGENT_CODES = [ + 'AGENT_PLAN_FAILED', + 'AGENT_PLAN_CYCLE', + 'AGENT_WORKER_TIMEOUT', + 'AGENT_WORKER_CANCELLED', + 'AGENT_FILE_LOCK_CONFLICT', + 'AGENT_FILE_LOCK_TIMEOUT', + 'AGENT_BUDGET_EXCEEDED', + 'AGENT_SANDBOX_UNAVAILABLE', + 'AGENT_RATE_LIMITED', + 'AGENT_MERGE_CONFLICT', +] as const; + +describe('agent error codes', () => { + for (const code of AGENT_CODES) { + it(`creates JamError with code ${code}`, () => { + const err = new JamError(`test ${code}`, code); + expect(err.code).toBe(code); + expect(err.hint).toBeDefined(); + }); + } +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run src/agent/errors.test.ts` +Expected: FAIL — error codes not recognized by TypeScript. + +- [ ] **Step 3: Add agent error codes to ErrorCode type and ERROR_HINTS** + +Modify `src/utils/errors.ts`: +- Add to `ErrorCode` type union: `'AGENT_PLAN_FAILED' | 'AGENT_PLAN_CYCLE' | 'AGENT_WORKER_TIMEOUT' | 'AGENT_WORKER_CANCELLED' | 'AGENT_FILE_LOCK_CONFLICT' | 'AGENT_FILE_LOCK_TIMEOUT' | 'AGENT_BUDGET_EXCEEDED' | 'AGENT_SANDBOX_UNAVAILABLE' | 'AGENT_RATE_LIMITED' | 'AGENT_MERGE_CONFLICT'` +- Add to `ERROR_HINTS`: + - `AGENT_PLAN_FAILED`: `'The AI could not generate a valid execution plan. Try simplifying your task or breaking it into smaller pieces.'` + - `AGENT_PLAN_CYCLE`: `'The execution plan has circular dependencies. This is a bug — please report it.'` + - `AGENT_WORKER_TIMEOUT`: `'A worker exceeded its round budget. 
Try increasing maxRoundsPerWorker in config.'` + - `AGENT_WORKER_CANCELLED`: `'Worker was cancelled. This may be due to a dependency failure or user abort.'` + - `AGENT_FILE_LOCK_CONFLICT`: `'Two workers tried to edit the same file simultaneously. The orchestrator resolved the conflict.'` + - `AGENT_FILE_LOCK_TIMEOUT`: `'A file lock request timed out. Another worker may be stuck.'` + - `AGENT_BUDGET_EXCEEDED`: `'Token budget exceeded. Reduce task scope or increase maxTotal in agent config.'` + - `AGENT_SANDBOX_UNAVAILABLE`: `'OS sandbox not available. Running with permissions-only. Run jam doctor to check.'` + - `AGENT_RATE_LIMITED`: `'Provider rate limit hit. Workers paused automatically. Wait and retry.'` + - `AGENT_MERGE_CONFLICT`: `'Workers produced conflicting file edits. Manual resolution may be needed.'` + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/errors.test.ts` +Expected: All 10 tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/utils/errors.ts src/agent/errors.test.ts +git commit -m "feat(agent): add agent-specific error codes and hints" +``` + +--- + +### Task 3: Agent Config Schema + +**Files:** +- Modify: `src/config/schema.ts` +- Test: `src/agent/config.test.ts` + +- [ ] **Step 1: Write failing test** + +```typescript +// src/agent/config.test.ts +import { describe, it, expect } from 'vitest'; +import { JamConfigSchema } from '../config/schema.js'; + +describe('agent config schema', () => { + it('provides defaults when agent section is omitted', () => { + const result = JamConfigSchema.parse({}); + expect(result.agent).toBeDefined(); + expect(result.agent.maxWorkers).toBe(3); + expect(result.agent.defaultMode).toBe('supervised'); + expect(result.agent.maxRoundsPerWorker).toBe(20); + expect(result.agent.sandbox.filesystem).toBe('workspace-only'); + expect(result.agent.sandbox.network).toBe('allowed'); + expect(result.agent.sandbox.timeout).toBe(60000); + expect(result.agent.permissions.safe).toEqual([]); + 
expect(result.agent.permissions.dangerous).toEqual([]); + }); + + it('validates custom agent config', () => { + const result = JamConfigSchema.parse({ + agent: { + maxWorkers: 5, + defaultMode: 'auto', + permissions: { safe: ['npm test'], dangerous: ['docker rm'] }, + sandbox: { filesystem: 'unrestricted', network: 'blocked', timeout: 30000 }, + }, + }); + expect(result.agent.maxWorkers).toBe(5); + expect(result.agent.defaultMode).toBe('auto'); + expect(result.agent.permissions.safe).toEqual(['npm test']); + expect(result.agent.sandbox.network).toBe('blocked'); + }); + + it('rejects invalid mode', () => { + expect(() => + JamConfigSchema.parse({ agent: { defaultMode: 'yolo' } }) + ).toThrow(); + }); + + it('rejects maxWorkers < 1', () => { + expect(() => + JamConfigSchema.parse({ agent: { maxWorkers: 0 } }) + ).toThrow(); + }); + + it('rejects maxRoundsPerWorker out of bounds', () => { + expect(() => + JamConfigSchema.parse({ agent: { maxRoundsPerWorker: 0 } }) + ).toThrow(); + expect(() => + JamConfigSchema.parse({ agent: { maxRoundsPerWorker: 51 } }) + ).toThrow(); + }); +}); +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `npx vitest run src/agent/config.test.ts` +Expected: FAIL — `result.agent` is undefined. 
+ +- [ ] **Step 3: Add AgentConfigSchema to config/schema.ts** + +Add before `JamConfigSchema`: + +```typescript +const AgentPermissionsSchema = z.object({ + safe: z.array(z.string()).default([]), + dangerous: z.array(z.string()).default([]), +}); + +const AgentSandboxSchema = z.object({ + filesystem: z.enum(['workspace-only', 'unrestricted']).default('workspace-only'), + network: z.enum(['allowed', 'blocked']).default('allowed'), + timeout: z.number().int().positive().default(60000), +}); + +export const AgentConfigSchema = z.object({ + maxWorkers: z.number().int().min(1).max(10).default(3), + defaultMode: z.enum(['supervised', 'auto']).default('supervised'), + maxRoundsPerWorker: z.number().int().min(1).max(50).default(20), + permissions: AgentPermissionsSchema.default({}), + sandbox: AgentSandboxSchema.default({}), +}); +``` + +Add to `JamConfigSchema`: `agent: AgentConfigSchema.default({}),` + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/config.test.ts` +Expected: All 5 tests pass. + +- [ ] **Step 5: Run full test suite to verify no regressions** + +Run: `npx vitest run` +Expected: All existing tests pass. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/config/schema.ts src/agent/config.test.ts +git commit -m "feat(agent): add agent config schema with permissions, sandbox, and worker settings" +``` + +--- + +### Task 4: Tiered Permissions + +**Files:** +- Create: `src/agent/permissions.ts` +- Test: `src/agent/permissions.test.ts` + +- [ ] **Step 1: Write tests** + +```typescript +// src/agent/permissions.test.ts +import { describe, it, expect } from 'vitest'; +import { classifyCommand, PermissionClassifier, ApprovalTracker, isHardBlocked } from './permissions.js'; + +describe('classifyCommand', () => { + it('classifies safe commands', () => { + expect(classifyCommand('ls')).toBe('safe'); + expect(classifyCommand('cat file.txt')).toBe('safe'); + expect(classifyCommand('git status')).toBe('safe'); + expect(classifyCommand('git diff')).toBe('safe'); + expect(classifyCommand('npm test')).toBe('safe'); + expect(classifyCommand('npx vitest run')).toBe('safe'); + expect(classifyCommand('node script.js')).toBe('safe'); + }); + + it('classifies moderate commands', () => { + expect(classifyCommand('npm install express')).toBe('moderate'); + expect(classifyCommand('git add .')).toBe('moderate'); + expect(classifyCommand('git commit -m "msg"')).toBe('moderate'); + expect(classifyCommand('mkdir -p src/agent')).toBe('moderate'); + expect(classifyCommand('rm file.txt')).toBe('moderate'); + expect(classifyCommand('curl https://example.com')).toBe('moderate'); + }); + + it('classifies dangerous commands', () => { + expect(classifyCommand('rm -rf node_modules')).toBe('dangerous'); + expect(classifyCommand('git push origin main')).toBe('dangerous'); + expect(classifyCommand('git reset --hard')).toBe('dangerous'); + expect(classifyCommand('chmod 755 script.sh')).toBe('dangerous'); + expect(classifyCommand('echo "x" | bash')).toBe('dangerous'); + }); +}); + +describe('PermissionClassifier', () => { + it('respects custom safe overrides', () => { + const classifier = new 
PermissionClassifier({ + safe: ['docker build'], + dangerous: [], + }); + expect(classifier.classify('docker build .')).toBe('safe'); + }); + + it('respects custom dangerous overrides', () => { + const classifier = new PermissionClassifier({ + safe: [], + dangerous: ['kubectl delete'], + }); + expect(classifier.classify('kubectl delete pod foo')).toBe('dangerous'); + }); + + it('custom overrides take precedence over defaults', () => { + const classifier = new PermissionClassifier({ + safe: ['git push'], + dangerous: [], + }); + // git push is normally dangerous, but user overrode it + expect(classifier.classify('git push origin main')).toBe('safe'); + }); + + it('hard-block cannot be overridden by custom safe list', () => { + const classifier = new PermissionClassifier({ + safe: ['sudo'], + dangerous: [], + }); + expect(classifier.classify('sudo rm -rf /')).toBe('blocked'); + }); +}); + +describe('isHardBlocked', () => { + it('blocks sudo', () => { + expect(isHardBlocked('sudo apt install')).toBe(true); + }); + + it('does not block normal commands', () => { + expect(isHardBlocked('npm test')).toBe(false); + }); +}); + +describe('ApprovalTracker', () => { + it('tracks approvals by command type', () => { + const tracker = new ApprovalTracker(); + expect(tracker.isApproved('git push origin main')).toBe(false); + tracker.approve('git push origin main'); + expect(tracker.isApproved('git push origin develop')).toBe(true); + }); + + it('treats git push --force as the same type as git push', () => { + const tracker = new ApprovalTracker(); + tracker.approve('git push origin main'); + // "git push" is approved, but this normalizes to first 2 words + // so "git push --force" normalizes to "git push" — same type + // This is intentional: --force is part of args, not the command type + expect(tracker.isApproved('git push --force')).toBe(true); + }); +}); +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/agent/permissions.test.ts` +Expected: 
FAIL — module not found. + +- [ ] **Step 3: Implement permissions.ts** + +```typescript +// src/agent/permissions.ts +import type { PermissionTier } from './types.js'; + +const SAFE_PATTERNS = [ + /^ls\b/, /^cat\b/, /^head\b/, /^tail\b/, /^wc\b/, /^echo\b/, + /^git\s+(status|diff|log|show|branch|tag|remote|rev-parse)\b/, + /^npm\s+test\b/, /^npx\s+(vitest|jest|tsc|eslint|prettier)\b/, + /^node\b/, /^deno\b/, /^bun\s+(test|run)\b/, + /^cargo\s+test\b/, /^go\s+test\b/, /^python\s+-m\s+pytest\b/, + /^pwd$/, /^whoami$/, /^date$/, /^which\b/, /^env$/, + /^find\b/, /^grep\b/, /^rg\b/, /^fd\b/, +]; + +const DANGEROUS_PATTERNS = [ + /\brm\s+(-[a-zA-Z]*r|-[a-zA-Z]*f|--recursive|--force)\b/, + /\bgit\s+(push|reset|rebase|force-push)\b/, + /\bgit\s+checkout\s+--?\s/, + /\bchmod\b/, /\bchown\b/, + /\bsudo\b/, /\bsu\s/, + /\|/, // piped commands + /\bgit\s+branch\s+-[dD]\b/, +]; + +export function classifyCommand(command: string): PermissionTier { + const trimmed = command.trim(); + + for (const pattern of DANGEROUS_PATTERNS) { + if (pattern.test(trimmed)) return 'dangerous'; + } + + for (const pattern of SAFE_PATTERNS) { + if (pattern.test(trimmed)) return 'safe'; + } + + return 'moderate'; +} + +/** Check against the unoverridable hard-block list from run_command.ts. + * These are NEVER allowed, regardless of user config or mode. */ +export function isHardBlocked(command: string): boolean { + // Import DANGEROUS_PATTERNS from src/tools/run_command.ts + // These patterns (rm -rf /, sudo, mkfs, etc.) are the safety floor. 
+ const HARD_BLOCK = [ + /\brm\s+(-\w*r\w*f|-\w*f\w*r)\s+\/\s*$/, + /\bsudo\b/, /\bsu\s+-/, /\bmkfs\b/, /\bdd\b/, + /\bchmod\s+777\s+\//, /\bshutdown\b/, /\breboot\b/, + ]; + return HARD_BLOCK.some(p => p.test(command.trim())); +} + +export class PermissionClassifier { + private customSafe: string[]; + private customDangerous: string[]; + + constructor(overrides: { safe: string[]; dangerous: string[] }) { + this.customSafe = overrides.safe; + this.customDangerous = overrides.dangerous; + } + + classify(command: string): PermissionTier | 'blocked' { + const trimmed = command.trim(); + + // Hard-block check FIRST — unoverridable safety floor + if (isHardBlocked(trimmed)) return 'blocked'; + + // Custom overrides take precedence over defaults + for (const pattern of this.customSafe) { + if (trimmed.startsWith(pattern)) return 'safe'; + } + for (const pattern of this.customDangerous) { + if (trimmed.startsWith(pattern)) return 'dangerous'; + } + + return classifyCommand(trimmed); + } +} + +/** Tracks session-level approvals for "confirm once per type" in auto mode. */ +export class ApprovalTracker { + private approved = new Set(); + + /** Normalize command to its "type" (e.g., "git push" regardless of args). */ + private commandType(command: string): string { + return command.trim().split(/\s+/).slice(0, 2).join(' '); + } + + isApproved(command: string): boolean { + return this.approved.has(this.commandType(command)); + } + + approve(command: string): void { + this.approved.add(this.commandType(command)); + } +} +``` + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/permissions.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/agent/permissions.ts src/agent/permissions.test.ts +git commit -m "feat(agent): add tiered permission classifier (safe/moderate/dangerous)" +``` + +--- + +### Task 5: Sandbox + +**Files:** +- Create: `src/agent/sandbox.ts` +- Test: `src/agent/sandbox.test.ts` + +- [ ] **Step 1: Write tests** + +```typescript +// src/agent/sandbox.test.ts +import { describe, it, expect, vi } from 'vitest'; +import { detectSandboxStrategy, buildSandboxArgs } from './sandbox.js'; + +describe('detectSandboxStrategy', () => { + it('returns sandbox-exec on darwin', () => { + expect(detectSandboxStrategy('darwin')).toBe('sandbox-exec'); + }); + + it('returns permissions-only on win32', () => { + expect(detectSandboxStrategy('win32')).toBe('permissions-only'); + }); + + it('returns unshare or permissions-only on linux', () => { + const result = detectSandboxStrategy('linux'); + expect(['unshare', 'firejail', 'permissions-only']).toContain(result); + }); +}); + +describe('buildSandboxArgs', () => { + it('wraps command with sandbox-exec on darwin', () => { + const result = buildSandboxArgs('npm test', '/workspace', { + filesystem: 'workspace-only', + network: 'allowed', + timeout: 60000, + }, 'sandbox-exec'); + expect(result.command).toBe('sandbox-exec'); + expect(result.args).toContain('-p'); + }); + + it('returns passthrough for permissions-only', () => { + const result = buildSandboxArgs('npm test', '/workspace', { + filesystem: 'workspace-only', + network: 'allowed', + timeout: 60000, + }, 'permissions-only'); + expect(result.command).toBe('npm'); + expect(result.args[0]).toBe('test'); + }); +}); +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/agent/sandbox.test.ts` +Expected: FAIL — module not found. + +- [ ] **Step 3: Implement sandbox.ts** + +Implement `detectSandboxStrategy()`, `buildSandboxArgs()`, and `executeSandboxed()` per spec Section 8. 
For macOS: generate `sandbox-exec` profile scoping filesystem to workspace. For Linux: try `unshare`/`firejail` detection. For Windows/fallback: passthrough with logging. + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/sandbox.test.ts` +Expected: All tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/sandbox.ts src/agent/sandbox.test.ts +git commit -m "feat(agent): add OS-level command sandbox (macOS/Linux with fallback)" +``` + +--- + +### Task 6: Multimodal Input + +**Files:** +- Create: `src/agent/multimodal.ts` +- Modify: `src/providers/base.ts` (add `supportsVision` to `ProviderInfo` — no Message type changes) +- Test: `src/agent/multimodal.test.ts` + +**Key design decision:** Do NOT change `Message.content` from `string` to a union type. This would break ~50 call sites. Instead, define `AgentMessage` with `content: MessageContent` in `src/agent/types.ts`, used only within the agent module. `flattenForProvider()` converts `AgentMessage[]` → `Message[]` before passing to the adapter. 
+ +- [ ] **Step 1: Write tests** + +```typescript +// src/agent/multimodal.test.ts +import { describe, it, expect } from 'vitest'; +import { getTextContent, hasImages, flattenForProvider, loadImage } from './multimodal.js'; + +describe('getTextContent', () => { + it('returns string content as-is', () => { + expect(getTextContent({ role: 'user', content: 'hello' })).toBe('hello'); + }); + + it('extracts text from ContentPart array', () => { + expect(getTextContent({ + role: 'user', + content: [ + { type: 'text', text: 'Describe this' }, + { type: 'image', image: { data: 'abc', mediaType: 'image/png' } }, + ], + })).toBe('Describe this'); + }); + + it('joins multiple text parts', () => { + expect(getTextContent({ + role: 'user', + content: [ + { type: 'text', text: 'Part 1' }, + { type: 'text', text: 'Part 2' }, + ], + })).toBe('Part 1Part 2'); + }); +}); + +describe('hasImages', () => { + it('returns false for string content', () => { + expect(hasImages({ role: 'user', content: 'hello' })).toBe(false); + }); + + it('returns true when content has image parts', () => { + expect(hasImages({ + role: 'user', + content: [{ type: 'image', image: { data: 'abc', mediaType: 'image/png' } }], + })).toBe(true); + }); +}); + +describe('flattenForProvider', () => { + it('returns messages unchanged when supportsVision is true', () => { + const msgs = [{ role: 'user' as const, content: [{ type: 'text' as const, text: 'hi' }] }]; + expect(flattenForProvider(msgs, true)).toBe(msgs); + }); + + it('flattens multimodal to text with notice when no vision', () => { + const msgs = [{ + role: 'user' as const, + content: [ + { type: 'text' as const, text: 'Describe this' }, + { type: 'image' as const, image: { data: 'abc', mediaType: 'image/png' as const } }, + ], + }]; + const result = flattenForProvider(msgs, false); + expect(typeof result[0].content).toBe('string'); + expect(result[0].content).toContain('Describe this'); + expect(result[0].content).toContain('[Image provided'); + }); +}); 
+ +describe('loadImage', () => { + it('loads a local file and returns base64 + mediaType', async () => { + // Create a tiny 1x1 PNG in memory for testing + const { writeFile, unlink } = await import('node:fs/promises'); + const path = '/tmp/test-jam-image.png'; + const pngHeader = Buffer.from([137, 80, 78, 71, 13, 10, 26, 10]); + await writeFile(path, pngHeader); + const result = await loadImage(path); + expect(result.mediaType).toBe('image/png'); + expect(result.data).toBeTruthy(); + await unlink(path); + }); +}); +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/agent/multimodal.test.ts` +Expected: FAIL — module not found. + +- [ ] **Step 3: Add multimodal types to src/agent/types.ts and ProviderInfo** + +Add to `src/agent/types.ts`: + +```typescript +export interface ContentPart { + type: 'text' | 'image'; + text?: string; + image?: { + data: string; + mediaType: 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp'; + }; +} + +export type MessageContent = string | ContentPart[]; + +/** Agent-internal message type supporting multimodal content. + * Converted to standard Message (string content) via flattenForProvider(). */ +export interface AgentMessage { + role: 'system' | 'user' | 'assistant'; + content: MessageContent; +} +``` + +Add `supportsVision?: boolean` to `ProviderInfo` in `src/providers/base.ts`. Do NOT change `Message.content` type. + +- [ ] **Step 4: Implement multimodal.ts** + +Implement `getTextContent()`, `hasImages()`, `flattenForProvider()`, `loadImage()` (reads file, detects media type from extension, returns base64). + +- [ ] **Step 5: Run tests** + +Run: `npx vitest run src/agent/multimodal.test.ts` +Expected: All tests pass. + +- [ ] **Step 6: Run full test suite — no regressions expected** + +Run: `npx vitest run` +Expected: All tests pass. `Message` type in `base.ts` is unchanged. Only `ProviderInfo` got a new optional field. 
+ +- [ ] **Step 7: Commit** + +```bash +git add src/providers/base.ts src/agent/types.ts src/agent/multimodal.ts src/agent/multimodal.test.ts +git commit -m "feat(agent): add multimodal image input with provider fallback" +``` + +--- + +### Task 7: File-Lock Manager + +**Files:** +- Create: `src/agent/file-lock.ts` +- Test: `src/agent/file-lock.test.ts` + +- [ ] **Step 1: Write tests** + +Test ownership assignment, request-grant flow, deadlock detection (cycle in wait graph), release, and timeout. + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/agent/file-lock.test.ts` + +- [ ] **Step 3: Implement file-lock.ts** + +Implement `FileLockManager` class with: +- `assignOwnership(subtaskId, files: FileOwnership[])` — bulk assign from plan +- `requestFile(request: FileLockRequest): FileLockResponse` — check ownership, detect deadlock via wait-graph cycle detection +- `releaseAll(workerId: string)` — release all locks held by a worker +- `getOwner(path: string): string | undefined` +- Private `detectDeadlock(requestingWorker, waitForWorker): boolean` — DFS on wait graph + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/file-lock.test.ts` +Expected: All tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/file-lock.ts src/agent/file-lock.test.ts +git commit -m "feat(agent): add file-lock manager with deadlock detection" +``` + +--- + +### Task 8: Provider Pool (Semaphore) + +**Files:** +- Create: `src/agent/provider-pool.ts` +- Test: `src/agent/provider-pool.test.ts` + +- [ ] **Step 1: Write tests** + +Test semaphore acquire/release, concurrency limit enforcement, rate-limit pause, and token usage aggregation. 
+ +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Implement provider-pool.ts** + +Implement `ProviderPool` class with: +- `constructor(adapter: ProviderAdapter, concurrencyLimit: number)` +- `acquire(): Promise<ProviderLease>` — blocks via promise queue if at limit +- `release(lease: ProviderLease)` — releases slot, resolves next waiter +- `pauseForRateLimit(retryAfterMs: number)` — pauses all acquires +- `getTotalTokens(): TokenUsage` — aggregated across all leases + +`ProviderLease` wraps the adapter so token usage is tracked per-call. + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/provider-pool.test.ts` +Expected: All tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/provider-pool.ts src/agent/provider-pool.test.ts +git commit -m "feat(agent): add provider pool with semaphore-based concurrency control" +``` + +--- + +### Task 9: Conventions Analyzer (src/intel/) + +**Files:** +- Create: `src/intel/conventions.ts` (standalone function, NOT an AnalyzerPlugin — it needs root-level access to package.json, git log, config files, which doesn't fit the per-file `analyzeFile()` interface) +- Modify: `src/intel/index.ts` (add export for `analyzeConventions`) +- Test: `src/intel/conventions.test.ts` + +- [ ] **Step 1: Write tests** + +Test static analysis on the jam-cli project itself (or fixture): detect indent style (spaces/2), quotes (single), semicolons (true), naming convention (camelCase), test framework (vitest), test location (co-located), file naming (kebab-case.ts), package manager (npm), etc. + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/intel/conventions.test.ts` + +- [ ] **Step 3: Implement conventions.ts** + +Implement `analyzeConventions(root: string)` as a standalone exported function that: +1. Reads `package.json` / `pyproject.toml` / `Cargo.toml` to detect language, packageManager, linter, formatter, typeChecker, buildTool, testFramework, testCommand +2. 
Reads `.eslintrc*`, `.prettierrc*`, `tsconfig.json`, `biome.json` for code style +3. Samples 5-10 source files (first `.ts`/`.js`/`.py` files found in `src/`), analyzes: indent char/size, quote style, semicolons, trailing commas, naming convention +4. Scans for test directories and test file naming pattern +5. Reads `git log --oneline -20` for commit convention detection +6. Returns partial `WorkspaceProfile` (conventions only, no framework/entryPoints — those come from intel graph) + +Note: This is NOT an `AnalyzerPlugin`. It does not implement `analyzeFile()`. It is a root-level analysis function that reads project config files, samples source files, and queries git. It lives in `src/intel/` because it is code analysis, but it is consumed directly by `src/agent/workspace-intel.ts`. + +- [ ] **Step 4: Add export to src/intel/index.ts** + +Add: `export { analyzeConventions } from './conventions.js';` + +- [ ] **Step 5: Run tests** + +Run: `npx vitest run src/intel/conventions.test.ts` +Expected: All tests pass. + +- [ ] **Step 6: Commit** + +```bash +git add src/intel/conventions.ts src/intel/conventions.test.ts src/intel/index.ts +git commit -m "feat(intel): add conventions analyzer for code style and patterns detection" +``` + +--- + +### Task 10: Workspace Intelligence (Profile Builder) + +**Files:** +- Create: `src/agent/workspace-intel.ts` +- Test: `src/agent/workspace-intel.test.ts` + +- [ ] **Step 1: Write tests** + +Test profile building with cache hit (returns cached), cache miss (rebuilds), hash-based invalidation, integration with intel graph when available, and `formatProfileForPrompt()` output. + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Implement workspace-intel.ts** + +Implement: +- `buildWorkspaceProfile(root, adapter?)` — Layer 1: `analyzeConventions()`, Layer 2: load/build intel graph for structure, Layer 3: LLM pattern extraction if cache stale. Returns `WorkspaceProfile`. 
+- `loadCachedProfile(root): WorkspaceProfile | null` — reads `.jam/workspace-profile.json`, checks hash +- `computeProfileHash(root): string` — hash of package.json + src/ file list + config files +- `formatProfileForPrompt(profile: WorkspaceProfile): string` — formatted string for system prompt injection + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/workspace-intel.test.ts` +Expected: All tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/workspace-intel.ts src/agent/workspace-intel.test.ts +git commit -m "feat(agent): add workspace intelligence with cached profiling" +``` + +--- + +### Task 11: Planner + +**Files:** +- Create: `src/agent/planner.ts` +- Test: `src/agent/planner.test.ts` + +- [ ] **Step 1: Write tests** + +Test plan generation with mocked provider (returns JSON TaskPlan), DAG validation of generated plan, re-prompt on cycle detection, single-subtask optimization, and file ownership extraction. + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Implement planner.ts** + +Implement `generateTaskPlan(adapter, prompt, profile, options)`: +1. Build planning prompt with workspace profile context +2. Call `adapter.chatWithTools()` (or `streamCompletion`) with planning prompt requesting JSON output +3. Parse JSON response into `TaskPlan` +4. Validate DAG with `validateDAG()` — if cycle, re-prompt once with "no cycles" constraint +5. Return validated `TaskPlan` +6. On failure, throw `AGENT_PLAN_FAILED` + +Also implement `estimateTokenCost(plan: TaskPlan): number` for pre-execution budgeting. + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/planner.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/agent/planner.ts src/agent/planner.test.ts +git commit -m "feat(agent): add task planner with DAG validation and token cost estimation" +``` + +--- + +### Task 12: Worker + +**Files:** +- Create: `src/agent/worker.ts` +- Test: `src/agent/worker.test.ts` + +- [ ] **Step 1: Write tests** + +Test worker execution with mocked provider: +- Completes a subtask in N rounds (reads file, writes file, returns result) +- Respects `AbortSignal` cancellation — returns `status: 'cancelled'` +- On cancellation, rolls back uncommitted file writes via `git checkout -- <file>` +- Enforces round budget (stops at `estimatedRounds + 5`) +- At `estimatedRounds * 0.5` with no tool calls, injects correction hint (stuck detection) +- Returns `WorkerResult` with correct `filesChanged` and `summary` +- Runs `validationCommand` if provided +- Handles tool call errors gracefully +- Uses `ApprovalTracker` for "confirm once per type" in auto mode +- Checks `isHardBlocked()` before executing any command + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Implement worker.ts** + +Implement `executeWorker(options: WorkerOptions, deps: WorkerDeps): Promise<WorkerResult>`: + +`WorkerDeps` includes: adapter (via ProviderPool lease), toolRegistry, mcpManager, workspaceRoot, workspaceProfile, agentMode, permissionClassifier, sandboxConfig. + +The execution loop: +1. Build system prompt from workspace profile + subtask description +2. Build initial messages from `SubtaskContext` +3. Create `WorkingMemory` instance +4. Loop up to `estimatedRounds + 5`: + - Check `signal.aborted` → return cancelled result + - Check scratchpad/compaction triggers + - Call `adapter.chatWithTools()` with all tool schemas + - If no tool calls → worker is done, return result + - Execute tool calls through registry (with permission classifier + sandbox) + - Track file changes (diffs captured via `git diff` after each write) + - StepVerifier check every 3 rounds +5. 
Run `validationCommand` if provided +6. Generate summary via LLM +7. Return `WorkerResult` + +Migrate guardrails from `run.ts`: +- Write-enforcement: detect code blocks in assistant content, re-prompt +- Read-before-write: auto-read files before allowing writes +- Shrinkage guard: warn if write is shorter than original + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/worker.test.ts` +Expected: All tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add src/agent/worker.ts src/agent/worker.test.ts +git commit -m "feat(agent): add worker execution loop with guardrails and round budget" +``` + +--- + +### Task 13: Orchestrator + +**Files:** +- Create: `src/agent/orchestrator.ts` +- Test: `src/agent/orchestrator.test.ts` + +- [ ] **Step 1: Write tests** + +Test orchestrator end-to-end with mocked planner and workers: +- Single-subtask plan: dispatches one worker, no file-lock overhead, returns result +- Multi-subtask with deps: dispatches in topological order +- Parallel dispatch: independent subtasks run concurrently (verify via timing) +- Error recovery (auto mode): failed worker retried once, then skipped +- Error recovery (supervised mode): prompts user to retry/skip/abort +- File-lock conflict: two workers requesting same file — deadlock detected, lower-priority worker re-queued +- Token budget enforcement: pre-execution estimate warns user, stops when budget exceeded +- Cancellation: user abort propagates to workers +- Adaptive round estimates: if first subtask took 2x estimated, scale up remaining +- Cross-subtask summary compression: prior worker output summarized to ~200 tokens + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Implement orchestrator.ts** + +Implement `Orchestrator` class: + +```typescript +class Orchestrator { + constructor(deps: OrchestratorDeps) {} + + async execute(prompt: string, options: OrchestratorOptions): Promise<OrchestratorResult> { + // 1. Build workspace profile + // 2. Generate task plan + // 3. 
Estimate token cost, warn if high + // 4. Assign file ownership from plan + // 5. Walk dependency graph topologically + // 6. Dispatch workers (parallel when independent, up to maxWorkers) + // 7. Monitor: handle file-lock requests, track progress + // 8. Collect results, resolve conflicts + // 9. Run validation commands + // 10. Generate summary + return result; + } +} +``` + +`OrchestratorDeps`: adapter, toolRegistry, mcpManager, config, workspaceRoot. +`OrchestratorOptions`: prompt, images, mode (supervised/auto), maxWorkers, signal. +`OrchestratorResult`: results per subtask, total tokens, files changed, summary. + +Key behaviors: +- Uses `Promise.all()` for parallel worker dispatch within concurrency limit +- Uses `ProviderPool` for safe adapter access +- Uses `FileLockManager` for file ownership +- Aggregates `WorkerResult` array +- Calls `criticEvaluate()` on merged result if provider supports it +- Progress callback for UI (worker started, completed, tool call, etc.) + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/orchestrator.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/agent/orchestrator.ts src/agent/orchestrator.test.ts +git commit -m "feat(agent): add orchestrator with parallel dispatch, file-lock, and budget control" +``` + +--- + +### Task 13.5: Progress Reporter + +**Files:** +- Create: `src/agent/progress.ts` +- Test: `src/agent/progress.test.ts` + +- [ ] **Step 1: Write tests** + +Test progress output rendering: +- Multiplexed worker output: `[Worker 1: Create model]` prefix format +- Status bar: `[2/4 subtasks complete | 3 workers active | 1,240 tokens used]` +- Quiet mode: suppresses all output +- JSON mode: structured output per worker +- Event callbacks: worker started, tool call, tool result, worker completed, worker failed + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `npx vitest run src/agent/progress.test.ts` + +- [ ] **Step 3: Implement progress.ts** + +Implement `ProgressReporter` class: + +```typescript +type OutputMode = 'interactive' | 'default' | 'quiet' | 'json'; + +interface ProgressEvent { + type: 'worker-started' | 'worker-completed' | 'worker-failed' | 'tool-call' | 'tool-result' | 'status-update'; + workerId: string; + workerLabel: string; + data?: unknown; +} + +class ProgressReporter { + constructor(private mode: OutputMode, private write: (msg: string) => void) {} + + onEvent(event: ProgressEvent): void { /* format and write based on mode */ } + updateStatusBar(completed: number, total: number, activeWorkers: number, tokens: number): void {} + getJsonResults(): unknown[] { /* for --json mode */ } +} +``` + +- [ ] **Step 4: Run tests** + +Run: `npx vitest run src/agent/progress.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 5: Commit** + +```bash +git add src/agent/progress.ts src/agent/progress.test.ts +git commit -m "feat(agent): add progress reporter with multiplexed worker output" +``` + +--- + +### Task 14: Upgrade `jam run` + +**Files:** +- Modify: `src/commands/run.ts` +- Modify: `src/index.ts` (add new CLI flags) +- Test: `src/commands/run.test.ts` (add integration tests) + +- [ ] **Step 1: Write integration test** + +Test that `runRun` with a simple prompt uses the orchestrator, respects `--auto`, `--workers`, and `--image` flags. Mock the provider to return a single-subtask plan. + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Refactor run.ts to delegate to Orchestrator** + +Replace the core agentic loop in `runRun()` with: +1. Create `Orchestrator` with deps +2. Call `orchestrator.execute(prompt, { mode, maxWorkers, images, signal })` +3. Render `OrchestratorResult` to stdout +4. Keep `JAM_LEGACY_RUN=1` env var check to fall back to old loop + +Keep existing CLI interface. Add new flags: +- `--auto`: set agent mode to 'auto' (autonomous execution with confirm-once-per-type for dangerous) +- `--yes` / `-y`: retained for backward compat — sets `toolPolicy: 'always'`. `--auto` implies `--yes`. +- `--workers <n>`: max parallel workers +- `--image <path>`: repeatable, attach images +- `--file <path>`: read prompt from file (complements existing inline arg + stdin) +- `--no-sandbox`: disable sandbox +- `--json` and `--quiet`: existing flags, but update behavior for parallel worker output + +- [ ] **Step 4: Update src/index.ts command registration** + +Add new options to the `run` command: +```typescript +.option('--auto', 'Fully autonomous mode') +.option('--workers <n>', 'Max parallel workers', '3') +.option('--image <path>', 'Attach image', collect) // collect into array +.option('--no-sandbox', 'Disable OS sandbox') +``` + +- [ ] **Step 5: Run tests** + +Run: `npx vitest run src/commands/run.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 6: Run full test suite** + +Run: `npx vitest run` +Expected: All tests pass including existing run tests. + +- [ ] **Step 7: Commit** + +```bash +git add src/commands/run.ts src/index.ts src/commands/run.test.ts +git commit -m "feat(agent): upgrade jam run to use orchestrator with parallel workers" +``` + +--- + +### Task 15: Rewrite `jam go` + +**Files:** +- Modify: `src/commands/go.ts` +- Modify: `src/index.ts` (add new CLI flags) +- Test: `src/commands/go.test.ts` + +- [ ] **Step 1: Write tests** + +Test the interactive loop: +- Processes a single task via orchestrator +- Handles `/stop` command (cancels current orchestrator) +- Handles `/compact` command (compacts session memory) +- Handles `/status` command (shows token usage + worker status) +- Session compaction between tasks (prior task summarized) + +- [ ] **Step 2: Run tests to verify they fail** + +- [ ] **Step 3: Rewrite go.ts** + +Replace the `startChat()` delegation with an interactive console: +1. Load config, create provider, workspace profile +2. Create session-level context (session summary, workspace profile) +3. Enter readline loop: + - Read user input + - Check for commands (`/stop`, `/compact`, `/status`, `/exit`) + - Create `Orchestrator` and call `execute(input, { mode, images, signal })` + - Display multiplexed worker output + - After completion, compact session context +4. Handle Ctrl+C gracefully (abort current orchestrator, don't exit) + +Add CLI flags: `--auto`, `--workers <n>`, `--image <path>`, `--no-sandbox` + +- [ ] **Step 4: Update src/index.ts command registration** + +Add new options to `go` command. + +- [ ] **Step 5: Run tests** + +Run: `npx vitest run src/commands/go.test.ts` +Expected: All tests pass. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/commands/go.ts src/index.ts src/commands/go.test.ts +git commit -m "feat(agent): rewrite jam go as interactive agent console with session memory" +``` + +--- + +### Task 16: Barrel Export & Integration Test + +**Files:** +- Create: `src/agent/index.ts` +- Create: `src/agent/integration.test.ts` + +- [ ] **Step 1: Create barrel export** + +```typescript +// src/agent/index.ts +export * from './types.js'; +export { PermissionClassifier, classifyCommand } from './permissions.js'; +export { detectSandboxStrategy, buildSandboxArgs, executeSandboxed } from './sandbox.js'; +export { getTextContent, hasImages, flattenForProvider, loadImage } from './multimodal.js'; +export { FileLockManager } from './file-lock.js'; +export { ProviderPool } from './provider-pool.js'; +export { buildWorkspaceProfile, formatProfileForPrompt } from './workspace-intel.js'; +export { generateTaskPlan } from './planner.js'; +export { executeWorker } from './worker.js'; +export { Orchestrator } from './orchestrator.js'; +``` + +- [ ] **Step 2: Write integration test** + +End-to-end test with a mocked provider that simulates a 2-subtask plan: +1. Subtask 1: create a file (worker reads dir, writes file) +2. Subtask 2: modify the file (depends on subtask 1, reads file, writes modified version) + +Verify: files created/modified, dependency order respected, orchestrator result has correct summaries and token usage. + +- [ ] **Step 3: Run integration test** + +Run: `npx vitest run src/agent/integration.test.ts` +Expected: Pass. + +- [ ] **Step 4: Run full test suite** + +Run: `npx vitest run` +Expected: All tests pass (existing + new). + +- [ ] **Step 5: Run type check** + +Run: `npx tsc --noEmit` +Expected: No errors. 
+ +- [ ] **Step 6: Commit** + +```bash +git add src/agent/index.ts src/agent/integration.test.ts +git commit -m "feat(agent): add barrel export and end-to-end integration test" +``` + +- [ ] **Step 7: Final commit — version bump** + +Update `package.json` version to `0.8.0` (major feature addition). + +```bash +git add package.json +git commit -m "chore: bump version to 0.8.0 for agent engine" +``` diff --git a/docs/specs/2026-03-20-jam-agent-engine-design.md b/docs/specs/2026-03-20-jam-agent-engine-design.md new file mode 100644 index 0000000..d395396 --- /dev/null +++ b/docs/specs/2026-03-20-jam-agent-engine-design.md @@ -0,0 +1,805 @@ +# Jam Agent Engine — Design Spec + +**Date:** 2026-03-20 +**Status:** Draft +**Scope:** `jam go` (interactive agent) + `jam run` (one-shot agent) sharing a new agent engine + +--- + +## 1. Problem + +Jam CLI has read-only agentic capabilities (`jam ask`) and basic write tools (`jam run`, `jam go`), but lacks the autonomous multi-step coding capabilities that modern AI CLI tools offer: file editing with context awareness, shell execution within agentic loops, multimodal input, parallel task execution, and workspace convention understanding. + +## 2. Goals + +- **`jam go`**: Interactive console for continuous agentic work — user types tasks, agent executes, user gives feedback mid-flight +- **`jam run`**: One-shot autonomous execution — single prompt, runs to completion, exits. Suitable for CI/scripts +- Both commands share the same agent engine with full capabilities: + - Multi-step task decomposition with parallel worker execution + - File editing with workspace convention awareness + - Shell execution with tiered permissions and OS-level sandboxing + - Image input for screenshots, diagrams, mockups + - Intelligent workspace profiling cached for future use + +## 3. 
Non-Goals (v1) + +- Audio/video input (future) +- Auto-screenshot of running apps (future) +- Container-based sandboxing (future) +- Windows OS-level sandboxing (permissions-only for v1, sandbox in v2) +- Plugin API for custom agent capabilities (future) + +--- + +## 4. Architecture + +### 4.1 Module Layout + +``` +src/agent/ + orchestrator.ts — task decomposition, dispatch, merge, conflict resolution + worker.ts — single-subtask execution loop + planner.ts — multi-step plan generation with dependency graph + sandbox.ts — OS-level command sandboxing (macOS/Linux) + permissions.ts — tiered permission system (safe/moderate/dangerous) + multimodal.ts — image input parsing, encoding, provider routing + file-lock.ts — hybrid file ownership + request-grant protocol + workspace-intel.ts — convention detection, style analysis, cached profiling + types.ts — shared types + index.ts — barrel export +``` + +Entry points: +- `src/commands/go.ts` — interactive console, calls orchestrator per user task +- `src/commands/run.ts` — one-shot, single orchestrator invocation then exit + +Shared infrastructure (reused, not duplicated): +- `src/tools/` — ToolRegistry, all read+write tools, safePath, policies +- `src/utils/agent.ts` — planning utils, step verification, tool call tracking +- `src/utils/memory.ts` — WorkingMemory, context compaction, scratchpad +- `src/utils/critic.ts` — answer quality evaluation +- `src/providers/` — ProviderAdapter (extended for multimodal) +- `src/mcp/` — McpManager shared across workers +- `src/config/` — JamConfigSchema (extended with agent section) + +### 4.2 Data Flow + +``` +User prompt + images + | + Workspace Intelligence (cached profile) + | + Orchestrator + | + Planner (LLM call -> TaskPlan with dependency graph) + | + Orchestrator dispatches workers (topological order) + | + +----------+-----------+ + Worker A Worker B Worker C (parallel if independent) + +----------+-----------+ + | + Orchestrator merges results + | + Conflict resolution (auto 
or user-prompted) + | + Validation (tests, lint, type-check) + | + Final summary + applied changes +``` + +### 4.3 Concurrency Model + +Parallel workers need safe access to the provider and MCP servers. + +**Provider access:** Workers share a single `ProviderAdapter` instance through a request semaphore. The semaphore limits concurrent `chatWithTools()` calls to avoid API rate limits: + +```typescript +interface ProviderPool { + acquire(): Promise<ProviderLease>; // blocks if at concurrency limit + release(lease: ProviderLease): void; + concurrencyLimit: number; // default: 3 (matches maxWorkers) +} +``` + +- Token usage is aggregated: each `WorkerResult` includes `tokensUsed`, orchestrator sums them. +- On rate-limit (429/retry-after): semaphore pauses all workers until cooldown expires. +- Provider adapters with internal state (e.g., `CopilotAdapter.ensureBackend()`) are protected by the semaphore — only one initialization can run at a time. + +**MCP access:** MCP servers are stdio-based child processes. Concurrent tool calls are serialized per-server via an internal queue in `McpManager`. Multiple servers can be called in parallel. + +### 4.4 Cancellation Protocol + +Workers accept an `AbortSignal` from the orchestrator: + +```typescript +interface WorkerOptions { + subtask: Subtask; + context: SubtaskContext; + signal: AbortSignal; // orchestrator can abort at any time +} +``` + +Cancellation triggers: +- User presses Ctrl+C or types "stop" in `jam go` interactive mode +- Orchestrator detects a critical dependency failed (no point continuing) +- Worker exceeds its round budget (see Section 6.6) +- Global timeout exceeded + +On cancellation, workers: +1. Stop after the current tool call completes (no mid-tool abort) +2. Roll back any uncommitted file writes from the current round +3. 
Return a `WorkerResult` with `status: 'cancelled'` and a summary of partial work + +### 4.5 Command Comparison + +| Aspect | `jam go` | `jam run` | +|--------|----------|-----------| +| Mode | Interactive console | One-shot, exits when done | +| Input | Continuous — type tasks, give feedback | Single prompt (arg, stdin, `--file`) | +| Default autonomy | Supervised (user is present) | Semi-autonomous (user isn't watching) | +| `--auto` | Available, promotes to fully autonomous | Primary usage mode | +| Use case | Dev at keyboard | CI/CD, scripts, automation | +| Session | Persistent, multi-task | Single task | + +--- + +## 5. Orchestrator + +### 5.1 Lifecycle + +``` +1. PLAN — LLM generates TaskPlan from prompt + WorkspaceProfile +2. ASSIGN — Assign file ownership per subtask from plan +3. DISPATCH — Launch workers (parallel when dependency graph allows) +4. MONITOR — Track progress, handle file-lock requests +5. MERGE — Collect WorkerResults, resolve file conflicts +6. VERIFY — Run validation commands (tests, lint, type-check) +7. REPORT — Summary of changes to user +``` + +### 5.2 TaskPlan + +```typescript +interface TaskPlan { + goal: string; + subtasks: Subtask[]; + dependencyGraph: Map<string, string[]>; // subtaskId -> [blockedBy] +} + +interface Subtask { + id: string; + description: string; + files: FileOwnership[]; + estimatedRounds: number; + validationCommand?: string; // e.g. "npm test -- --grep user" +} + +interface FileOwnership { + path: string; + mode: 'create' | 'modify' | 'read-only'; +} +``` + +### 5.3 Dependency-Aware Dispatch + +The orchestrator walks the dependency graph topologically. Independent subtasks launch in parallel. When a subtask completes, its dependents become eligible. 
+ +Example — "Add REST API with tests and docs": +``` +Subtask 1: Create user model (no deps -> starts immediately) +Subtask 2: Create API routes (depends on 1) +Subtask 3: Write tests (depends on 2) } parallel +Subtask 4: Update API docs (depends on 2) } +``` + +Max parallel workers: 3 (configurable via `--workers N`). + +### 5.4 File-Lock Protocol + +Default: file ownership assigned from plan. When a worker needs an unplanned file: + +1. Worker sends `REQUEST_FILE` to orchestrator with path + reason +2. Orchestrator checks: is the owner done with the file? +3. Available -> grants ownership, worker continues +4. Locked -> worker queues and waits (or orchestrator reorders) +5. `--auto` mode: auto-resolves. Supervised: asks user. + +**Deadlock prevention:** The orchestrator maintains a wait graph. Before granting a file-lock request, it checks for cycles (Worker A waits on B, B waits on A). If a cycle is detected: +1. The worker with the lower-priority subtask (later in dependency order) is cancelled +2. Its partial work is saved, and it's re-queued to run after the blocking worker completes +3. In supervised mode, the user is informed and can choose an alternative resolution + +**Dependency graph validation:** The planner's output is validated for DAG properties before dispatch. If the LLM generates a cyclic dependency graph, the orchestrator rejects the plan and re-prompts the planner with an explicit "no cycles" constraint. + +--- + +## 6. Worker + +### 6.1 Evolved from `jam run` + +Each worker is a focused agentic loop scoped to one subtask. 
Differences from current `jam run`: + +| Aspect | `jam run` (current) | Worker (new) | +|--------|---------------------|--------------| +| Scope | Entire user prompt | Single subtask | +| Max rounds | 15 fixed | Dynamic (estimated, default 20, max 50) | +| Tools | All tools, flat | All tools + orchestrator IPC | +| Context | One shared window | Own WorkingMemory + prior subtask summary | +| Completion | Model stops calling tools | Meets validationCommand + model signals done | + +### 6.2 Worker Lifecycle + +``` +1. INIT — Receives subtask, file ownership, prior context summary +2. PLAN — Quick local plan (generateExecutionPlan in readwrite mode) +3. EXECUTE — Agentic loop: read -> think -> write -> verify +4. VALIDATE — Runs subtask's validationCommand if provided +5. REPORT — Returns WorkerResult to orchestrator +``` + +### 6.3 WorkerResult + +```typescript +interface WorkerResult { + subtaskId: string; + status: 'completed' | 'failed' | 'blocked' | 'cancelled'; + filesChanged: FileChange[]; + summary: string; // LLM-generated summary + tokensUsed: TokenUsage; + error?: string; +} + +interface FileChange { + path: string; + action: 'created' | 'modified' | 'deleted'; + diff: string; // unified diff +} +``` + +### 6.4 Context Handoff + +When subtask B depends on completed subtask A, orchestrator passes: + +```typescript +interface SubtaskContext { + priorSummary: string; // what was done + filesAvailable: string[]; // files created/modified by prior subtasks + planReminder: string; // current subtask description + context +} +``` + +Each worker starts with a fresh context window — no accumulated history from prior subtasks. 
+ +### 6.5 Error Recovery + +- Worker fails -> orchestrator gets error + context summary +- `--auto`: retry once with error context, then skip subtask, continue remaining +- Supervised: ask user to retry, skip, or abort +- Blocked (file-lock timeout): orchestrator reorders or escalates + +### 6.6 Round Budget Policy + +Each subtask has an `estimatedRounds` from the planner (default 20, max 50). + +- At `estimatedRounds` reached: orchestrator injects a `StepVerifier` check. If verifier says `ready-to-answer` or `need-more` (with progress), worker gets 5 bonus rounds. +- At `estimatedRounds + 5`: hard stop. Worker must synthesize a result from whatever it has. +- At `estimatedRounds * 0.5`: if no tool calls have been made, orchestrator flags the worker as potentially stuck and injects a correction hint. +- The orchestrator tracks actual vs estimated rounds per subtask. For later subtasks in the same plan, it adjusts estimates based on observed drift (e.g., if first two subtasks took 2x estimated, scale up remaining estimates). + +--- + +## 7. Tiered Permissions + +### 7.1 Three Tiers + +```typescript +type PermissionTier = 'safe' | 'moderate' | 'dangerous'; +``` + +| Tier | Examples | Supervised | `--auto` | +|------|----------|-----------|----------| +| Safe | `ls`, `cat`, `git status`, `git diff`, `npm test`, `npx tsc`, file reads | Auto-approve | Auto-approve | +| Moderate | `npm install`, `git add`, `git commit`, `mkdir`, `rm` (single file), file writes, `curl` | Auto-approve | Auto-approve | +| Dangerous | `rm -rf`, `git push`, `git reset`, `chmod`, `sudo`, piped commands with write side-effects | Confirm always | Confirm once per type | + +"Confirm once per type" in `--auto`: approving `git push` once auto-approves subsequent `git push` in that session. `git push --force` is a separate confirmation. 
+ +### 7.2 Relationship with Existing `DANGEROUS_PATTERNS` + +The existing `run_command.ts` has a hardcoded `DANGEROUS_PATTERNS` blocklist (`rm -rf /`, `sudo`, `mkfs`, etc.) that performs a hard block — commands are rejected outright regardless of user confirmation. + +The tiered permission system layers on top: +- **`DANGEROUS_PATTERNS` remains as an unoverridable safety floor.** These commands are always blocked, even in `--auto` mode, even if the user adds them to their `safe` config. They represent catastrophic system-level risk. +- **Tiered permissions handle everything else.** Commands not in the hard-block list are classified as safe/moderate/dangerous and subject to the confirmation rules in Section 7.1. + +``` +Command received + -> DANGEROUS_PATTERNS check (hard block, unoverridable) + -> Tier classifier (safe/moderate/dangerous) + -> Confirmation rules based on tier + mode + -> Sandbox wrapper (if moderate/dangerous) + -> Execute +``` + +### 7.3 Classifier + +Pattern matching on command strings. Built-in defaults with user overrides in config: + +```yaml +agent: + permissions: + safe: ["npm test", "cargo test", "go test"] + dangerous: ["docker rm", "kubectl delete"] +``` + +--- + +## 8. Sandbox + +### 8.1 OS-Level Command Sandboxing + +Wraps command execution for moderate and dangerous tiers. 
+ +```typescript +interface SandboxConfig { + filesystem: 'workspace-only' | 'unrestricted'; // default: workspace-only + network: 'blocked' | 'allowed'; // default: allowed + timeout: number; // default: 60s +} +``` + +### 8.2 Platform Implementation + +| Platform | Primary | Fallback | +|----------|---------|----------| +| macOS | `sandbox-exec` (deprecated but functional) | Permissions-only | +| Linux | `unshare` + mount namespace, or `firejail` | Permissions-only | +| Windows | Permissions-only (v1) | Permissions-only | + +- Safe-tier commands skip sandbox entirely (performance) +- Sandbox adds ~50ms overhead per invocation +- If OS sandbox unavailable, falls back to permissions-only with warning logged +- Windows v2: explore Windows Sandbox (lightweight VM) or WSL2 integration + +**macOS note:** `sandbox-exec` is deprecated since macOS 10.15 but still functional. It may break on future macOS versions. `jam doctor` will verify sandbox availability on the current system. If `sandbox-exec` is unavailable, falls back to permissions-only with a warning. Future: investigate App Sandbox entitlements or seatbelt profiles as replacements. + +### 8.3 Config + +```yaml +agent: + sandbox: + filesystem: 'workspace-only' + network: 'allowed' +``` + +--- + +## 9. Multimodal Input + +### 9.1 Scope (v1) + +Image input only: screenshots, diagrams, UI mockups. 
+ +### 9.2 Extended Message Type + +```typescript +// src/providers/base.ts +type MessageContent = string | ContentPart[]; + +interface ContentPart { + type: 'text' | 'image'; + text?: string; + image?: { + data: string; // base64-encoded + mediaType: 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp'; + }; +} + +interface Message { + role: 'system' | 'user' | 'assistant'; + content: MessageContent; // string for backwards compat +} +``` + +### 9.3 CLI Input + +```bash +jam go "fix the layout bug" --image screenshot.png +jam go "match this design" --image design.png --image current.png +jam run "build this component" --image mockup.png +pbpaste | jam go "what's wrong with this error?" +jam go "build this" --image https://example.com/mockup.png +``` + +### 9.4 Provider Compatibility + +```typescript +interface ProviderInfo { + name: string; + supportsStreaming: boolean; + supportsTools?: boolean; + supportsVision?: boolean; // NEW + contextWindow?: number; +} +``` + +When `supportsVision` is false, images are stripped and replaced with a notice: `[Image provided but this model doesn't support vision]`. + +For Ollama, vision support depends on model (llava = yes, llama3.2 = no) — checked at runtime via model metadata. + +### 9.5 Message Type Migration + +The `Message.content` type change from `string` to `string | ContentPart[]` is a cross-cutting concern affecting ~50 call sites across providers, commands, and utils. + +**Phased approach:** + +**Phase 1 (agent engine only):** Introduce `ContentPart[]` only in `src/agent/` code paths. A `getTextContent(msg: Message): string` helper extracts text for backward compatibility. Existing commands (`ask`, `chat`, `commit`, etc.) continue using `string` content unchanged. + +```typescript +// src/agent/multimodal.ts +function getTextContent(msg: Message): string { + if (typeof msg.content === 'string') return msg.content; + return msg.content + .filter(p => p.type === 'text') + .map(p => p.text!) 
+ .join(''); +} +``` + +**Phase 2 (provider adapters):** Update provider adapters that support vision (OpenAI, Anthropic, Gemini, Ollama/llava) to handle `ContentPart[]` in `chatWithTools()`. Non-vision providers receive pre-flattened `string` content via `flattenForProvider()`: + +```typescript +function flattenForProvider(messages: Message[], supportsVision: boolean): Message[] { + if (supportsVision) return messages; + return messages.map(m => ({ + ...m, + content: typeof m.content === 'string' + ? m.content + : getTextContent(m) + (hasImages(m) ? '\n[Image provided but this model does not support vision]' : '') + })); +} +``` + +**Phase 3 (full rollout):** Migrate remaining commands to support `ContentPart[]` if needed. This is optional — most commands will never need multimodal. + +Existing string operations like `m.content.startsWith('[Tool result:')` in `agent.ts` work unchanged in Phase 1 because those code paths only receive `string` content. + +### 9.6 Images in Agentic Loop + +- Images attached to the initial user message only +- Workers receive text description of the image in their context summary (saves tokens) +- If a worker specifically needs the image (e.g., UI implementation subtask), orchestrator passes it through + +--- + +## 10. Workspace Intelligence + +### 10.1 Purpose + +Before any planning or execution, build a comprehensive understanding of the codebase's conventions, patterns, and structure. This ensures the agent writes code that matches existing style and uses existing utilities. 
+ +### 10.2 WorkspaceProfile + +```typescript +interface WorkspaceProfile { + // Structure + language: string; + framework?: string; + monorepo: boolean; + srcLayout: string; + entryPoints: string[]; + + // Code conventions + codeStyle: { + indent: 'tabs' | 'spaces'; + indentSize: number; + quotes: 'single' | 'double'; + semicolons: boolean; + trailingCommas: boolean; + namingConvention: 'camelCase' | 'snake_case' | 'PascalCase'; + }; + fileNaming: string; + exportStyle: 'named' | 'default' | 'barrel'; + importStyle: 'relative' | 'alias'; + + // Patterns + errorHandling: string; + logging: string; + configPattern: string; + + // Testing + testFramework: string; + testLocation: string; + testNaming: string; + testStyle: string; + coverageThreshold?: number; + testCommand: string; + + // Git + commitConvention: string; + branchPattern: string; + + // Tooling + packageManager: string; + linter?: string; + formatter?: string; + typeChecker?: string; + buildTool?: string; +} +``` + +### 10.3 Three-Layer Build + +**Layer 1: Static analysis (no LLM, fast)** +- Parse package.json / pyproject.toml / Cargo.toml +- Read config files (.eslintrc, .prettierrc, tsconfig.json, biome.json) +- Sample 5-10 source files for style detection (indent, quotes, naming) +- Check test directory structure and naming patterns +- Read git log for commit convention +- Detect src layout by directory scanning + +**Layer 2: Pattern extraction (one targeted LLM call)** +- Feed 3-4 representative source files to the model +- Ask: "What patterns does this codebase follow?" 
+- Extracts: error handling, logging, config, architectural patterns +- ~500 output tokens + +**Layer 3: Cache and persist** +- Saved to `.jam/workspace-profile.json` +- Hash-based staleness check (hash of package.json + src/ file list + config files) +- If hash matches -> load from cache (instant) +- If hash differs -> rebuild (~2-3s static + 1 LLM call) + +### 10.4 Integration with `jam intel` + +Workspace Intelligence is an extension of `jam intel`, not a parallel system. The `jam intel` scanner is authoritative for structural analysis (framework detection, entry points, dependency graph). WorkspaceIntel adds the convention/style layer on top. + +**Architecture:** +- Style analysis (indentation, quotes, naming, test patterns) is implemented as a new analyzer in `src/intel/analyzers/conventions.ts` that plugs into the existing scanner framework. +- `workspace-intel.ts` in `src/agent/` is a consumer that imports from `src/intel/`, not a standalone analyzer. +- The WorkspaceProfile references the intel graph as authoritative for structure, adding only conventions on top. 
+ +```typescript +async function buildWorkspaceProfile(root: string): Promise<WorkspaceProfile> { + // Layer 1: static convention analysis (new analyzer in src/intel/) + const conventions = await analyzeConventions(root); + + // Structural data from intel graph (authoritative) + const intelGraph = await loadOrBuildIntelGraph(root); // runs lightweight scan if not cached + const structure = extractStructure(intelGraph); // framework, entryPoints, srcLayout + + // Layer 2: LLM pattern extraction (only if cache stale) + const patterns = await extractPatterns(root, { ...conventions, ...structure }); + + // Merge: intel graph for structure, conventions analyzer for style, LLM for patterns + const profile = merge(structure, conventions, patterns); + await saveProfile(root, profile); + return profile; +} +``` + +**Single source of truth:** Both `.jam/intel/` (code graph) and `.jam/workspace-profile.json` (conventions + patterns) are cached, but the profile's structural fields always come from the intel graph. No divergent representations. + +### 10.5 Injection into Workers + +The WorkspaceProfile is formatted and injected into every worker's system prompt: + +``` +You are working in a TypeScript/Express project. +- Style: 2-space indent, single quotes, semicolons, camelCase +- Files: kebab-case.ts with barrel exports (index.ts) +- Imports: relative paths (../utils/), not aliases +- Errors: custom JamError class with error codes (see src/utils/errors.ts) +- Logging: custom Logger with API key redaction (see src/utils/logger.ts) +- Tests: vitest, co-located *.test.ts files, describe/it style +- Run tests: npm test +- Commits: conventional (feat:, fix:, chore:) +``` + +--- + +## 11. 
Configuration + +### 11.1 New Config Section + +```yaml +# .jamrc.yml +agent: + maxWorkers: 3 + defaultMode: 'supervised' # 'supervised' | 'auto' + maxRoundsPerWorker: 20 + permissions: + safe: [] # additional safe patterns + dangerous: [] # additional dangerous patterns + sandbox: + filesystem: 'workspace-only' # 'workspace-only' | 'unrestricted' + network: 'allowed' # 'allowed' | 'blocked' + timeout: 60000 # ms per command +``` + +### 11.2 CLI Flags + +``` +jam go [options] + --auto Fully autonomous mode (no confirmations except dangerous) + --image <path> Attach image(s) to the task + --workers <n> Max parallel workers (default: 3) + --no-sandbox Disable OS-level sandboxing + --yes Auto-confirm all prompts (alias for --auto) + +jam run [options] + --auto Fully autonomous (default behavior) + --image <path> Attach image(s) + --workers <n> Max parallel workers (default: 3) + --no-sandbox Disable OS-level sandboxing + --file <path> Read prompt from file + --json JSON output + --quiet Suppress non-essential output +``` + +--- + +## 12. Parallel Worker Output + +### 12.1 Progress Display + +When multiple workers run simultaneously in `jam go`: +- Multiplexed output with worker prefixes: `[Worker 1: Create user model]`, `[Worker 2: Write tests]` +- Each worker's tool calls and results shown inline under its prefix +- A status bar shows overall progress: `[2/4 subtasks complete | 3 workers active | 1,240 tokens used]` + +In `jam run` (non-interactive): +- `--quiet`: only final summary +- Default: worker prefixes + tool calls on stderr, final result on stdout +- `--json`: structured JSON with per-worker results + +--- + +## 13. 
Testing Strategy + +### 13.1 Unit Tests + +- `permissions.ts` — tier classification for known commands +- `sandbox.ts` — profile generation per platform (mocked OS calls) +- `file-lock.ts` — ownership assignment, request-grant flow +- `workspace-intel.ts` — static analysis on fixture projects +- `multimodal.ts` — image parsing, base64 encoding, provider fallback +- `planner.ts` — TaskPlan generation with dependency graph validation +- `types.ts` — type guards and validation + +### 13.2 Integration Tests + +- Orchestrator end-to-end: prompt -> plan -> workers -> merge -> verify +- Worker execution loop: subtask -> tool calls -> validation -> result +- Cross-platform sandbox behavior (skip on CI if OS sandbox unavailable) +- Multimodal message flow through providers (mocked provider) +- Workspace profiling on fixture projects (TypeScript, Python, Rust fixtures) + +### 13.3 Manual Testing + +- `jam go` interactive session: multi-step feature implementation +- `jam run` one-shot: bug fix with image input +- Parallel workers: task with 3+ independent subtasks +- Error recovery: intentionally failing subtask, verify retry + skip +- File conflict: two subtasks touching same file + +--- + +## 14. Context Compaction & Token Optimization + +Token efficiency is critical for long-running `jam go` sessions and expensive parallel `jam run` executions. 
+ +### 14.1 Worker-Level Compaction + +Each worker uses its own `WorkingMemory` instance (reusing existing `src/utils/memory.ts`): +- **Tool result capping:** Large outputs truncated to `MAX_TOOL_RESULT_TOKENS` (1500) before injection +- **Scratchpad checkpoints:** Every 3 rounds, model summarizes findings so far — keeps context focused +- **Context compaction:** When messages approach 70% of context window, older rounds are summarized into a compact block via a separate LLM call + +### 14.2 Orchestrator-Level Optimization + +The orchestrator manages token budget across all workers: + +```typescript +interface TokenBudget { + maxPerWorker: number; // derived from model's context window + maxTotal: number; // global cap across all workers + spent: number; // running total + remaining: number; +} +``` + +- **Pre-execution estimate:** Before dispatching, orchestrator estimates total token cost from the plan (subtask count * estimated rounds * avg tokens per round). If estimate exceeds budget, warns user and suggests reducing scope. +- **Live tracking:** Each worker reports `tokensUsed` in its result. Orchestrator tracks cumulative spend. +- **Budget enforcement:** If cumulative tokens exceed `maxTotal`, orchestrator pauses remaining subtasks and asks user whether to continue (supervised) or stops gracefully (auto). 
+ +### 14.3 Cross-Subtask Summary Compression + +When passing context between dependent subtasks, summaries are compressed: +- Worker A's full output (potentially thousands of tokens) is summarized into a ~200 token `SubtaskContext.priorSummary` +- Only file paths and key decisions are preserved, not implementation details +- If Worker B needs specifics, it reads the files directly (cheaper than passing context) + +### 14.4 `jam go` Session Compaction + +In long interactive sessions, the orchestrator maintains a session-level working memory: +- After each completed task, the full orchestrator/worker history is compacted into a session summary +- New tasks start with the session summary + workspace profile, not the full history +- User can trigger manual compaction with `/compact` in the interactive console + +--- + +## 15. Migration + +### 15.1 Refactoring Plan + +**`jam run` refactoring:** + +`run.ts` (577 lines) delegates entirely to the orchestrator, even for single-subtask work. The orchestrator detects single-subtask plans and optimizes: no file-lock overhead, no parallel dispatch, just a single worker. + +Existing guardrails migrate as follows: + +| Guardrail | Moves to | +|-----------|----------| +| Write-enforcement (no code blocks as substitute for write_file) | `worker.ts` — part of the execution loop | +| Read-before-write gate | `worker.ts` — enforced per tool call | +| Shrinkage guard (write must be >= original length) | `worker.ts` — part of write validation | +| Critic evaluation | `orchestrator.ts` — runs on final merged result | +| Synthesis reminder | `worker.ts` — injected when worker is ready to report | +| Step verification | `worker.ts` — reuses existing `StepVerifier` | + +**`jam go` rewrite:** + +Current `go.ts` (69 lines) is a thin wrapper around `startChat()`. It becomes an interactive console that: +1. Reads user input in a loop (Ink TUI or readline) +2. Passes each task to the orchestrator +3. 
Displays worker progress with multiplexed output (`[Worker 1]`, `[Worker 2]` prefixes) +4. Accepts mid-flight commands: `/stop`, `/compact`, `/status` + +**Feature flag during transition:** + +`JAM_LEGACY_RUN=1` env var falls back to the old `run.ts` loop for users who hit issues. Removed after one minor version. + +### 15.2 Backward Compatibility + +- `jam run` CLI interface unchanged — same flags, same behavior, better results +- `jam go` gains new capabilities but retains interactive chat as its core +- Config is additive — new `agent` section with defaults, existing `toolPolicy`/`toolAllowlist` continue to work +- Existing tool policies layer with tiered permissions: if `toolPolicy: 'never'` for a tool, tiered permissions cannot override it + +--- + +## 16. Error Codes + +New `JamError` codes for agent-specific failures: + +| Code | Description | +|------|-------------| +| `AGENT_PLAN_FAILED` | Planner could not generate a valid TaskPlan | +| `AGENT_PLAN_CYCLE` | Dependency graph contains a cycle | +| `AGENT_WORKER_TIMEOUT` | Worker exceeded its round budget | +| `AGENT_WORKER_CANCELLED` | Worker was cancelled by orchestrator or user | +| `AGENT_FILE_LOCK_CONFLICT` | File-lock deadlock detected | +| `AGENT_FILE_LOCK_TIMEOUT` | File-lock request timed out | +| `AGENT_BUDGET_EXCEEDED` | Total token budget exceeded | +| `AGENT_SANDBOX_UNAVAILABLE` | OS sandbox not available, running in permissions-only mode | +| `AGENT_RATE_LIMITED` | Provider rate limit hit, workers paused | +| `AGENT_MERGE_CONFLICT` | Workers produced conflicting file edits | + +--- + +## 17. 
Risks and Mitigations + +| Risk | Mitigation | +|------|------------| +| Token cost explosion with parallel workers | Max worker cap (default 3), token budget with pre-execution estimates, context summaries instead of full history | +| Sandbox breaks dev tools | Fallback to permissions-only with warning; `--no-sandbox` escape hatch; `jam doctor` verifies sandbox availability | +| Workers produce conflicting edits | File-lock protocol with deadlock detection + orchestrator merge phase | +| Workspace profiling misdetects conventions | Cache is editable (`.jam/workspace-profile.json`), user can override; intel graph is authoritative for structure | +| Large tasks exceed context window | Per-worker fresh context with summary handoff, WorkingMemory compaction, session-level compaction in `jam go` | +| Windows sandbox gap | Tiered permissions + safePath still catch dangerous cases; sandbox in v2 | +| macOS `sandbox-exec` deprecation | Runtime check via `jam doctor`; permissions-only fallback; future migration to App Sandbox entitlements | +| Provider rate limits with parallel workers | Semaphore-based provider pool; automatic pause on 429; budget tracking | +| LLM generates cyclic dependency graph | DAG validation before dispatch; re-prompt planner on cycle detection | +| File-lock deadlock between workers | Wait-graph cycle detection; lower-priority worker cancelled and re-queued | diff --git a/src/agent/config.test.ts b/src/agent/config.test.ts new file mode 100644 index 0000000..24f2c3c --- /dev/null +++ b/src/agent/config.test.ts @@ -0,0 +1,45 @@ +import { describe, it, expect } from 'vitest'; +import { JamConfigSchema } from '../config/schema.js'; + +describe('agent config schema', () => { + it('provides defaults when agent section is omitted', () => { + const result = JamConfigSchema.parse({}); + expect(result.agent).toBeDefined(); + expect(result.agent.maxWorkers).toBe(3); + expect(result.agent.defaultMode).toBe('supervised'); + 
expect(result.agent.maxRoundsPerWorker).toBe(20); + expect(result.agent.sandbox.filesystem).toBe('workspace-only'); + expect(result.agent.sandbox.network).toBe('allowed'); + expect(result.agent.sandbox.timeout).toBe(60000); + expect(result.agent.permissions.safe).toEqual([]); + expect(result.agent.permissions.dangerous).toEqual([]); + }); + + it('validates custom agent config', () => { + const result = JamConfigSchema.parse({ + agent: { + maxWorkers: 5, + defaultMode: 'auto', + permissions: { safe: ['npm test'], dangerous: ['docker rm'] }, + sandbox: { filesystem: 'unrestricted', network: 'blocked', timeout: 30000 }, + }, + }); + expect(result.agent.maxWorkers).toBe(5); + expect(result.agent.defaultMode).toBe('auto'); + expect(result.agent.permissions.safe).toEqual(['npm test']); + expect(result.agent.sandbox.network).toBe('blocked'); + }); + + it('rejects invalid mode', () => { + expect(() => JamConfigSchema.parse({ agent: { defaultMode: 'yolo' } })).toThrow(); + }); + + it('rejects maxWorkers < 1', () => { + expect(() => JamConfigSchema.parse({ agent: { maxWorkers: 0 } })).toThrow(); + }); + + it('rejects maxRoundsPerWorker out of bounds', () => { + expect(() => JamConfigSchema.parse({ agent: { maxRoundsPerWorker: 0 } })).toThrow(); + expect(() => JamConfigSchema.parse({ agent: { maxRoundsPerWorker: 51 } })).toThrow(); + }); +}); diff --git a/src/agent/errors.test.ts b/src/agent/errors.test.ts new file mode 100644 index 0000000..3be9c70 --- /dev/null +++ b/src/agent/errors.test.ts @@ -0,0 +1,19 @@ +import { describe, it, expect } from 'vitest'; +import { JamError } from '../utils/errors.js'; + +const AGENT_CODES = [ + 'AGENT_PLAN_FAILED', 'AGENT_PLAN_CYCLE', 'AGENT_WORKER_TIMEOUT', + 'AGENT_WORKER_CANCELLED', 'AGENT_FILE_LOCK_CONFLICT', 'AGENT_FILE_LOCK_TIMEOUT', + 'AGENT_BUDGET_EXCEEDED', 'AGENT_SANDBOX_UNAVAILABLE', 'AGENT_RATE_LIMITED', + 'AGENT_MERGE_CONFLICT', +] as const; + +describe('agent error codes', () => { + for (const code of AGENT_CODES) { + 
it(`creates JamError with code ${code}`, () => { + const err = new JamError(`test ${code}`, code); + expect(err.code).toBe(code); + expect(err.hint).toBeDefined(); + }); + } +}); diff --git a/src/agent/file-lock.test.ts b/src/agent/file-lock.test.ts new file mode 100644 index 0000000..f47fcf5 --- /dev/null +++ b/src/agent/file-lock.test.ts @@ -0,0 +1,72 @@ +import { describe, it, expect } from 'vitest'; +import { FileLockManager } from './file-lock.js'; + +describe('FileLockManager', () => { + it('assigns ownership from plan', () => { + const mgr = new FileLockManager(); + mgr.assignOwnership('w1', [ + { path: 'src/a.ts', mode: 'create' }, + { path: 'src/b.ts', mode: 'modify' }, + ]); + expect(mgr.getOwner('src/a.ts')).toBe('w1'); + expect(mgr.getOwner('src/b.ts')).toBe('w1'); + }); + + it('grants request for unowned file', () => { + const mgr = new FileLockManager(); + const resp = mgr.requestFile({ workerId: 'w1', path: 'src/c.ts', reason: 'need it' }); + expect(resp.granted).toBe(true); + expect(mgr.getOwner('src/c.ts')).toBe('w1'); + }); + + it('grants request for own file', () => { + const mgr = new FileLockManager(); + mgr.assignOwnership('w1', [{ path: 'src/a.ts', mode: 'modify' }]); + const resp = mgr.requestFile({ workerId: 'w1', path: 'src/a.ts', reason: 'already mine' }); + expect(resp.granted).toBe(true); + }); + + it('denies request for file owned by another worker', () => { + const mgr = new FileLockManager(); + mgr.assignOwnership('w1', [{ path: 'src/a.ts', mode: 'modify' }]); + const resp = mgr.requestFile({ workerId: 'w2', path: 'src/a.ts', reason: 'need it' }); + expect(resp.granted).toBe(false); + expect(resp.waitForWorker).toBe('w1'); + }); + + it('releases all locks for a worker', () => { + const mgr = new FileLockManager(); + mgr.assignOwnership('w1', [ + { path: 'src/a.ts', mode: 'create' }, + { path: 'src/b.ts', mode: 'modify' }, + ]); + mgr.releaseAll('w1'); + expect(mgr.getOwner('src/a.ts')).toBeUndefined(); + 
import type { FileOwnership, FileLockRequest, FileLockResponse } from './types.js';

/**
 * In-memory registry of which worker owns which file.
 *
 * Ownership is seeded from the plan via {@link FileLockManager.assignOwnership}
 * and extended on demand via {@link FileLockManager.requestFile}. Denied
 * requests are recorded in a wait graph (one outgoing edge per worker) so that
 * a request which would close a cycle can be recognised as a deadlock.
 */
export class FileLockManager {
  /** File path → ID of the worker that currently owns it. */
  private owners = new Map<string, string>();
  /** Worker ID → set of file paths that worker currently owns. */
  private workerFiles = new Map<string, Set<string>>();
  /** Wait graph: worker ID → ID of the worker it is waiting on. */
  private waitGraph = new Map<string, string>();

  /**
   * Bulk-assigns ownership of the given files to a worker.
   *
   * A path that already has an owner is reassigned to `workerId`; the path is
   * detached from the previous owner so that owner's later `releaseAll` cannot
   * drop a lock it no longer holds.
   *
   * @param workerId - Worker receiving ownership.
   * @param files - Files to assign; only `path` is read here.
   */
  assignOwnership(workerId: string, files: FileOwnership[]): void {
    for (const file of files) {
      this.claim(file.path, workerId);
    }
  }

  /**
   * Requests access to a file the worker may not already own.
   *
   * @param request - Requesting worker and the path it wants.
   * @returns `{ granted: true }` when the file was unowned (it is then claimed
   *   for the requester) or is already owned by the requester; otherwise
   *   `{ granted: false, waitForWorker }` naming the current owner.
   */
  requestFile(request: FileLockRequest): FileLockResponse {
    const owner = this.owners.get(request.path);

    // No owner → claim it for the requester and grant immediately.
    if (owner === undefined) {
      this.claim(request.path, request.workerId);
      return { granted: true };
    }

    // The requester already owns the file.
    if (owner === request.workerId) {
      return { granted: true };
    }

    // Waiting on `owner` would close a cycle: deny WITHOUT recording the wait
    // edge. The caller (orchestrator) is responsible for resolving the deadlock.
    if (this.detectDeadlock(request.workerId, owner)) {
      return { granted: false, waitForWorker: owner };
    }

    // Not available — record who the requester is now waiting on.
    this.waitGraph.set(request.workerId, owner);
    return { granted: false, waitForWorker: owner };
  }

  /**
   * Releases every lock held by a worker and removes it from the wait graph,
   * both as a waiter and as a wait target.
   *
   * Edges pointing at the released worker are dropped because it no longer
   * holds anything to wait for; leaving them behind makes `detectDeadlock`
   * report cycles that do not exist.
   *
   * @param workerId - Worker whose locks are released.
   */
  releaseAll(workerId: string): void {
    const files = this.workerFiles.get(workerId);
    if (files) {
      for (const path of files) {
        // Only drop the lock if this worker is still the recorded owner.
        if (this.owners.get(path) === workerId) {
          this.owners.delete(path);
        }
      }
      this.workerFiles.delete(workerId);
    }

    this.waitGraph.delete(workerId);
    // Deleting entries while iterating a Map is well-defined in JavaScript.
    for (const [waiter, target] of this.waitGraph) {
      if (target === workerId) {
        this.waitGraph.delete(waiter);
      }
    }
  }

  /**
   * Returns the ID of the worker that owns `path`, or `undefined` if unowned.
   */
  getOwner(path: string): string | undefined {
    return this.owners.get(path);
  }

  /**
   * Checks whether `requestingWorker` waiting on `waitForWorker` would create
   * a cycle in the wait graph.
   *
   * Follows the single outgoing edge of each worker starting at
   * `waitForWorker`; reaching `requestingWorker` means a cycle (deadlock).
   *
   * @returns `true` if the wait would close a cycle, otherwise `false`.
   */
  detectDeadlock(requestingWorker: string, waitForWorker: string): boolean {
    const visited = new Set<string>();
    let current: string | undefined = waitForWorker;
    while (current) {
      if (current === requestingWorker) return true;
      // Already-visited node: a cycle that does not involve the requester.
      if (visited.has(current)) break;
      visited.add(current);
      current = this.waitGraph.get(current);
    }
    return false;
  }

  /** Records `workerId` as the owner of `path`, detaching any previous owner. */
  private claim(path: string, workerId: string): void {
    const previous = this.owners.get(path);
    if (previous !== undefined && previous !== workerId) {
      this.workerFiles.get(previous)?.delete(path);
    }
    this.owners.set(path, workerId);

    let files = this.workerFiles.get(workerId);
    if (files === undefined) {
      files = new Set<string>();
      this.workerFiles.set(workerId, files);
    }
    files.add(path);
  }
}
from './types.js'; +export { PermissionClassifier, classifyCommand, isHardBlocked, ApprovalTracker } from './permissions.js'; +export { detectSandboxStrategy, buildSandboxArgs, executeSandboxed } from './sandbox.js'; +export type { SandboxStrategy } from './sandbox.js'; +export { getTextContent, hasImages, flattenForProvider, loadImage } from './multimodal.js'; +export { FileLockManager } from './file-lock.js'; +export { ProviderPool } from './provider-pool.js'; +export type { ProviderLease } from './provider-pool.js'; +export { buildWorkspaceProfile, formatProfileForPrompt, computeProfileHash, loadCachedProfile } from './workspace-intel.js'; +export { generateTaskPlan, estimateTokenCost } from './planner.js'; +export { executeWorker } from './worker.js'; +export type { WorkerDeps } from './worker.js'; +export { Orchestrator } from './orchestrator.js'; +export type { OrchestratorDeps, OrchestratorOptions, OrchestratorResult, ProgressEvent } from './orchestrator.js'; +export { ProgressReporter, createProgressReporter } from './progress.js'; +export type { OutputMode } from './progress.js'; diff --git a/src/agent/integration.test.ts b/src/agent/integration.test.ts new file mode 100644 index 0000000..2438291 --- /dev/null +++ b/src/agent/integration.test.ts @@ -0,0 +1,142 @@ +import { describe, it, expect, vi, beforeEach, type Mock } from 'vitest'; +import { Orchestrator } from './orchestrator.js'; +import type { ProgressEvent } from './orchestrator.js'; +import type { ProviderAdapter } from '../providers/base.js'; +import type { Subtask, SubtaskContext } from './types.js'; + +// Mock workspace-intel to avoid filesystem dependency +vi.mock('./workspace-intel.js', () => ({ + buildWorkspaceProfile: vi.fn().mockResolvedValue({ + language: 'typescript', monorepo: false, srcLayout: 'src/', + entryPoints: ['src/index.ts'], codeStyle: { + indent: 'spaces', indentSize: 2, quotes: 'single', + semicolons: true, trailingCommas: true, namingConvention: 'camelCase', + }, + 
fileNaming: 'kebab-case.ts', exportStyle: 'barrel', importStyle: 'relative', + errorHandling: 'JamError', logging: 'Logger', configPattern: 'cosmiconfig', + testFramework: 'vitest', testLocation: 'co-located', testNaming: '*.test.ts', + testStyle: 'describe/it', testCommand: 'npm test', commitConvention: 'conventional', + branchPattern: 'feat/*', packageManager: 'npm', typeChecker: 'tsc', + }), + formatProfileForPrompt: vi.fn().mockReturnValue('TypeScript project'), +})); + +// Mock planner to return a 2-subtask plan with dependency +vi.mock('./planner.js', () => ({ + generateTaskPlan: vi.fn().mockResolvedValue({ + goal: 'Add greeting feature', + subtasks: [ + { + id: '1', description: 'Create greeting module', + files: [{ path: 'src/greeting.ts', mode: 'create' }], + estimatedRounds: 5, + }, + { + id: '2', description: 'Add tests for greeting', + files: [{ path: 'src/greeting.test.ts', mode: 'create' }], + estimatedRounds: 5, + validationCommand: 'npm test', + }, + ], + dependencyGraph: new Map([['1', []], ['2', ['1']]]), + }), + estimateTokenCost: vi.fn().mockReturnValue(10000), +})); + +// Mock worker to simulate completing subtasks +let _workerCallCount = 0; +vi.mock('./worker.js', () => ({ + executeWorker: vi.fn().mockImplementation((subtask: Subtask) => { + _workerCallCount++; + return Promise.resolve({ + subtaskId: subtask.id, + status: 'completed', + filesChanged: [{ path: subtask.files[0]?.path ?? 
'unknown', action: 'created', diff: '' }], + summary: `Completed subtask ${subtask.id}: ${subtask.description}`, + tokensUsed: { promptTokens: 200, completionTokens: 100, totalTokens: 300 }, + }); + }), +})); + +const mockAdapter = { + info: { name: 'mock', supportsStreaming: true, supportsTools: true }, + validateCredentials: vi.fn(), + streamCompletion: vi.fn(), + listModels: vi.fn(), + chatWithTools: vi.fn(), +} as unknown as ProviderAdapter; + +describe('Agent Engine Integration', () => { + beforeEach(() => { + _workerCallCount = 0; + vi.clearAllMocks(); + }); + + it('orchestrates a 2-subtask plan end-to-end', async () => { + const orch = new Orchestrator({ + adapter: mockAdapter, + workspaceRoot: '/workspace', + toolSchemas: [], + executeTool: vi.fn().mockResolvedValue('ok'), + }); + + const events: ProgressEvent[] = []; + const result = await orch.execute('add a greeting feature', { + mode: 'auto', + maxWorkers: 2, + onProgress: (e) => events.push(e), + }); + + // Plan was generated + expect(result.plan.goal).toBe('Add greeting feature'); + expect(result.plan.subtasks).toHaveLength(2); + + // Both subtasks completed + expect(result.results).toHaveLength(2); + expect(result.results.every(r => r.status === 'completed')).toBe(true); + + // Dependency order: subtask 1 before subtask 2 + expect(result.results[0].subtaskId).toBe('1'); + expect(result.results[1].subtaskId).toBe('2'); + + // Files tracked + expect(result.filesChanged).toContain('src/greeting.ts'); + expect(result.filesChanged).toContain('src/greeting.test.ts'); + + // Token usage aggregated + expect(result.totalTokens.totalTokens).toBe(600); // 300 * 2 + + // Progress events fired + expect(events.some(e => e.type === 'plan-ready')).toBe(true); + expect(events.filter(e => e.type === 'worker-started')).toHaveLength(2); + expect(events.filter(e => e.type === 'worker-completed')).toHaveLength(2); + expect(events.some(e => e.type === 'all-done')).toBe(true); + + // Summary contains both subtask results + 
expect(result.summary).toContain('1:'); + expect(result.summary).toContain('2:'); + }); + + it('worker receives prior context from dependency', async () => { + const { executeWorker } = await import('./worker.js'); + + const orch = new Orchestrator({ + adapter: mockAdapter, + workspaceRoot: '/workspace', + toolSchemas: [], + executeTool: vi.fn().mockResolvedValue('ok'), + }); + + await orch.execute('test', { mode: 'auto', maxWorkers: 1 }); + + // Second worker call should have received context from first + const mockFn = executeWorker as unknown as Mock; + const calls = mockFn.mock.calls as Array<[Subtask, SubtaskContext, ...unknown[]]>; + expect(calls).toHaveLength(2); + + // Second call's context should reference subtask 1's output + const secondCallContext = calls[1][1]; // context parameter + expect(secondCallContext.priorSummary).toContain('subtask 1'); + expect(secondCallContext.filesAvailable).toContain('src/greeting.ts'); + }); +}); diff --git a/src/agent/multimodal.test.ts b/src/agent/multimodal.test.ts new file mode 100644 index 0000000..4ce7fd9 --- /dev/null +++ b/src/agent/multimodal.test.ts @@ -0,0 +1,127 @@ +import { describe, it, expect } from 'vitest'; +import { getTextContent, hasImages, flattenForProvider, loadImage } from './multimodal.js'; +import type { AgentMessage } from './types.js'; + +describe('getTextContent', () => { + it('returns string content as-is', () => { + const msg: AgentMessage = { role: 'user', content: 'hello' }; + expect(getTextContent(msg)).toBe('hello'); + }); + + it('extracts text from ContentPart array', () => { + const msg: AgentMessage = { + role: 'user', + content: [ + { type: 'text', text: 'Describe this' }, + { type: 'image', image: { data: 'abc', mediaType: 'image/png' } }, + ], + }; + expect(getTextContent(msg)).toBe('Describe this'); + }); + + it('joins multiple text parts', () => { + const msg: AgentMessage = { + role: 'user', + content: [ + { type: 'text', text: 'Part 1' }, + { type: 'text', text: 'Part 2' }, + 
], + }; + expect(getTextContent(msg)).toBe('Part 1Part 2'); + }); + + it('returns empty string for image-only content', () => { + const msg: AgentMessage = { + role: 'user', + content: [{ type: 'image', image: { data: 'abc', mediaType: 'image/png' } }], + }; + expect(getTextContent(msg)).toBe(''); + }); +}); + +describe('hasImages', () => { + it('returns false for string content', () => { + expect(hasImages({ role: 'user', content: 'hello' })).toBe(false); + }); + + it('returns true when content has image parts', () => { + expect(hasImages({ + role: 'user', + content: [{ type: 'image', image: { data: 'abc', mediaType: 'image/png' } }], + })).toBe(true); + }); + + it('returns false for text-only ContentPart array', () => { + expect(hasImages({ + role: 'user', + content: [{ type: 'text', text: 'hello' }], + })).toBe(false); + }); +}); + +describe('flattenForProvider', () => { + it('passes through string-content messages unchanged', () => { + const msgs: AgentMessage[] = [{ role: 'user', content: 'hello' }]; + const result = flattenForProvider(msgs, false); + expect(result[0].content).toBe('hello'); + }); + + it('flattens ContentPart to text for non-vision providers', () => { + const msgs: AgentMessage[] = [{ + role: 'user', + content: [ + { type: 'text', text: 'Describe this' }, + { type: 'image', image: { data: 'abc', mediaType: 'image/png' } }, + ], + }]; + const result = flattenForProvider(msgs, false); + expect(typeof result[0].content).toBe('string'); + expect(result[0].content).toContain('Describe this'); + expect(result[0].content).toContain('[Image provided'); + }); + + it('flattens ContentPart to text for vision providers too', () => { + const msgs: AgentMessage[] = [{ + role: 'user', + content: [{ type: 'text', text: 'hello' }], + }]; + const result = flattenForProvider(msgs, true); + expect(result[0].content).toBe('hello'); + }); + + it('handles mixed string and ContentPart messages', () => { + const msgs: AgentMessage[] = [ + { role: 'user', content: 
'plain text' }, + { role: 'user', content: [{ type: 'text', text: 'array text' }] }, + ]; + const result = flattenForProvider(msgs, false); + expect(result[0].content).toBe('plain text'); + expect(result[1].content).toBe('array text'); + }); +}); + +describe('loadImage', () => { + it('loads a PNG file', async () => { + const { writeFile, unlink } = await import('node:fs/promises'); + const path = '/tmp/test-jam-multimodal.png'; + await writeFile(path, Buffer.from([137, 80, 78, 71, 13, 10, 26, 10])); + const result = await loadImage(path); + expect(result.mediaType).toBe('image/png'); + expect(result.data).toBeTruthy(); + expect(typeof result.data).toBe('string'); // base64 + await unlink(path); + }); + + it('detects JPEG media type', async () => { + const { writeFile, unlink } = await import('node:fs/promises'); + const path = '/tmp/test-jam-multimodal.jpg'; + await writeFile(path, Buffer.from([0xFF, 0xD8, 0xFF])); + const result = await loadImage(path); + expect(result.mediaType).toBe('image/jpeg'); + await unlink(path); + }); + + it('throws for non-existent file', async () => { + await expect(loadImage('/tmp/nonexistent-image-12345.png')).rejects.toThrow(); + }); +}); diff --git a/src/agent/multimodal.ts b/src/agent/multimodal.ts new file mode 100644 index 0000000..2fc4ed2 --- /dev/null +++ b/src/agent/multimodal.ts @@ -0,0 +1,103 @@ +import { existsSync } from 'node:fs'; +import { readFile } from 'node:fs/promises'; +import { extname } from 'node:path'; +import type { AgentMessage, ContentPart } from './types.js'; +import type { Message } from '../providers/base.js'; +import { JamError } from '../utils/errors.js'; + +/** + * Extracts the plain text content from an AgentMessage. + * If content is a string, returns it as-is. + * If content is a ContentPart[], filters for text parts and joins their text values. 
/**
 * Extracts the plain text carried by an AgentMessage.
 *
 * @param msg - Message whose content is either a string or a ContentPart[].
 * @returns The string content unchanged, or the `text` of every text part
 *   concatenated with no separator. Image parts contribute nothing, so an
 *   image-only message yields ''.
 */
export function getTextContent(msg: AgentMessage): string {
  if (typeof msg.content === 'string') {
    return msg.content;
  }
  return msg.content
    .filter(
      (part): part is ContentPart & { type: 'text'; text: string } =>
        part.type === 'text' && part.text !== undefined,
    )
    .map((part) => part.text)
    .join('');
}

/**
 * Reports whether a message carries at least one image part.
 *
 * @param msg - Message to inspect.
 * @returns `false` for string content; otherwise `true` when any part has
 *   `type === 'image'`.
 */
export function hasImages(msg: AgentMessage): boolean {
  if (typeof msg.content === 'string') {
    return false;
  }
  return msg.content.some((part) => part.type === 'image');
}

/** Notice appended when images had to be dropped for a non-vision provider. */
const VISION_UNSUPPORTED_NOTICE = '[Image provided but this model does not support vision]';

/**
 * Converts AgentMessage[] into provider messages whose content is a string.
 *
 * String content passes through untouched. ContentPart[] content is reduced
 * to its text via getTextContent(); image parts are never forwarded by this
 * function, whatever `supportsVision` says.
 *
 * @param messages - Messages to flatten; the input is not mutated.
 * @param supportsVision - When `false`, a message that contained images gets
 *   a notice appended so the model knows something was omitted.
 * @returns One `{ role, content }` message per input message, in order.
 */
export function flattenForProvider(
  messages: AgentMessage[],
  supportsVision: boolean,
): Message[] {
  return messages.map((msg) => {
    if (typeof msg.content === 'string') {
      return { role: msg.role, content: msg.content };
    }

    const text = getTextContent(msg);
    if (supportsVision || !hasImages(msg)) {
      return { role: msg.role, content: text };
    }

    // Image-only messages have no text: emit just the notice rather than a
    // notice preceded by a dangling newline.
    const content =
      text === '' ? VISION_UNSUPPORTED_NOTICE : `${text}\n${VISION_UNSUPPORTED_NOTICE}`;
    return { role: msg.role, content };
  });
}
/** Media type reported for each supported (lower-cased) file extension. */
const MEDIA_TYPE_BY_EXTENSION = new Map<
  string,
  'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp'
>([
  ['.png', 'image/png'],
  ['.jpg', 'image/jpeg'],
  ['.jpeg', 'image/jpeg'],
  ['.gif', 'image/gif'],
  ['.webp', 'image/webp'],
]);

/**
 * Loads an image from disk as base64 together with its media type.
 *
 * The media type is chosen from the file extension (case-insensitive); the
 * file's bytes are not inspected.
 *
 * @param filePath - Path of the image to read.
 * @returns The file contents base64-encoded, plus the matching media type.
 * @throws JamError with code 'INPUT_FILE_NOT_FOUND' when the path does not
 *   exist, and with the same code when the extension is not one of
 *   .png, .jpg, .jpeg, .gif, .webp. The existence check runs first.
 */
export async function loadImage(filePath: string): Promise<{
  data: string;
  mediaType: 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp';
}> {
  const fileIsPresent = existsSync(filePath);
  if (!fileIsPresent) {
    throw new JamError(
      `Image file not found: ${filePath}`,
      'INPUT_FILE_NOT_FOUND'
    );
  }

  const ext = extname(filePath).toLowerCase();
  const mediaType = MEDIA_TYPE_BY_EXTENSION.get(ext);
  if (mediaType === undefined) {
    throw new JamError(
      `Unsupported image format: ${ext}. Supported formats: .png, .jpg, .jpeg, .gif, .webp`,
      'INPUT_FILE_NOT_FOUND'
    );
  }

  const bytes = await readFile(filePath);
  return { data: bytes.toString('base64'), mediaType };
}
'npm', typeChecker: 'tsc', + }), + formatProfileForPrompt: vi.fn().mockReturnValue('Mock profile'), +})); + +vi.mock('./planner.js', () => ({ + generateTaskPlan: vi.fn().mockResolvedValue({ + goal: 'Test goal', + subtasks: [ + { id: '1', description: 'First task', files: [{ path: 'src/a.ts', mode: 'create' }], estimatedRounds: 3 }, + ], + dependencyGraph: new Map([['1', []]]), + }), + estimateTokenCost: vi.fn().mockReturnValue(3000), +})); + +vi.mock('./worker.js', () => ({ + executeWorker: vi.fn().mockResolvedValue({ + subtaskId: '1', + status: 'completed', + filesChanged: [{ path: 'src/a.ts', action: 'created', diff: '' }], + summary: 'Created file', + tokensUsed: { promptTokens: 100, completionTokens: 50, totalTokens: 150 }, + }), +})); + +const mockAdapter = { + info: { name: 'mock', supportsStreaming: true, supportsTools: true }, + validateCredentials: vi.fn(), + streamCompletion: vi.fn(), + listModels: vi.fn(), + chatWithTools: vi.fn(), +} as unknown as ProviderAdapter; + +describe('Orchestrator', () => { + it('executes a single-subtask plan', async () => { + const orch = new Orchestrator({ + adapter: mockAdapter, + workspaceRoot: '/workspace', + toolSchemas: [], + executeTool: vi.fn(), + }); + + const result = await orch.execute('do something', { + mode: 'auto', + maxWorkers: 1, + }); + + expect(result.plan.goal).toBe('Test goal'); + expect(result.results).toHaveLength(1); + expect(result.results[0].status).toBe('completed'); + expect(result.filesChanged).toContain('src/a.ts'); + expect(result.totalTokens.totalTokens).toBeGreaterThan(0); + }); + + it('calls onProgress callbacks', async () => { + const events: string[] = []; + const orch = new Orchestrator({ + adapter: mockAdapter, + workspaceRoot: '/workspace', + toolSchemas: [], + executeTool: vi.fn(), + }); + + await orch.execute('do something', { + mode: 'auto', + maxWorkers: 1, + onProgress: (e) => events.push(e.type), + }); + + expect(events).toContain('plan-ready'); + 
import type { ProviderAdapter, TokenUsage, ToolDefinition } from '../providers/base.js';
import type { TaskPlan, WorkerResult, AgentMode } from './types.js';
import { topologicalSort } from './types.js';
import { generateTaskPlan, estimateTokenCost } from './planner.js';
import { buildWorkspaceProfile } from './workspace-intel.js';
import { executeWorker } from './worker.js';
import { FileLockManager } from './file-lock.js';
import { ProviderPool } from './provider-pool.js';

/** Collaborators the orchestrator needs; supplied once at construction. */
export interface OrchestratorDeps {
  adapter: ProviderAdapter;
  workspaceRoot: string;
  toolSchemas: ToolDefinition[];
  // NOTE(review): the type arguments were missing from this copy of the file;
  // Record<string, unknown> / Promise<string> match how the tests mock it — confirm.
  executeTool: (name: string, args: Record<string, unknown>) => Promise<string>;
}

/** Per-run options for Orchestrator.execute(). */
export interface OrchestratorOptions {
  mode: AgentMode;
  maxWorkers: number;
  images?: string[]; // image file paths
  /** Cancels the run; when omitted a 10-minute timeout signal is used. */
  signal?: AbortSignal;
  onProgress?: (event: ProgressEvent) => void;
}

/** Notification passed to OrchestratorOptions.onProgress. */
export interface ProgressEvent {
  type: 'plan-ready' | 'worker-started' | 'worker-completed' | 'worker-failed' | 'all-done';
  subtaskId?: string;
  message: string;
}

/** Outcome of one Orchestrator.execute() run. */
export interface OrchestratorResult {
  plan: TaskPlan;
  results: WorkerResult[];
  totalTokens: TokenUsage;
  /** De-duplicated paths touched by any recorded worker result. */
  filesChanged: string[];
  /** One `- <id>: <status> — <summary>` line per recorded result. */
  summary: string;
}

/**
 * Plans a task, then runs one worker per subtask in dependency order.
 *
 * Workers are dispatched one at a time in topological order. A subtask whose
 * worker reports `failed` is retried once; a subtask that depends on one that
 * did not complete is recorded as `blocked` without running.
 */
export class Orchestrator {
  private readonly deps: OrchestratorDeps;

  constructor(deps: OrchestratorDeps) {
    this.deps = deps;
  }

  /**
   * Executes `prompt` end to end.
   *
   * @param prompt - The user's task description, handed to the planner.
   * @param options - Mode, worker limit, optional abort signal and progress callback.
   * @returns The plan, every recorded worker result, token totals, the set of
   *   changed files and a textual summary. If the signal is already aborted
   *   when dispatch starts, `results` is empty.
   * @throws Whatever the workspace profiler, planner, provider pool or a
   *   worker throws; nothing is caught here.
   */
  async execute(prompt: string, options: OrchestratorOptions): Promise<OrchestratorResult> {
    const { adapter, workspaceRoot, toolSchemas, executeTool } = this.deps;
    const signal = options.signal ?? AbortSignal.timeout(600000); // 10min default
    const report = (event: ProgressEvent): void => {
      options.onProgress?.(event);
    };

    // 1. Build workspace profile
    const profile = await buildWorkspaceProfile(workspaceRoot);

    // 2. Generate task plan
    const plan = await generateTaskPlan(adapter, prompt, profile);
    report({ type: 'plan-ready', message: `Plan: ${plan.goal} (${plan.subtasks.length} subtasks)` });

    // 3. Estimate token cost (computed but not acted on yet)
    const _estimatedCost = estimateTokenCost(plan);

    // 4. Set up infrastructure
    const pool = new ProviderPool(adapter, options.maxWorkers);
    const fileLock = new FileLockManager();
    for (const subtask of plan.subtasks) {
      fileLock.assignOwnership(subtask.id, subtask.files);
    }

    // 5. Walk dependency graph topologically
    const order = topologicalSort(plan.dependencyGraph);
    const subtasksById = new Map(plan.subtasks.map((s) => [s.id, s] as const));
    const results: WorkerResult[] = [];
    const completedSummaries = new Map<string, string>(); // subtaskId -> summary
    const unfinished = new Set<string>(); // subtasks that ran (or were skipped) without completing

    // 6. Dispatch workers respecting dependencies
    for (const subtaskId of order) {
      if (signal.aborted) break;

      const subtask = subtasksById.get(subtaskId);
      if (!subtask) continue;

      const deps = plan.dependencyGraph.get(subtaskId) ?? [];

      // A worker cannot build on output that was never produced: skip it and
      // record why, instead of running it with missing context.
      const failedDeps = deps.filter((d) => unfinished.has(d));
      if (failedDeps.length > 0) {
        const reason = `Skipped: dependency ${failedDeps.join(', ')} did not complete`;
        results.push({
          subtaskId,
          status: 'blocked',
          filesChanged: [],
          summary: reason,
          // NOTE(review): assumes TokenUsage is exactly these three counters,
          // as in the test fixtures — confirm against providers/base.
          tokensUsed: { promptTokens: 0, completionTokens: 0, totalTokens: 0 },
          error: reason,
        });
        unfinished.add(subtaskId);
        fileLock.releaseAll(subtaskId);
        report({ type: 'worker-failed', subtaskId, message: `Blocked: ${subtask.description}. ${reason}` });
        continue;
      }

      // Build context from completed dependencies
      const priorSummary = deps
        .map((d) => completedSummaries.get(d))
        .filter(Boolean)
        .join('\n');
      const filesAvailable = results
        .filter((r) => deps.includes(r.subtaskId))
        .flatMap((r) => r.filesChanged.map((f) => f.path));

      const context = {
        priorSummary,
        filesAvailable,
        planReminder: `You are on subtask ${subtask.id} of ${plan.subtasks.length}: ${subtask.description}`,
      };

      report({ type: 'worker-started', subtaskId, message: `Starting: ${subtask.description}` });

      const lease = await pool.acquire();
      try {
        const runWorker = (): Promise<WorkerResult> =>
          executeWorker(subtask, context, signal, {
            lease,
            workspaceRoot,
            workspaceProfile: profile,
            toolSchemas,
            executeTool,
          });

        let result = await runWorker();
        pool.addTokenUsage(result.tokensUsed);

        // Retry a failed worker once, unless the run has been cancelled meanwhile.
        if (result.status === 'failed' && !signal.aborted) {
          report({ type: 'worker-failed', subtaskId, message: `Failed: ${result.error}. Retrying...` });
          result = await runWorker();
          pool.addTokenUsage(result.tokensUsed);
        }

        // Only the final attempt is recorded.
        results.push(result);

        if (result.status === 'completed') {
          completedSummaries.set(subtaskId, result.summary);
          report({ type: 'worker-completed', subtaskId, message: `Done: ${subtask.description}` });
        } else {
          unfinished.add(subtaskId);
          if (result.status === 'failed') {
            report({ type: 'worker-failed', subtaskId, message: `Failed: ${result.error}` });
          }
        }
      } finally {
        // Single release point for every path (success, retry, throw), so the
        // lease is handed back exactly once.
        lease.release();
        fileLock.releaseAll(subtaskId);
      }
    }

    // 7. Build summary
    const totalTokens = pool.getTotalTokens();
    const filesChanged = [...new Set(results.flatMap((r) => r.filesChanged.map((f) => f.path)))];
    const summary = results
      .map((r) => `- ${r.subtaskId}: ${r.status} — ${r.summary}`)
      .join('\n');

    const completedCount = results.filter((r) => r.status === 'completed').length;
    report({ type: 'all-done', message: `Completed ${completedCount}/${plan.subtasks.length} subtasks` });

    return { plan, results, totalTokens, filesChanged, summary };
  }
}
-10')).toBe('safe'); + }); + + it('classifies git show as safe', () => { + expect(classifyCommand('git show HEAD')).toBe('safe'); + }); + + it('classifies git branch as safe (list only)', () => { + expect(classifyCommand('git branch')).toBe('safe'); + expect(classifyCommand('git branch -v')).toBe('safe'); + }); + + it('classifies git tag as safe', () => { + expect(classifyCommand('git tag')).toBe('safe'); + }); + + it('classifies git remote as safe', () => { + expect(classifyCommand('git remote -v')).toBe('safe'); + }); + + it('classifies git rev-parse as safe', () => { + expect(classifyCommand('git rev-parse HEAD')).toBe('safe'); + }); + + it('classifies npm test as safe', () => { + expect(classifyCommand('npm test')).toBe('safe'); + expect(classifyCommand('npm test -- --watch')).toBe('safe'); + }); + + it('classifies npx vitest/jest/tsc/eslint/prettier as safe', () => { + expect(classifyCommand('npx vitest run')).toBe('safe'); + expect(classifyCommand('npx jest')).toBe('safe'); + expect(classifyCommand('npx tsc --noEmit')).toBe('safe'); + expect(classifyCommand('npx eslint src/')).toBe('safe'); + expect(classifyCommand('npx prettier --check .')).toBe('safe'); + }); + + it('classifies node as safe', () => { + expect(classifyCommand('node script.js')).toBe('safe'); + }); + + it('classifies deno as safe', () => { + expect(classifyCommand('deno run mod.ts')).toBe('safe'); + }); + + it('classifies bun test/run as safe', () => { + expect(classifyCommand('bun test')).toBe('safe'); + expect(classifyCommand('bun run build')).toBe('safe'); + }); + + it('classifies cargo test as safe', () => { + expect(classifyCommand('cargo test')).toBe('safe'); + }); + + it('classifies go test as safe', () => { + expect(classifyCommand('go test ./...')).toBe('safe'); + }); + + it('classifies python -m pytest as safe', () => { + expect(classifyCommand('python -m pytest tests/')).toBe('safe'); + }); + + it('classifies pwd, whoami, date, which, env as safe', () => { + 
expect(classifyCommand('pwd')).toBe('safe'); + expect(classifyCommand('whoami')).toBe('safe'); + expect(classifyCommand('date')).toBe('safe'); + expect(classifyCommand('which node')).toBe('safe'); + expect(classifyCommand('env')).toBe('safe'); + }); + + it('classifies find, grep, rg, fd as safe', () => { + expect(classifyCommand('find . -name "*.ts"')).toBe('safe'); + expect(classifyCommand('grep -r TODO src/')).toBe('safe'); + expect(classifyCommand('rg "pattern" .')).toBe('safe'); + expect(classifyCommand('fd ".ts$"')).toBe('safe'); + }); + + // Moderate commands + it('classifies npm install as moderate', () => { + expect(classifyCommand('npm install lodash')).toBe('moderate'); + }); + + it('classifies git add as moderate', () => { + expect(classifyCommand('git add .')).toBe('moderate'); + expect(classifyCommand('git add src/index.ts')).toBe('moderate'); + }); + + it('classifies git commit as moderate', () => { + expect(classifyCommand('git commit -m "fix: bug"')).toBe('moderate'); + }); + + it('classifies mkdir as moderate', () => { + expect(classifyCommand('mkdir -p src/utils')).toBe('moderate'); + }); + + it('classifies rm of a single file as moderate', () => { + expect(classifyCommand('rm file.txt')).toBe('moderate'); + }); + + it('classifies curl as moderate', () => { + expect(classifyCommand('curl https://example.com')).toBe('moderate'); + }); + + // Dangerous commands + it('classifies rm -rf as dangerous', () => { + expect(classifyCommand('rm -rf node_modules')).toBe('dangerous'); + expect(classifyCommand('rm -fr dist')).toBe('dangerous'); + }); + + it('classifies rm -f as dangerous', () => { + expect(classifyCommand('rm -f important.txt')).toBe('dangerous'); + }); + + it('classifies git push as dangerous', () => { + expect(classifyCommand('git push')).toBe('dangerous'); + expect(classifyCommand('git push origin main')).toBe('dangerous'); + }); + + it('classifies git reset as dangerous', () => { + expect(classifyCommand('git reset --hard 
HEAD~1')).toBe('dangerous'); + expect(classifyCommand('git reset HEAD')).toBe('dangerous'); + }); + + it('classifies git rebase as dangerous', () => { + expect(classifyCommand('git rebase main')).toBe('dangerous'); + }); + + it('classifies git checkout -- as dangerous', () => { + expect(classifyCommand('git checkout -- .')).toBe('dangerous'); + }); + + it('classifies git branch -d/-D as dangerous', () => { + expect(classifyCommand('git branch -d feature/old')).toBe('dangerous'); + expect(classifyCommand('git branch -D feature/old')).toBe('dangerous'); + }); + + it('classifies chmod as dangerous', () => { + expect(classifyCommand('chmod +x script.sh')).toBe('dangerous'); + }); + + it('classifies chown as dangerous', () => { + expect(classifyCommand('chown user:group file')).toBe('dangerous'); + }); + + it('classifies piped commands as dangerous', () => { + expect(classifyCommand('echo "x" | bash')).toBe('dangerous'); + expect(classifyCommand('cat file.txt | wc -l')).toBe('dangerous'); + }); +}); + +describe('PermissionClassifier', () => { + it('classifies normally with no overrides', () => { + const classifier = new PermissionClassifier({ safe: [], dangerous: [] }); + expect(classifier.classify('ls -la')).toBe('safe'); + expect(classifier.classify('npm install')).toBe('moderate'); + expect(classifier.classify('git push')).toBe('dangerous'); + }); + + it('custom safe override promotes a command to safe', () => { + const classifier = new PermissionClassifier({ + safe: ['npm install'], + dangerous: [], + }); + expect(classifier.classify('npm install lodash')).toBe('safe'); + }); + + it('custom dangerous override demotes a command to dangerous', () => { + const classifier = new PermissionClassifier({ + safe: [], + dangerous: ['curl'], + }); + expect(classifier.classify('curl https://example.com')).toBe('dangerous'); + }); + + it('custom dangerous override takes precedence over custom safe override', () => { + const classifier = new PermissionClassifier({ + safe: 
['npm'], + dangerous: ['npm install'], + }); + // dangerous checked before safe — npm install is dangerous + expect(classifier.classify('npm install react')).toBe('dangerous'); + // npm test is safe (no dangerous prefix match, safe prefix matches 'npm') + expect(classifier.classify('npm test')).toBe('safe'); + }); + + it('hard-block cannot be overridden by custom safe list', () => { + const classifier = new PermissionClassifier({ + safe: ['sudo', 'reboot'], + dangerous: [], + }); + expect(classifier.classify('sudo rm -rf /')).toBe('blocked'); + expect(classifier.classify('reboot')).toBe('blocked'); + }); + + it('hard-block cannot be overridden by custom dangerous list either', () => { + const classifier = new PermissionClassifier({ + safe: [], + dangerous: ['sudo'], + }); + expect(classifier.classify('sudo apt-get install vim')).toBe('blocked'); + }); + + it('returns blocked for hard-blocked commands', () => { + const classifier = new PermissionClassifier({ safe: [], dangerous: [] }); + expect(classifier.classify('sudo apt install vim')).toBe('blocked'); + expect(classifier.classify('mkfs /dev/sda')).toBe('blocked'); + }); +}); + +describe('isHardBlocked', () => { + it('blocks sudo', () => { + expect(isHardBlocked('sudo rm -rf /')).toBe(true); + expect(isHardBlocked('sudo apt install vim')).toBe(true); + }); + + it('blocks su -', () => { + expect(isHardBlocked('su -')).toBe(true); + expect(isHardBlocked('su')).toBe(true); + }); + + it('blocks mkfs', () => { + expect(isHardBlocked('mkfs /dev/sda1')).toBe(true); + }); + + it('blocks dd', () => { + expect(isHardBlocked('dd if=/dev/zero of=/dev/sda')).toBe(true); + }); + + it('blocks chmod 777 /', () => { + expect(isHardBlocked('chmod 777 /')).toBe(true); + }); + + it('blocks shutdown', () => { + expect(isHardBlocked('shutdown now')).toBe(true); + expect(isHardBlocked('shutdown -h now')).toBe(true); + }); + + it('blocks reboot', () => { + expect(isHardBlocked('reboot')).toBe(true); + }); + + it('blocks rm -rf /', () => 
{ + expect(isHardBlocked('rm -rf /')).toBe(true); + expect(isHardBlocked('rm -fr /')).toBe(true); + }); + + it('does not block normal commands', () => { + expect(isHardBlocked('ls -la')).toBe(false); + expect(isHardBlocked('git status')).toBe(false); + expect(isHardBlocked('npm install')).toBe(false); + expect(isHardBlocked('rm -rf node_modules')).toBe(false); + expect(isHardBlocked('chmod +x script.sh')).toBe(false); + }); +}); + +describe('ApprovalTracker', () => { + let tracker: ApprovalTracker; + + beforeEach(() => { + tracker = new ApprovalTracker(); + }); + + it('returns false for a command that has not been approved', () => { + expect(tracker.isApproved('git push origin main')).toBe(false); + }); + + it('returns true after approve() is called', () => { + tracker.approve('git push origin main'); + expect(tracker.isApproved('git push origin main')).toBe(true); + }); + + it('normalizes to first 2 words — different args match same type', () => { + tracker.approve('git push origin main'); + expect(tracker.isApproved('git push upstream feature/x')).toBe(true); + }); + + it('does not cross-match different 2-word types', () => { + tracker.approve('git push origin main'); + expect(tracker.isApproved('git commit -m "msg"')).toBe(false); + }); + + it('single-word commands normalize correctly', () => { + // 'reboot' has 1 word → key is "reboot" + tracker.approve('reboot'); + expect(tracker.isApproved('reboot')).toBe(true); + // 'reboot --force' has 2 words → key is "reboot --force", different from "reboot" + expect(tracker.isApproved('reboot --force')).toBe(false); + // approve the 2-word form separately + tracker.approve('reboot --force'); + expect(tracker.isApproved('reboot --force')).toBe(true); + }); + + it('tracks multiple approved types independently', () => { + tracker.approve('npm install lodash'); + tracker.approve('git push origin main'); + expect(tracker.isApproved('npm install react')).toBe(true); + expect(tracker.isApproved('git push upstream 
import type { PermissionTier } from './types.js';

// ── Regex patterns for classification ────────────────────────────────────────

/** Commands treated as read-only / low risk. Each is anchored at the start of the command. */
const SAFE_PATTERNS: RegExp[] = [
  // File inspection
  /^ls(\s|$)/,
  /^cat(\s|$)/,
  /^head(\s|$)/,
  /^tail(\s|$)/,
  /^wc(\s|$)/,
  /^echo(\s|$)/,
  /^pwd(\s|$)/,
  /^whoami(\s|$)/,
  /^date(\s|$)/,
  /^which(\s|$)/,
  /^env(\s|$)/,
  /^find(\s|$)/,
  /^grep(\s|$)/,
  /^rg(\s|$)/,
  /^fd(\s|$)/,
  // Git read-only
  /^git\s+status(\s|$)/,
  /^git\s+diff(\s|$)/,
  /^git\s+log(\s|$)/,
  /^git\s+show(\s|$)/,
  /^git\s+branch(\s|$)/,
  /^git\s+tag(\s|$)/,
  /^git\s+remote(\s|$)/,
  /^git\s+rev-parse(\s|$)/,
  // Test runners
  /^npm\s+test(\s|$)/,
  /^npx\s+vitest(\s|$)/,
  /^npx\s+jest(\s|$)/,
  /^npx\s+tsc(\s|$)/,
  /^npx\s+eslint(\s|$)/,
  /^npx\s+prettier(\s|$)/,
  // Runtimes (read/run only)
  /^node(\s|$)/,
  /^deno(\s|$)/,
  /^bun\s+test(\s|$)/,
  /^bun\s+run(\s|$)/,
  /^cargo\s+test(\s|$)/,
  /^go\s+test(\s|$)/,
  /^python\s+-m\s+pytest(\s|$)/,
];

/** Commands that always need explicit confirmation. Checked before SAFE_PATTERNS. */
const DANGEROUS_PATTERNS: RegExp[] = [
  // Destructive rm. The short flag must start its own token (`\s-`), so a
  // hyphen inside a file name ("rm my-file.txt") is not mistaken for -f/-r.
  /\brm\b.*\s-[a-zA-Z]*[rRf][a-zA-Z]*/,
  /\brm\b.*--recursive/,
  /\brm\b.*--force/,
  // Dangerous git (any push, forced or not)
  /^git\s+push(\s|$)/,
  /^git\s+reset(\s|$)/,
  /^git\s+rebase(\s|$)/,
  /^git\s+checkout\s+--(\s|$)/,
  /^git\s+branch\s+(-d|-D)(\s|$)/,
  // Permission changes
  /^chmod(\s|$)/,
  /^chown(\s|$)/,
  // Privilege escalation
  /^sudo(\s|$)/,
  /^su\s+-(\s|$)/,
  /^su\s*$/,
  // Shell control operators: anything after them is a second command that
  // the start-anchored patterns above never look at. Matching is textual
  // (not quote-aware), so quoted operators are flagged too — deliberately
  // conservative.
  /\|/, // pipes and ||
  /;/, // sequencing
  /(?<!>)&(?!>)/, // && and background &, but not redirects such as 2>&1 or &>
  /`/, // backtick substitution
  /\$\(/, // $(...) substitution
  /[\r\n]/, // embedded newline
];

// ── Hard-block patterns (unoverridable safety floor) ─────────────────────────

const HARD_BLOCK_PATTERNS: RegExp[] = [
  // rm with combined recursive+force flags aimed at an absolute path ("rm -rf /…").
  /\brm\b.*\s-[a-zA-Z]*[rR][a-zA-Z]*f[a-zA-Z]*\s+\//,
  /\brm\b.*\s-[a-zA-Z]*f[a-zA-Z]*[rR][a-zA-Z]*\s+\//,
  /^sudo(\s|$)/,
  /^su\s+-(\s|$)/,
  /^su\s*$/,
  /^mkfs(\s|$)/,
  /^dd(\s|$)/,
  /^chmod\s+777\s+\//,
  /^shutdown(\s|$)/,
  /^reboot(\s|$)/,
];

/** Characters that start a new command inside one shell line. */
const COMMAND_SEPARATORS = /[;|&\r\n`(){}]+/;

// ── Public API ────────────────────────────────────────────────────────────────

/**
 * Check whether a command is unconditionally blocked regardless of user config.
 *
 * The whole command and every part of it separated by a shell control
 * operator (`;`, `|`, `&`, newline, backtick, parentheses, braces) are tested,
 * so a blocked command cannot hide behind a harmless first one
 * ("ls && sudo reboot"). Splitting is textual, not quote-aware.
 *
 * @param command - Raw shell command line.
 * @returns `true` when any part matches a hard-block pattern.
 */
export function isHardBlocked(command: string): boolean {
  const trimmed = command.trim();
  const candidates = [trimmed, ...trimmed.split(COMMAND_SEPARATORS).map((part) => part.trim())];
  return candidates.some(
    (candidate) =>
      candidate !== '' && HARD_BLOCK_PATTERNS.some((pattern) => pattern.test(candidate)),
  );
}

/**
 * Classify a shell command into safe / moderate / dangerous using default patterns.
 *
 * Dangerous patterns win over safe ones; a command matching neither list is
 * 'moderate'. Compound commands (pipes, `;`, `&&`, command substitution, …)
 * are always 'dangerous'.
 *
 * @param command - Raw shell command line; surrounding whitespace is ignored.
 * @returns The permission tier for the command.
 */
export function classifyCommand(command: string): PermissionTier {
  const trimmed = command.trim();

  if (DANGEROUS_PATTERNS.some((p) => p.test(trimmed))) return 'dangerous';
  if (SAFE_PATTERNS.some((p) => p.test(trimmed))) return 'safe';
  return 'moderate';
}
/**
 * Shell control operators that introduce a second command on the same line:
 * `;`, `|`, newline, backtick, `$(`, and `&` unless it is part of a redirect
 * such as `2>&1` or `&>`. Matching is textual, not quote-aware.
 */
const SHELL_CONTROL_OPERATORS = /[;|`\r\n]|\$\(|(?<!>)&(?!>)/;

/**
 * Permission classifier with custom per-session overrides.
 *
 * Override lists use string startsWith matching and take precedence over the
 * default patterns, with these limits:
 *  - hard-blocks always win;
 *  - a dangerous override wins over a safe override;
 *  - empty / whitespace-only override entries are ignored (an empty prefix
 *    would match every command);
 *  - safe overrides are not applied to compound commands (anything containing
 *    a shell control operator), because the prefix only vouches for the first
 *    command on the line. Such commands fall through to classifyCommand().
 */
export class PermissionClassifier {
  private readonly safeOverrides: string[];
  private readonly dangerousOverrides: string[];

  /**
   * @param overrides - Command prefixes to force to 'safe' / 'dangerous'.
   *   The lists are copied; later mutation of the arguments has no effect.
   */
  constructor(overrides: { safe: string[]; dangerous: string[] }) {
    const usable = (prefixes: string[]): string[] =>
      prefixes.filter((prefix) => prefix.trim() !== '');
    this.safeOverrides = usable(overrides.safe);
    this.dangerousOverrides = usable(overrides.dangerous);
  }

  /**
   * Classify one command.
   *
   * @param command - Raw shell command line; surrounding whitespace is ignored.
   * @returns 'blocked' for hard-blocked commands, otherwise a permission tier.
   */
  classify(command: string): PermissionTier | 'blocked' {
    const trimmed = command.trim();

    // Hard-block check first — cannot be overridden
    if (isHardBlocked(trimmed)) return 'blocked';

    // Custom dangerous overrides
    if (this.dangerousOverrides.some((prefix) => trimmed.startsWith(prefix))) {
      return 'dangerous';
    }

    // Custom safe overrides — single commands only
    const isCompound = SHELL_CONTROL_OPERATORS.test(trimmed);
    if (!isCompound && this.safeOverrides.some((prefix) => trimmed.startsWith(prefix))) {
      return 'safe';
    }

    // Fall back to default classifier
    return classifyCommand(trimmed);
  }
}

/**
 * Tracks session-level approvals for "confirm once per type" in auto mode.
 *
 * Command type is the first 2 words (e.g. "git push origin main" → "git push").
 * A compound command (one containing a shell control operator) is its own
 * type: it is keyed by its full text, so approving "npm install x" does not
 * approve "npm install x && <something else>".
 */
export class ApprovalTracker {
  private readonly approved = new Set<string>();

  /** Reduces a command to the key under which its approval is stored. */
  private normalize(command: string): string {
    const trimmed = command.trim();
    if (SHELL_CONTROL_OPERATORS.test(trimmed)) {
      return trimmed;
    }
    return trimmed.split(/\s+/).slice(0, 2).join(' ');
  }

  /** @returns `true` when this command's type was approved earlier in the session. */
  isApproved(command: string): boolean {
    return this.approved.has(this.normalize(command));
  }

  /** Records approval for this command's type. Repeated approvals are harmless. */
  approve(command: string): void {
    this.approved.add(this.normalize(command));
  }
}
semicolons: true, trailingCommas: true, namingConvention: 'camelCase', + }, + fileNaming: 'kebab-case.ts', exportStyle: 'barrel', importStyle: 'relative', + errorHandling: 'JamError', logging: 'Logger', configPattern: 'cosmiconfig', + testFramework: 'vitest', testLocation: 'co-located', testNaming: '*.test.ts', + testStyle: 'describe/it', testCommand: 'npm test', commitConvention: 'conventional', + branchPattern: 'feat/*', packageManager: 'npm', typeChecker: 'tsc', +}; + +// Mock adapter that returns a valid plan JSON +const makeAdapter = (responseContent: string) => ({ + info: { name: 'mock', supportsStreaming: true, supportsTools: true }, + validateCredentials: vi.fn(), + streamCompletion: vi.fn(), + listModels: vi.fn(), + chatWithTools: vi.fn().mockResolvedValue({ + content: responseContent, + toolCalls: [], + }), +} as unknown as ProviderAdapter); + +describe('generateTaskPlan', () => { + it('parses a valid single-subtask plan', async () => { + const json = JSON.stringify({ + goal: 'Fix the auth bug', + subtasks: [{ + id: '1', description: 'Fix login validation', + files: [{ path: 'src/auth.ts', mode: 'modify' }], + estimatedRounds: 10, + }], + dependencies: {}, + }); + const adapter = makeAdapter(json); + const plan = await generateTaskPlan(adapter, 'fix auth bug', mockProfile); + expect(plan.goal).toBe('Fix the auth bug'); + expect(plan.subtasks).toHaveLength(1); + expect(plan.subtasks[0].id).toBe('1'); + }); + + it('parses JSON wrapped in code fences', async () => { + const json = '```json\n' + JSON.stringify({ + goal: 'Add feature', + subtasks: [{ id: '1', description: 'Do thing', files: [], estimatedRounds: 5 }], + dependencies: {}, + }) + '\n```'; + const adapter = makeAdapter(json); + const plan = await generateTaskPlan(adapter, 'add feature', mockProfile); + expect(plan.subtasks).toHaveLength(1); + }); + + it('parses multi-subtask plan with dependencies', async () => { + const json = JSON.stringify({ + goal: 'Add user API', + subtasks: [ + { id: '1', 
description: 'Create model', files: [{ path: 'src/model.ts', mode: 'create' }], estimatedRounds: 8 }, + { id: '2', description: 'Create routes', files: [{ path: 'src/routes.ts', mode: 'create' }], estimatedRounds: 12 }, + { id: '3', description: 'Write tests', files: [{ path: 'src/routes.test.ts', mode: 'create' }], estimatedRounds: 10 }, + ], + dependencies: { '2': ['1'], '3': ['2'] }, + }); + const adapter = makeAdapter(json); + const plan = await generateTaskPlan(adapter, 'add user API', mockProfile); + expect(plan.subtasks).toHaveLength(3); + expect(plan.dependencyGraph.get('2')).toEqual(['1']); + expect(plan.dependencyGraph.get('3')).toEqual(['2']); + }); + + it('throws AGENT_PLAN_FAILED on empty response', async () => { + const adapter = makeAdapter(''); + // chatWithTools returns empty content + (adapter.chatWithTools as unknown as Mock).mockResolvedValue({ content: '', toolCalls: [] }); + await expect(generateTaskPlan(adapter, 'test', mockProfile)).rejects.toThrow('AGENT_PLAN_FAILED'); + }); + + it('throws AGENT_PLAN_FAILED on invalid JSON', async () => { + const adapter = makeAdapter('not json at all'); + await expect(generateTaskPlan(adapter, 'test', mockProfile)).rejects.toThrow(); + }); + + it('throws AGENT_PLAN_CYCLE on cyclic dependencies', async () => { + const json = JSON.stringify({ + goal: 'Cyclic', + subtasks: [ + { id: '1', description: 'A', files: [], estimatedRounds: 5 }, + { id: '2', description: 'B', files: [], estimatedRounds: 5 }, + ], + dependencies: { '1': ['2'], '2': ['1'] }, + }); + const adapter = makeAdapter(json); + await expect(generateTaskPlan(adapter, 'test', mockProfile)).rejects.toThrow('AGENT_PLAN_CYCLE'); + }); +}); + +describe('estimateTokenCost', () => { + it('estimates based on total rounds', () => { + const plan: TaskPlan = { + goal: 'test', + subtasks: [ + { id: '1', description: '', files: [], estimatedRounds: 10 }, + { id: '2', description: '', files: [], estimatedRounds: 15 }, + ], + dependencyGraph: new Map([['1', 
// src/agent/planner.ts
import type { ProviderAdapter, Message } from '../providers/base.js';
import type { TaskPlan, Subtask, WorkspaceProfile } from './types.js';
import { validateDAG } from './types.js';
import { JamError } from '../utils/errors.js';
import { formatProfileForPrompt } from './workspace-intel.js';

/** Optional overrides forwarded to the provider call made by the planner. */
interface PlannerOptions {
  model?: string;
  temperature?: number;
  maxTokens?: number;
}

/** Fallback used when the model omits or garbles `estimatedRounds`. */
const DEFAULT_ESTIMATED_ROUNDS = 10;

/** True for plain (non-null, non-array) objects, i.e. values safe to index by key. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/** Build a planner failure whose message carries the error code, like the other planner errors. */
function planFailed(detail: string): JamError {
  return new JamError(`AGENT_PLAN_FAILED: ${detail}`, 'AGENT_PLAN_FAILED');
}

/** Normalise the model's `files` array, dropping entries without a usable path. */
function parseFiles(raw: unknown): Subtask['files'] {
  const entries = Array.isArray(raw) ? raw.filter(isRecord) : [];
  return entries
    .filter((f) => typeof f.path === 'string' && f.path.length > 0)
    .map((f) => ({
      path: String(f.path),
      // Anything other than the three known modes degrades to the least-privileged one.
      mode: f.mode === 'create' || f.mode === 'modify' || f.mode === 'read-only'
        ? f.mode
        : 'read-only' as const,
    }));
}

/** Normalise the model's `subtasks` array into well-formed Subtask objects. */
function parseSubtasks(raw: unknown): Subtask[] {
  const entries = Array.isArray(raw) ? raw.filter(isRecord) : [];
  return entries.map((s, index) => {
    const rounds = Number(s.estimatedRounds);
    return {
      // A missing id would otherwise stringify to "undefined"; fall back to the 1-based position.
      id: s.id === undefined || s.id === null ? String(index + 1) : String(s.id),
      description: String(s.description ?? ''),
      files: parseFiles(s.files),
      estimatedRounds: Number.isFinite(rounds) && rounds > 0 ? rounds : DEFAULT_ESTIMATED_ROUNDS,
      validationCommand:
        typeof s.validationCommand === 'string' && s.validationCommand.length > 0
          ? s.validationCommand
          : undefined,
    };
  });
}

/**
 * Build the subtaskId → prerequisites map.
 * Every subtask gets an entry; prerequisites that are not arrays or that name
 * unknown subtasks are dropped because they could never be satisfied.
 */
function parseDependencies(raw: unknown, subtasks: Subtask[]): Map<string, string[]> {
  const declared = isRecord(raw) ? raw : {};
  const knownIds = new Set(subtasks.map((s) => s.id));
  const graph = new Map<string, string[]>();

  for (const subtask of subtasks) {
    const listed = declared[subtask.id];
    const prerequisites = Array.isArray(listed)
      ? listed.map((dep) => String(dep)).filter((dep) => knownIds.has(dep))
      : [];
    graph.set(subtask.id, [...new Set(prerequisites)]);
  }
  return graph;
}

/**
 * Ask the provider to decompose `prompt` into subtasks with a dependency graph.
 *
 * @param adapter  Provider used for the planning call (must implement `chatWithTools`).
 * @param prompt   The user's task description.
 * @param profile  Workspace profile rendered into the system prompt.
 * @param options  Optional model / temperature / maxTokens overrides.
 * @returns The parsed plan; `dependencyGraph` maps each subtask ID to its prerequisites.
 * @throws JamError `AGENT_PLAN_FAILED` when the response is empty, is not a JSON
 *         object, or contains no subtasks; `AGENT_PLAN_CYCLE` when the
 *         dependencies are circular.
 */
export async function generateTaskPlan(
  adapter: ProviderAdapter,
  prompt: string,
  profile: WorkspaceProfile,
  options?: PlannerOptions,
): Promise<TaskPlan> {
  const profileContext = formatProfileForPrompt(profile);

  const systemPrompt = `You are a task planner for an AI coding agent. Given a user task and workspace context, decompose it into subtasks with a dependency graph.

Workspace context:
${profileContext}

Respond with ONLY valid JSON matching this schema:
{
  "goal": "one sentence description",
  "subtasks": [
    {
      "id": "1",
      "description": "what to do",
      "files": [{ "path": "src/file.ts", "mode": "create" | "modify" | "read-only" }],
      "estimatedRounds": 10,
      "validationCommand": "npm test -- --grep pattern"  // optional
    }
  ],
  "dependencies": { "2": ["1"], "3": ["2"] }  // subtaskId -> [prerequisite IDs]
}

Rules:
- Each subtask should be a focused unit of work
- Files should list ALL files the subtask will touch
- Dependencies must form a DAG (no cycles)
- Use "read-only" mode for files that are only referenced
- estimatedRounds: typically 5-15 for simple, 15-25 for complex
- For simple single-file tasks, output a single subtask with no dependencies`;

  const messages: Message[] = [
    { role: 'user', content: prompt },
  ];

  // Adapters without tool support cannot plan; they fall through to the empty-response error.
  const response = adapter.chatWithTools
    ? await adapter.chatWithTools(messages, [], {
        model: options?.model,
        temperature: options?.temperature ?? 0.3,
        maxTokens: options?.maxTokens ?? 2000,
        systemPrompt,
      })
    : null;

  if (!response?.content) {
    throw planFailed('Planner received empty response');
  }

  // The model may wrap its JSON in markdown code fences or surrounding chatter.
  let parsed: unknown;
  try {
    parsed = JSON.parse(extractJSON(response.content));
  } catch {
    throw planFailed('Planner returned invalid JSON');
  }

  // `null`, arrays and primitives are valid JSON but not a plan.
  if (!isRecord(parsed)) {
    throw planFailed('Planner returned JSON that is not an object');
  }

  const subtasks = parseSubtasks(parsed.subtasks);
  if (subtasks.length === 0) {
    throw planFailed('Planner returned no subtasks');
  }

  const dependencyGraph = parseDependencies(parsed.dependencies, subtasks);

  const cycle = validateDAG(dependencyGraph);
  if (cycle) {
    // A re-prompt with a no-cycles constraint could go here; for now the plan is rejected.
    throw new JamError(
      `AGENT_PLAN_CYCLE: Plan has circular dependencies: ${cycle.join(' → ')}`,
      'AGENT_PLAN_CYCLE',
    );
  }

  return { goal: String(parsed.goal ?? prompt), subtasks, dependencyGraph };
}

/** Extract JSON from a string that may be wrapped in markdown code fences. */
function extractJSON(text: string): string {
  // Prefer the contents of the first fenced block.
  const fenceMatch = text.match(/```(?:json)?\s*\n?([\s\S]*?)\n?```/);
  if (fenceMatch?.[1]) return fenceMatch[1].trim();

  // Otherwise take everything from the first "{" to the last "}".
  const objMatch = text.match(/\{[\s\S]*\}/);
  if (objMatch) return objMatch[0];

  return text.trim();
}

/**
 * Estimate the token cost of a plan.
 * Rough heuristic: each estimated round uses ~1000 tokens (prompt + completion).
 */
export function estimateTokenCost(plan: TaskPlan): number {
  const totalRounds = plan.subtasks.reduce((sum, s) => sum + s.estimatedRounds, 0);
  return totalRounds * 1000;
}
ProgressReporter('default', (msg) => output.push(msg)); + + reporter.onEvent({ type: 'plan-ready', message: 'Plan: Add API (2 subtasks)' }); + reporter.onEvent({ type: 'worker-started', subtaskId: '1', message: 'Starting: Create model' }); + reporter.onEvent({ type: 'worker-completed', subtaskId: '1', message: 'Done: Create model' }); + + expect(output.some(o => o.includes('[Worker 1]'))).toBe(true); + expect(output.some(o => o.includes('subtasks complete'))).toBe(true); + }); + + it('suppresses output in quiet mode', () => { + const output: string[] = []; + const reporter = new ProgressReporter('quiet', (msg) => output.push(msg)); + + reporter.onEvent({ type: 'plan-ready', message: 'Plan' }); + reporter.onEvent({ type: 'worker-started', subtaskId: '1', message: 'Starting' }); + + expect(output).toHaveLength(0); + }); + + it('collects events for json mode', () => { + const reporter = new ProgressReporter('json', () => {}); + + reporter.onEvent({ type: 'plan-ready', message: 'Plan' }); + reporter.onEvent({ type: 'worker-started', subtaskId: '1', message: 'Starting' }); + + const results = reporter.getJsonResults(); + expect(results).toHaveLength(2); + expect(results[0].type).toBe('plan-ready'); + }); + + it('shows status bar after worker completion', () => { + const output: string[] = []; + const reporter = new ProgressReporter('default', (msg) => output.push(msg)); + + reporter.onEvent({ type: 'plan-ready', message: 'Plan: Test (3 subtasks)' }); + reporter.onEvent({ type: 'worker-started', subtaskId: '1', message: 'Starting' }); + reporter.updateTokenCount(1500); + reporter.onEvent({ type: 'worker-completed', subtaskId: '1', message: 'Done' }); + + const statusBar = output.find(o => o.includes('1/3')); + expect(statusBar).toBeDefined(); + expect(statusBar).toContain('1,500 tokens'); + }); + + it('renders all-done event', () => { + const output: string[] = []; + const reporter = new ProgressReporter('default', (msg) => output.push(msg)); + reporter.onEvent({ type: 
// src/agent/progress.ts
import type { ProgressEvent } from './orchestrator.js';

export type OutputMode = 'interactive' | 'default' | 'quiet' | 'json';

/**
 * Renders orchestrator progress events as human-readable lines.
 *
 * Every event is recorded regardless of mode. In `quiet` mode nothing is
 * written; in `json` mode nothing is written and the events are retrieved via
 * {@link ProgressReporter.getJsonResults}. `interactive` and `default`
 * currently render identically.
 */
export class ProgressReporter {
  private readonly mode: OutputMode;
  private readonly write: (msg: string) => void;
  private readonly events: ProgressEvent[] = [];
  private completed = 0;
  private total = 0;
  private activeWorkers = 0;
  private totalTokens = 0;

  /**
   * @param mode   Output mode controlling what is written.
   * @param write  Sink for rendered text; defaults to stderr.
   */
  constructor(mode: OutputMode, write: (msg: string) => void = (msg) => process.stderr.write(msg)) {
    this.mode = mode;
    this.write = write;
  }

  /** Record an event and, unless in quiet/json mode, render it. */
  onEvent(event: ProgressEvent): void {
    this.events.push(event);

    if (this.mode === 'quiet') return;
    if (this.mode === 'json') return; // collected, rendered at end

    switch (event.type) {
      case 'plan-ready': {
        // The subtask total is carried in the message text; accept the
        // singular form too so a one-subtask plan does not report "1/0".
        const match = event.message.match(/(\d+) subtasks?/);
        if (match?.[1]) this.total = parseInt(match[1], 10);
        this.write(`\n${this.formatPlanReady(event.message)}\n`);
        break;
      }
      case 'worker-started':
        this.activeWorkers++;
        this.write(`${this.formatWorkerPrefix(event.subtaskId)} ${event.message}\n`);
        break;
      case 'worker-completed':
        this.workerFinished();
        this.completed++;
        this.write(`${this.formatWorkerPrefix(event.subtaskId)} ${event.message}\n`);
        this.write(this.formatStatusBar() + '\n');
        break;
      case 'worker-failed':
        this.workerFinished();
        this.write(`${this.formatWorkerPrefix(event.subtaskId)} ${event.message}\n`);
        break;
      case 'all-done':
        this.write(`\n${event.message}\n`);
        break;
    }
  }

  /** Set the running token total shown in the status bar (absolute value, not a delta). */
  updateTokenCount(tokens: number): void {
    this.totalTokens = tokens;
  }

  /** For --json mode: return a copy of all recorded events. */
  getJsonResults(): ProgressEvent[] {
    return [...this.events];
  }

  /** Decrement the active-worker count without letting an unmatched finish drive it negative. */
  private workerFinished(): void {
    this.activeWorkers = Math.max(0, this.activeWorkers - 1);
  }

  private formatWorkerPrefix(subtaskId?: string): string {
    return subtaskId ? `[Worker ${subtaskId}]` : '[Agent]';
  }

  private formatPlanReady(message: string): string {
    return `--- ${message} ---`;
  }

  private formatStatusBar(): string {
    // Pin the locale so digit grouping ("1,500") does not vary with the host environment.
    const tokens = this.totalTokens.toLocaleString('en-US');
    return `[${this.completed}/${this.total} subtasks complete | ${this.activeWorkers} active | ${tokens} tokens]`;
  }
}

/**
 * Create a ProgressReporter from CLI options.
 * `json` wins over `quiet`; otherwise the mode depends on whether stderr —
 * the stream the default writer targets — is a TTY.
 */
export function createProgressReporter(options: { quiet?: boolean; json?: boolean }): ProgressReporter {
  if (options.json) return new ProgressReporter('json');
  if (options.quiet) return new ProgressReporter('quiet');
  const isTTY = process.stderr.isTTY;
  return new ProgressReporter(isTTY ? 'interactive' : 'default');
}
// src/agent/provider-pool.ts
import type { ProviderAdapter, TokenUsage } from '../providers/base.js';

export interface ProviderLease {
  adapter: ProviderAdapter;
  /** Call when done with this lease to release the semaphore slot. Safe to call more than once. */
  release(): void;
}

/**
 * Counting semaphore around a single provider adapter.
 *
 * Limits how many leases are outstanding at once, queues further requests in
 * FIFO order, supports a temporary global pause for rate limiting, and
 * aggregates token usage reported by lease holders.
 */
export class ProviderPool {
  private readonly adapter: ProviderAdapter;
  private readonly limit: number;
  private active = 0;
  private readonly queue: Array<(lease: ProviderLease) => void> = [];
  private readonly totalTokens: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 };
  private rateLimitPauseUntil = 0;

  /**
   * @param adapter           Adapter handed out through every lease.
   * @param concurrencyLimit  Maximum simultaneous leases. Values below 1 (or NaN)
   *                          are treated as 1, since a pool with no slots would
   *                          make every acquire() wait forever.
   */
  constructor(adapter: ProviderAdapter, concurrencyLimit: number = 3) {
    this.adapter = adapter;
    this.limit = Number.isNaN(concurrencyLimit) || concurrencyLimit < 1 ? 1 : concurrencyLimit;
  }

  /** Acquire a lease. Blocks (via promise) if at concurrency limit or rate-limited. */
  async acquire(): Promise<ProviderLease> {
    if (this.isRateLimited()) await this.waitOutRateLimit();

    if (this.active < this.limit) {
      this.active++;
      return this.createLease();
    }

    // At limit — wait for a releasing lease to hand its slot over.
    const lease = await new Promise<ProviderLease>((resolve) => {
      this.queue.push(resolve);
    });

    // A pause may have started while this request sat in the queue.
    if (this.isRateLimited()) await this.waitOutRateLimit();
    return lease;
  }

  /** Pause all acquires for `retryAfterMs` milliseconds from now. */
  pauseForRateLimit(retryAfterMs: number): void {
    this.rateLimitPauseUntil = Date.now() + retryAfterMs;
  }

  /** Add to aggregate token usage. */
  addTokenUsage(usage: TokenUsage): void {
    this.totalTokens.promptTokens += usage.promptTokens;
    this.totalTokens.completionTokens += usage.completionTokens;
    this.totalTokens.totalTokens += usage.totalTokens;
  }

  /** Get a snapshot of aggregate token usage across all leases. */
  getTotalTokens(): TokenUsage {
    return { ...this.totalTokens };
  }

  get activeCount(): number { return this.active; }
  get queuedCount(): number { return this.queue.length; }

  private isRateLimited(): boolean {
    return this.rateLimitPauseUntil > Date.now();
  }

  /** Sleep until the pause deadline passes, re-checking in case it was extended meanwhile. */
  private async waitOutRateLimit(): Promise<void> {
    let remaining = this.rateLimitPauseUntil - Date.now();
    while (remaining > 0) {
      const delay = remaining;
      await new Promise<void>((resolve) => setTimeout(resolve, delay));
      remaining = this.rateLimitPauseUntil - Date.now();
    }
  }

  private createLease(): ProviderLease {
    // Guards against double release, which would otherwise free a slot twice.
    let released = false;
    return {
      adapter: this.adapter,
      release: () => {
        if (released) return;
        released = true;

        const next = this.queue.shift();
        if (next) {
          // Slot transfers directly to the next waiter; the active count is unchanged.
          next(this.createLease());
        } else {
          this.active--;
        }
      },
    };
  }
}
'./sandbox.js'; +import type { SandboxConfig } from './types.js'; + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +const defaultConfig: SandboxConfig = { + filesystem: 'workspace-only', + network: 'allowed', + timeout: 60_000, +}; + +const workspaceRoot = '/tmp/test-workspace'; + +// ── detectSandboxStrategy ───────────────────────────────────────────────────── + +describe('detectSandboxStrategy', () => { + it('returns sandbox-exec on darwin', () => { + expect(detectSandboxStrategy('darwin')).toBe('sandbox-exec'); + }); + + it('returns permissions-only on win32', () => { + expect(detectSandboxStrategy('win32')).toBe('permissions-only'); + }); + + it('returns a valid strategy on linux (one of firejail/unshare/permissions-only)', () => { + const result = detectSandboxStrategy('linux'); + const valid: string[] = ['firejail', 'unshare', 'permissions-only']; + expect(valid).toContain(result); + }); +}); + +// ── buildSandboxArgs ────────────────────────────────────────────────────────── + +describe('buildSandboxArgs', () => { + it('wraps with sandbox-exec on darwin (command is sandbox-exec, args contain -p)', () => { + const result = buildSandboxArgs('echo hello', workspaceRoot, defaultConfig, 'sandbox-exec'); + expect(result.command).toBe('sandbox-exec'); + expect(result.args).toContain('-p'); + }); + + it('wraps with firejail (command is firejail, args contain --whitelist=)', () => { + const result = buildSandboxArgs('echo hello', workspaceRoot, defaultConfig, 'firejail'); + expect(result.command).toBe('firejail'); + expect(result.args.some((a) => a.startsWith('--whitelist='))).toBe(true); + }); + + it('wraps with unshare (command is unshare, args contain -r and -m)', () => { + const result = buildSandboxArgs('echo hello', workspaceRoot, defaultConfig, 'unshare'); + expect(result.command).toBe('unshare'); + expect(result.args).toContain('-r'); + expect(result.args).toContain('-m'); + }); + + it('returns passthrough for 
permissions-only (splits command into command + args)', () => { + const result = buildSandboxArgs('echo hello world', workspaceRoot, defaultConfig, 'permissions-only'); + expect(result.command).toBe('echo'); + expect(result.args).toEqual(['hello world']); + }); + + it('returns passthrough for permissions-only with no-arg command', () => { + const result = buildSandboxArgs('pwd', workspaceRoot, defaultConfig, 'permissions-only'); + expect(result.command).toBe('pwd'); + expect(result.args).toEqual([]); + }); + + it('includes network deny in sandbox-exec profile when network is blocked', () => { + const blockedConfig: SandboxConfig = { ...defaultConfig, network: 'blocked' }; + const result = buildSandboxArgs('echo hello', workspaceRoot, blockedConfig, 'sandbox-exec'); + // The profile is the second argument (after '-p') + const profileIdx = result.args.indexOf('-p'); + expect(profileIdx).toBeGreaterThanOrEqual(0); + const profile = result.args[profileIdx + 1]; + expect(profile).toContain('(deny network*)'); + }); + + it('does not include network deny in sandbox-exec profile when network is allowed', () => { + const result = buildSandboxArgs('echo hello', workspaceRoot, defaultConfig, 'sandbox-exec'); + const profileIdx = result.args.indexOf('-p'); + const profile = result.args[profileIdx + 1]; + expect(profile).not.toContain('(deny network*)'); + }); + + it('sandbox-exec profile restricts writes to workspaceRoot and /tmp', () => { + const result = buildSandboxArgs('echo hello', workspaceRoot, defaultConfig, 'sandbox-exec'); + const profileIdx = result.args.indexOf('-p'); + const profile = result.args[profileIdx + 1]; + expect(profile).toContain(workspaceRoot); + expect(profile).toContain('/tmp'); + expect(profile).toContain('deny file-write*'); + }); +}); + +// ── executeSandboxed ────────────────────────────────────────────────────────── + +describe('executeSandboxed', () => { + it('executes a simple command (echo hello) and returns stdout', async () => { + const 
// src/agent/sandbox.ts
import { execFile, spawnSync } from 'child_process';
import type { SandboxConfig } from './types.js';

// ── Types ─────────────────────────────────────────────────────────────────────

export type SandboxStrategy = 'sandbox-exec' | 'unshare' | 'firejail' | 'permissions-only';

// ── Strategy Detection ────────────────────────────────────────────────────────

/** True when `which <binary>` succeeds and prints a path. */
function isOnPath(binary: string): boolean {
  const probe = spawnSync('which', [binary], { encoding: 'utf8' });
  return probe.status === 0 && probe.stdout.trim().length > 0;
}

/**
 * Detect the best available sandbox strategy for the current (or given) platform.
 * On darwin → sandbox-exec.
 * On linux  → firejail if available, else unshare if available, else permissions-only.
 * Elsewhere → permissions-only.
 *
 * @param platform  Platform identifier; defaults to `process.platform`.
 */
export function detectSandboxStrategy(platform?: string): SandboxStrategy {
  const os = platform ?? process.platform;

  if (os === 'darwin') return 'sandbox-exec';

  if (os === 'linux') {
    if (isOnPath('firejail')) return 'firejail';
    if (isOnPath('unshare')) return 'unshare';
  }

  return 'permissions-only';
}

// ── Seatbelt Profile Builder ──────────────────────────────────────────────────

/** Render `value` as a double-quoted seatbelt string literal, escaping `"` and `\`. */
function quoteSeatbeltString(value: string): string {
  return `"${value.replace(/["\\]/g, '\\$&')}"`;
}

/**
 * Build a macOS sandbox-exec (seatbelt) profile that:
 *   - allows everything by default
 *   - denies file writes outside workspaceRoot and /tmp
 *   - optionally denies all network activity
 *
 * NOTE(review): confirm on a real macOS host that seatbelt combines the three
 * filters in the deny rule conjunctively; if it treats them as alternatives the
 * rule needs an explicit `require-all` wrapper.
 */
function buildSeatbeltProfile(workspaceRoot: string, config: SandboxConfig): string {
  const lines: string[] = [
    '(version 1)',
    '(allow default)',
    // Deny writes everywhere except workspaceRoot and /tmp. The path is escaped
    // so a quote or backslash in it cannot terminate the string early.
    '(deny file-write*',
    '    (subpath "/")',
    `    (require-not (subpath ${quoteSeatbeltString(workspaceRoot)}))`,
    '    (require-not (subpath "/tmp")))',
  ];

  if (config.network === 'blocked') {
    lines.push('(deny network*)');
  }

  return lines.join('\n');
}

// ── Arg Builder ───────────────────────────────────────────────────────────────

/**
 * Wrap a shell command string for sandboxed execution according to the chosen
 * strategy. Returns `{ command, args }` suitable for passing to execFile.
 *
 * `config.network === 'blocked'` is honoured by every strategy that can enforce
 * it: a deny rule for sandbox-exec, `--net=none` for firejail and a private
 * network namespace (`-n`) for unshare. `permissions-only` applies no OS-level
 * restriction at all.
 *
 * NOTE(review): `config.filesystem` is not consulted here; every wrapping
 * strategy restricts to the workspace even when it is `'unrestricted'`.
 *
 * @param command        Shell command line to run.
 * @param workspaceRoot  Directory the command may write to.
 * @param config         Sandbox settings.
 * @param strategy       Wrapper to use, normally from {@link detectSandboxStrategy}.
 */
export function buildSandboxArgs(
  command: string,
  workspaceRoot: string,
  config: SandboxConfig,
  strategy: SandboxStrategy,
): { command: string; args: string[] } {
  const networkBlocked = config.network === 'blocked';
  const shell = ['/bin/bash', '-c', command];

  switch (strategy) {
    case 'sandbox-exec':
      return {
        command: 'sandbox-exec',
        args: ['-p', buildSeatbeltProfile(workspaceRoot, config), ...shell],
      };

    case 'firejail':
      return {
        command: 'firejail',
        args: [
          '--noprofile',
          `--whitelist=${workspaceRoot}`,
          ...(networkBlocked ? ['--net=none'] : []),
          '--',
          ...shell,
        ],
      };

    case 'unshare':
      return {
        command: 'unshare',
        args: ['-r', '-m', ...(networkBlocked ? ['-n'] : []), ...shell],
      };

    case 'permissions-only': {
      // Split on the first space: the first word is the executable and the
      // remainder is forwarded as ONE argument (no shell word-splitting).
      const spaceIdx = command.indexOf(' ');
      if (spaceIdx === -1) {
        return { command, args: [] };
      }
      return {
        command: command.slice(0, spaceIdx),
        args: [command.slice(spaceIdx + 1)],
      };
    }
  }
}

// ── Executor ──────────────────────────────────────────────────────────────────

/**
 * Execute a shell command inside the appropriate sandbox.
 *
 * Never rejects: failures are reported through the result.
 *   - normal exit      → the child's exit code
 *   - killed / failed to spawn → exit code 1 (spawn and buffer errors also put
 *     the error message in `stderr` when the child wrote nothing there)
 *   - timeout          → the child is SIGKILLed and exit code is -1
 *
 * @param command        Shell command line to run.
 * @param workspaceRoot  Directory the command may write to.
 * @param config         Sandbox settings; `config.timeout` is the default timeout.
 * @param options        `timeout` (ms) overrides `config.timeout`.
 */
export async function executeSandboxed(
  command: string,
  workspaceRoot: string,
  config: SandboxConfig,
  options?: { timeout?: number },
): Promise<{ stdout: string; stderr: string; exitCode: number }> {
  const strategy = detectSandboxStrategy();
  const { command: exe, args } = buildSandboxArgs(command, workspaceRoot, config, strategy);
  const timeoutMs = options?.timeout ?? config.timeout ?? 60_000;

  return new Promise((resolve) => {
    let settled = false;
    let timer: ReturnType<typeof setTimeout> | undefined;

    const child = execFile(exe, args, (error, stdout, stderr) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);

      // child.exitCode is null when the process never started or died from a signal.
      const exitCode = child.exitCode ?? (error ? 1 : 0);

      // System-level failures (ENOENT, maxBuffer, …) carry a string code; a
      // plain non-zero exit carries the numeric exit status instead.
      const systemFailure = error && typeof error.code === 'string' ? error.message : '';

      resolve({
        stdout: stdout ?? '',
        stderr: stderr || systemFailure,
        exitCode,
      });
    });

    timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      child.kill('SIGKILL');
      resolve({ stdout: '', stderr: 'Process timed out', exitCode: -1 });
    }, timeoutMs);
  });
}
// src/agent/types.ts
import type { TokenUsage } from '../providers/base.js';

// ── Permission Tiers
export type PermissionTier = 'safe' | 'moderate' | 'dangerous';
export type AgentMode = 'supervised' | 'auto';

// ── Task Planning
export interface TaskPlan {
  goal: string;
  subtasks: Subtask[];
  dependencyGraph: Map<string, string[]>; // subtaskId → [blockedBy]
}

export interface Subtask {
  id: string;
  description: string;
  files: FileOwnership[];
  estimatedRounds: number;
  validationCommand?: string;
}

export interface FileOwnership {
  path: string;
  mode: 'create' | 'modify' | 'read-only';
}

// ── Worker
export interface WorkerOptions {
  subtask: Subtask;
  context: SubtaskContext;
  signal: AbortSignal;
}

export interface SubtaskContext {
  priorSummary: string;
  filesAvailable: string[];
  planReminder: string;
}

export interface WorkerResult {
  subtaskId: string;
  status: 'completed' | 'failed' | 'blocked' | 'cancelled';
  filesChanged: FileChange[];
  summary: string;
  tokensUsed: TokenUsage;
  error?: string;
}

export interface FileChange {
  path: string;
  action: 'created' | 'modified' | 'deleted';
  diff: string;
}

// ── File Lock
export type FileLockRequest = {
  workerId: string;
  path: string;
  reason: string;
};

export type FileLockResponse = {
  granted: boolean;
  waitForWorker?: string;
};

// ── Sandbox
export interface SandboxConfig {
  filesystem: 'workspace-only' | 'unrestricted';
  network: 'blocked' | 'allowed';
  timeout: number;
}

// ── Token Budget
export interface TokenBudget {
  maxPerWorker: number;
  maxTotal: number;
  spent: number;
  remaining: number;
}

// ── Workspace Profile
export interface WorkspaceProfile {
  language: string;
  framework?: string;
  monorepo: boolean;
  srcLayout: string;
  entryPoints: string[];
  codeStyle: {
    indent: 'tabs' | 'spaces';
    indentSize: number;
    quotes: 'single' | 'double';
    semicolons: boolean;
    trailingCommas: boolean;
    namingConvention: 'camelCase' | 'snake_case' | 'PascalCase';
  };
  fileNaming: string;
  exportStyle: 'named' | 'default' | 'barrel';
  importStyle: 'relative' | 'alias';
  errorHandling: string;
  logging: string;
  configPattern: string;
  testFramework: string;
  testLocation: string;
  testNaming: string;
  testStyle: string;
  coverageThreshold?: number;
  testCommand: string;
  commitConvention: string;
  branchPattern: string;
  packageManager: string;
  linter?: string;
  formatter?: string;
  typeChecker?: string;
  buildTool?: string;
}

// ── Multimodal (AgentMessage type — internal to agent module only)
export interface ContentPart {
  type: 'text' | 'image';
  text?: string;
  image?: {
    data: string;
    mediaType: 'image/png' | 'image/jpeg' | 'image/gif' | 'image/webp';
  };
}

export type MessageContent = string | ContentPart[];

export interface AgentMessage {
  role: 'system' | 'user' | 'assistant';
  content: MessageContent;
}

// ── Helpers

// dependencyGraph semantics: subtaskId → list of subtask IDs that must
// complete BEFORE this one can start (i.e., prerequisites, not dependents).

/**
 * Validate that a dependency graph is a DAG (no cycles).
 *
 * @param graph  Map of node → prerequisite nodes. Prerequisites that are not
 *               themselves keys are treated as nodes with no prerequisites.
 * @returns `null` when the graph is acyclic; otherwise the first cycle found,
 *          as a path that starts and ends on the same node and in which each
 *          entry lists the following entry as a prerequisite
 *          (e.g. `['a', 'b', 'a']`).
 */
export function validateDAG(graph: Map<string, string[]>): string[] | null {
  const visited = new Set<string>();
  // Nodes on the current DFS path. A Set iterates in insertion order, so it
  // doubles as the ordered path used to report the cycle.
  const stack = new Set<string>();

  /** Depth-first walk; returns the node that closed a cycle, or null. */
  function dfs(node: string): string | null {
    if (stack.has(node)) return node;
    if (visited.has(node)) return null;

    visited.add(node);
    stack.add(node);

    for (const dep of graph.get(node) ?? []) {
      const cycleNode = dfs(dep);
      // Return without unwinding `stack` so the path to the cycle is preserved.
      if (cycleNode !== null) return cycleNode;
    }

    stack.delete(node);
    return null;
  }

  for (const node of graph.keys()) {
    if (visited.has(node)) continue;

    const cycleNode = dfs(node);
    if (cycleNode === null) continue;

    // The cycle is the tail of the DFS path starting at the repeated node.
    const path = [...stack];
    return [...path.slice(path.indexOf(cycleNode)), cycleNode];
  }
  return null;
}

/**
 * Topological sort of subtask IDs: every node appears after all of its
 * prerequisites.
 *
 * @param graph  Map of node → prerequisite nodes.
 * @returns IDs in a valid execution order. Prerequisites that are not keys of
 *          `graph` are included in the output as well.
 * @throws Error with a "Cycle detected: …" message if the graph has a cycle.
 */
export function topologicalSort(graph: Map<string, string[]>): string[] {
  const cycle = validateDAG(graph);
  if (cycle) throw new Error(`Cycle detected: ${cycle.join(' → ')}`);

  const sorted: string[] = [];
  const visited = new Set<string>();

  /** Post-order visit: emit prerequisites first, then the node itself. */
  function visit(node: string): void {
    if (visited.has(node)) return;
    visited.add(node);
    for (const dep of graph.get(node) ?? []) {
      visit(dep);
    }
    sorted.push(node);
  }

  for (const node of graph.keys()) visit(node);
  return sorted;
}
{ content: 'Done', toolCalls: [] }; + callIndex++; + return Promise.resolve({ ...resp, usage: { promptTokens: 100, completionTokens: 50, totalTokens: 150 } }); + }), + } as unknown as ProviderAdapter; +} + +describe('executeWorker', () => { + it('completes a subtask with tool calls', async () => { + const adapter = makeMockAdapter([ + { content: 'Writing file', toolCalls: [{ name: 'write_file', arguments: { path: 'src/hello.ts', content: 'export const hello = 1;' } }] }, + { content: 'Done creating the file', toolCalls: [] }, + ]); + const executeTool = vi.fn().mockResolvedValue('File written'); + const result = await executeWorker(subtask, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool, + }); + expect(result.status).toBe('completed'); + expect(result.subtaskId).toBe('1'); + expect(result.tokensUsed.totalTokens).toBeGreaterThan(0); + expect(executeTool).toHaveBeenCalledWith('write_file', expect.objectContaining({ path: 'src/hello.ts' })); + }); + + it('returns cancelled on abort signal', async () => { + const controller = new AbortController(); + controller.abort(); // abort immediately + const adapter = makeMockAdapter([]); + const result = await executeWorker(subtask, context, controller.signal, { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool: vi.fn(), + }); + expect(result.status).toBe('cancelled'); + }); + + it('fails when exceeding round budget', async () => { + // Always return tool calls — never completes + const adapter = makeMockAdapter( + Array>(15).fill({ content: 'Reading', toolCalls: [{ name: 'read_file', arguments: { path: 'src/a.ts' } }] }), + ); + const subtaskShort = { ...subtask, estimatedRounds: 3 }; + const result = await executeWorker(subtaskShort, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + 
workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool: vi.fn().mockResolvedValue('file content'), + }); + expect(result.status).toBe('failed'); + expect(result.error).toContain('TIMEOUT'); + }); + + it('handles tool execution errors gracefully', async () => { + const adapter = makeMockAdapter([ + { content: 'Trying', toolCalls: [{ name: 'read_file', arguments: { path: 'bad.ts' } }] }, + { content: 'Done', toolCalls: [] }, + ]); + const executeTool = vi.fn().mockRejectedValueOnce(new Error('file not found')); + const result = await executeWorker(subtask, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool, + }); + expect(result.status).toBe('completed'); + }); + + it('fails when provider lacks tool support', async () => { + const adapter = { + info: { name: 'mock', supportsStreaming: true }, + validateCredentials: vi.fn(), + streamCompletion: vi.fn(), + listModels: vi.fn(), + // NO chatWithTools + } as unknown as ProviderAdapter; + const result = await executeWorker(subtask, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool: vi.fn(), + }); + expect(result.status).toBe('failed'); + }); + + it('tracks file changes from write_file calls', async () => { + const adapter = makeMockAdapter([ + { content: 'Writing', toolCalls: [{ name: 'write_file', arguments: { path: 'src/a.ts', content: 'code' } }] }, + { content: 'Writing more', toolCalls: [{ name: 'write_file', arguments: { path: 'src/b.ts', content: 'code' } }] }, + { content: 'Done', toolCalls: [] }, + ]); + const result = await executeWorker(subtask, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool: 
vi.fn().mockResolvedValue('ok'), + }); + expect(result.filesChanged).toHaveLength(2); + expect(result.filesChanged[0].path).toBe('src/a.ts'); + expect(result.filesChanged[1].path).toBe('src/b.ts'); + }); + + it('runs validation command on completion', async () => { + const subtaskWithValidation: Subtask = { + ...subtask, + validationCommand: 'npm test', + }; + const adapter = makeMockAdapter([ + { content: 'Done', toolCalls: [] }, + ]); + const executeTool = vi.fn().mockResolvedValue('ok'); + const result = await executeWorker(subtaskWithValidation, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool, + }); + expect(result.status).toBe('completed'); + expect(executeTool).toHaveBeenCalledWith('run_command', { command: 'npm test' }); + }); + + it('completes even when validation command fails', async () => { + const subtaskWithValidation: Subtask = { + ...subtask, + validationCommand: 'npm test', + }; + const adapter = makeMockAdapter([ + { content: 'Done', toolCalls: [] }, + ]); + const executeTool = vi.fn().mockRejectedValue(new Error('tests failed')); + const result = await executeWorker(subtaskWithValidation, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool, + }); + expect(result.status).toBe('completed'); + }); + + it('includes prior context in initial prompt', async () => { + const contextWithPrior: SubtaskContext = { + priorSummary: 'Created the model file', + filesAvailable: ['src/model.ts'], + planReminder: 'Remember to use barrel exports', + }; + const adapter = makeMockAdapter([ + { content: 'Done', toolCalls: [] }, + ]); + await executeWorker(subtask, contextWithPrior, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + 
toolSchemas: [], + executeTool: vi.fn(), + }); + // Verify the initial message included context + // eslint-disable-next-line @typescript-eslint/unbound-method + const mockChatWithTools = adapter.chatWithTools as unknown as { mock: { calls: Array>> } }; + const firstCall = mockChatWithTools.mock.calls[0]; + const messages = firstCall[0]; + expect(messages[0].content).toContain('Created the model file'); + expect(messages[0].content).toContain('src/model.ts'); + expect(messages[0].content).toContain('barrel exports'); + }); + + it('accumulates token usage across rounds', async () => { + const adapter = makeMockAdapter([ + { content: 'Reading', toolCalls: [{ name: 'read_file', arguments: { path: 'src/a.ts' } }] }, + { content: 'Writing', toolCalls: [{ name: 'write_file', arguments: { path: 'src/b.ts', content: 'x' } }] }, + { content: 'Done', toolCalls: [] }, + ]); + const result = await executeWorker(subtask, context, AbortSignal.timeout(5000), { + lease: { adapter, release: vi.fn() }, + workspaceRoot: '/workspace', + workspaceProfile: mockProfile, + toolSchemas: [], + executeTool: vi.fn().mockResolvedValue('ok'), + }); + // 3 rounds * 150 tokens each = 450 + expect(result.tokensUsed.totalTokens).toBe(450); + expect(result.tokensUsed.promptTokens).toBe(300); + expect(result.tokensUsed.completionTokens).toBe(150); + }); +}); diff --git a/src/agent/worker.ts b/src/agent/worker.ts new file mode 100644 index 0000000..b755330 --- /dev/null +++ b/src/agent/worker.ts @@ -0,0 +1,177 @@ +import type { Message, TokenUsage, ToolDefinition } from '../providers/base.js'; +import type { Subtask, SubtaskContext, WorkerResult, FileChange, WorkspaceProfile } from './types.js'; +import type { ProviderLease } from './provider-pool.js'; +import { WorkingMemory } from '../utils/memory.js'; +import { formatProfileForPrompt } from './workspace-intel.js'; + +// ── Dependencies ───────────────────────────────────────────────────────────── + +export interface WorkerDeps { + lease: 
ProviderLease; // from ProviderPool.acquire() + workspaceRoot: string; + workspaceProfile: WorkspaceProfile; + toolSchemas: ToolDefinition[]; // tool definitions for chatWithTools + executeTool: (name: string, args: Record) => Promise; +} + +// ── Worker execution loop ──────────────────────────────────────────────────── + +export async function executeWorker( + subtask: Subtask, + context: SubtaskContext, + signal: AbortSignal, + deps: WorkerDeps, +): Promise { + const { lease, workspaceProfile, toolSchemas, executeTool } = deps; + const adapter = lease.adapter; + const maxRounds = subtask.estimatedRounds + 5; // budget + 5 bonus + const memory = new WorkingMemory(adapter, undefined, undefined); + + const totalUsage: TokenUsage = { promptTokens: 0, completionTokens: 0, totalTokens: 0 }; + const filesChanged: FileChange[] = []; + + // Build system prompt with workspace profile + subtask context + const systemPrompt = buildWorkerSystemPrompt(workspaceProfile, subtask, context); + + // Initial messages + const messages: Message[] = [ + { role: 'user', content: buildInitialPrompt(subtask, context) }, + ]; + + for (let round = 0; round < maxRounds; round++) { + // Check cancellation + if (signal.aborted) { + return { + subtaskId: subtask.id, + status: 'cancelled', + filesChanged, + summary: `Cancelled after ${round} rounds`, + tokensUsed: totalUsage, + }; + } + + // Context compaction check + if (memory.shouldCompact(messages)) { + messages.splice(0, messages.length, ...(await memory.compact(messages))); + } + + // Scratchpad checkpoint + if (memory.shouldScratchpad(round) && round > 0) { + messages.push(memory.scratchpadPrompt()); + } + + // Call provider + if (!adapter.chatWithTools) { + return { + subtaskId: subtask.id, + status: 'failed', + filesChanged, + summary: 'Provider does not support tool calling', + tokensUsed: totalUsage, + error: 'Provider does not support chatWithTools', + }; + } + + const response = await adapter.chatWithTools(messages, toolSchemas, { 
+ systemPrompt, + temperature: 0.2, + }); + + // Track token usage + if (response.usage) { + totalUsage.promptTokens += response.usage.promptTokens; + totalUsage.completionTokens += response.usage.completionTokens; + totalUsage.totalTokens += response.usage.totalTokens; + } + + // No tool calls → worker is done + if (!response.toolCalls || response.toolCalls.length === 0) { + // Run validation command if provided + if (subtask.validationCommand) { + try { + await executeTool('run_command', { command: subtask.validationCommand }); + } catch { + // Validation failed — but worker completed its work + } + } + + return { + subtaskId: subtask.id, + status: 'completed', + filesChanged, + summary: response.content ?? `Completed in ${round + 1} rounds`, + tokensUsed: totalUsage, + }; + } + + // Execute tool calls + messages.push({ role: 'assistant', content: response.content ?? '' }); + + for (const tc of response.toolCalls) { + let output: string; + try { + output = await executeTool(tc.name, tc.arguments); + + // Track file changes for write tools + if (tc.name === 'write_file' && tc.arguments.path) { + filesChanged.push({ + path: String(tc.arguments.path), + action: 'created', // simplified — could check if file existed + diff: '', + }); + } + } catch (err) { + output = `Error: ${err instanceof Error ? err.message : String(err)}`; + } + + const capped = memory.processToolResult(tc.name, tc.arguments, output); + messages.push({ role: 'user', content: `[Tool result: ${tc.name}]\n${capped}` }); + } + + // Stuck detection: at half budget with no file changes + if (round === Math.floor(subtask.estimatedRounds * 0.5) && filesChanged.length === 0) { + messages.push({ + role: 'user', + content: '[HINT: You are halfway through your budget and have not made any file changes. 
Focus on the task and use write_file to make progress.]', + }); + } + } + + // Exceeded round budget + return { + subtaskId: subtask.id, + status: 'failed', + filesChanged, + summary: `Exceeded round budget (${maxRounds} rounds)`, + tokensUsed: totalUsage, + error: 'AGENT_WORKER_TIMEOUT', + }; +} + +// ── Prompt builders ────────────────────────────────────────────────────────── + +function buildWorkerSystemPrompt( + profile: WorkspaceProfile, + subtask: Subtask, + _context: SubtaskContext, +): string { + return [ + 'You are an AI coding agent executing a specific subtask.', + 'You MUST use tools to read and write files. Never output code blocks as a substitute for writing files.', + 'After writing a file, read it back to verify.', + '', + formatProfileForPrompt(profile), + '', + `Your task: ${subtask.description}`, + subtask.validationCommand ? `Validation: run \`${subtask.validationCommand}\` when done` : '', + `Files you may touch: ${subtask.files.map(f => `${f.path} (${f.mode})`).join(', ') || 'any'}`, + ].filter(Boolean).join('\n'); +} + +function buildInitialPrompt(subtask: Subtask, context: SubtaskContext): string { + const parts = [`Task: ${subtask.description}`]; + if (context.priorSummary) parts.push(`\nPrior context: ${context.priorSummary}`); + if (context.filesAvailable.length > 0) parts.push(`\nAvailable files from prior work: ${context.filesAvailable.join(', ')}`); + if (context.planReminder) parts.push(`\n${context.planReminder}`); + return parts.join('\n'); +} diff --git a/src/agent/workspace-intel.test.ts b/src/agent/workspace-intel.test.ts new file mode 100644 index 0000000..53f6eae --- /dev/null +++ b/src/agent/workspace-intel.test.ts @@ -0,0 +1,48 @@ +import { describe, it, expect } from 'vitest'; +import { buildWorkspaceProfile, formatProfileForPrompt, computeProfileHash } from './workspace-intel.js'; +import { resolve } from 'node:path'; + +const ROOT = resolve(import.meta.dirname, '../..'); + +describe('buildWorkspaceProfile', () => { + 
it('builds profile for jam-cli project', async () => { + const profile = await buildWorkspaceProfile(ROOT); + expect(profile.language).toBe('typescript'); + expect(profile.packageManager).toBe('npm'); + expect(profile.testFramework).toBe('vitest'); + expect(profile.codeStyle.quotes).toBe('single'); + }); + + it('detects structural fields', async () => { + const profile = await buildWorkspaceProfile(ROOT); + expect(profile.srcLayout).toBeTruthy(); + expect(profile.entryPoints.length).toBeGreaterThan(0); + expect(typeof profile.monorepo).toBe('boolean'); + }); + + it('returns cached profile on second call', async () => { + const p1 = await buildWorkspaceProfile(ROOT); + const p2 = await buildWorkspaceProfile(ROOT); + expect(p1.language).toBe(p2.language); + expect(p1.testFramework).toBe(p2.testFramework); + }); +}); + +describe('computeProfileHash', () => { + it('returns consistent hash for same project', async () => { + const h1 = await computeProfileHash(ROOT); + const h2 = await computeProfileHash(ROOT); + expect(h1).toBe(h2); + expect(h1.length).toBeGreaterThan(0); + }); +}); + +describe('formatProfileForPrompt', () => { + it('formats profile as readable string', async () => { + const profile = await buildWorkspaceProfile(ROOT); + const prompt = formatProfileForPrompt(profile); + expect(prompt).toContain('TypeScript'); + expect(prompt).toContain('vitest'); + expect(prompt).toContain('single quotes'); + }); +}); diff --git a/src/agent/workspace-intel.ts b/src/agent/workspace-intel.ts new file mode 100644 index 0000000..db4643b --- /dev/null +++ b/src/agent/workspace-intel.ts @@ -0,0 +1,323 @@ +// src/agent/workspace-intel.ts + +import { readFile, writeFile, mkdir, readdir, stat } from 'node:fs/promises'; +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { createHash } from 'node:crypto'; +import type { WorkspaceProfile } from './types.js'; +import { analyzeConventions } from '../intel/conventions.js'; +import type { 
ProviderAdapter } from '../providers/base.js'; +import type { SerializedGraph } from '../intel/types.js'; + +// ── Helpers ─────────────────────────────────────────────────────────────────── + +async function readTextFile(path: string): Promise { + try { + return await readFile(path, 'utf-8'); + } catch { + return null; + } +} + +async function isDir(path: string): Promise { + try { + return (await stat(path)).isDirectory(); + } catch { + return false; + } +} + +async function listDir(dir: string): Promise { + try { + return await readdir(dir); + } catch { + return []; + } +} + +// ── Hash ────────────────────────────────────────────────────────────────────── + +/** + * Compute a deterministic SHA-256 hash over: + * - package.json / pyproject.toml / Cargo.toml content + * - sorted list of filenames in src/ + * - config files: .eslintrc*, .prettierrc*, tsconfig.json, biome.json + */ +export async function computeProfileHash(root: string): Promise { + const hash = createHash('sha256'); + + // Manifest files + for (const name of ['package.json', 'pyproject.toml', 'Cargo.toml']) { + const content = await readTextFile(join(root, name)); + if (content !== null) { + hash.update(`${name}:${content}`); + } + } + + // Sorted list of file names in src/ (names only, not content) + const srcEntries = await listDir(join(root, 'src')); + hash.update(`src-files:${srcEntries.sort().join(',')}`); + + // Config files + const configGlobs = [ + '.eslintrc', '.eslintrc.js', '.eslintrc.cjs', '.eslintrc.json', '.eslintrc.yml', '.eslintrc.yaml', + '.prettierrc', '.prettierrc.js', '.prettierrc.cjs', '.prettierrc.json', '.prettierrc.yml', + 'tsconfig.json', 'biome.json', + ]; + for (const name of configGlobs) { + const content = await readTextFile(join(root, name)); + if (content !== null) { + hash.update(`${name}:${content}`); + } + } + + return hash.digest('hex'); +} + +// ── Cache ───────────────────────────────────────────────────────────────────── + +type CachedProfile = 
WorkspaceProfile & { _hash: string }; + +function cacheFilePath(root: string): string { + return join(root, '.jam', 'workspace-profile.json'); +} + +/** + * Read `.jam/workspace-profile.json` and return the profile (with its stored + * `_hash` field intact). Returns null if the file doesn't exist or is invalid. + * + * Hash validation is left to the caller (`buildWorkspaceProfile`) because + * computing the hash is async — this function just does the sync disk read. + */ +export function loadCachedProfile(root: string): WorkspaceProfile | null { + try { + const raw = readFileSync(cacheFilePath(root), 'utf-8'); + return JSON.parse(raw) as CachedProfile; + } catch { + return null; + } +} + +async function saveProfileCache(root: string, profile: WorkspaceProfile, hash: string): Promise { + try { + const dir = join(root, '.jam'); + await mkdir(dir, { recursive: true }); + const data: CachedProfile = { ...profile, _hash: hash }; + await writeFile(cacheFilePath(root), JSON.stringify(data, null, 2), 'utf-8'); + } catch { + // Cache write failures are non-fatal + } +} + +// ── Structural detection (static fallback) ──────────────────────────────────── + +async function detectFramework(root: string): Promise { + const pkgText = await readTextFile(join(root, 'package.json')); + if (pkgText) { + let pkg: Record = {}; + try { pkg = JSON.parse(pkgText) as Record; } catch { /* ignore */ } + const deps = { + ...((pkg.dependencies ?? {}) as Record), + ...((pkg.devDependencies ?? 
{}) as Record), + }; + // JS/TS frameworks + if ('next' in deps) return 'next'; + if ('express' in deps) return 'express'; + if ('react' in deps) return 'react'; + if ('vue' in deps) return 'vue'; + if ('angular' in deps || '@angular/core' in deps) return 'angular'; + } + + // Python + const pyproject = await readTextFile(join(root, 'pyproject.toml')); + if (pyproject) { + if (/fastapi/i.test(pyproject)) return 'fastapi'; + if (/flask/i.test(pyproject)) return 'flask'; + if (/django/i.test(pyproject)) return 'django'; + } + const requirements = await readTextFile(join(root, 'requirements.txt')); + if (requirements) { + if (/fastapi/i.test(requirements)) return 'fastapi'; + if (/flask/i.test(requirements)) return 'flask'; + if (/django/i.test(requirements)) return 'django'; + } + + // Go + const goMod = await readTextFile(join(root, 'go.mod')); + if (goMod && /gin-gonic\/gin/.test(goMod)) return 'gin'; + + // Rust + const cargoToml = await readTextFile(join(root, 'Cargo.toml')); + if (cargoToml && /actix-web/.test(cargoToml)) return 'actix'; + + return undefined; +} + +async function detectEntryPoints(root: string): Promise { + const candidates = [ + 'src/index.ts', 'src/main.ts', 'src/app.ts', + 'main.py', 'main.go', 'src/main.rs', + ]; + const found: string[] = []; + await Promise.all( + candidates.map(async (rel) => { + try { + await stat(join(root, rel)); + found.push(rel); + } catch { /* file doesn't exist */ } + }), + ); + // Preserve original candidate order + return candidates.filter(c => found.includes(c)); +} + +async function detectSrcLayout(root: string): Promise { + const topDirs = await listDir(root); + if (topDirs.includes('src')) return 'src/'; + if (topDirs.includes('lib')) return 'lib/'; + if (topDirs.includes('app')) return 'app/'; + return 'flat'; +} + +async function detectMonorepo(root: string): Promise { + if (await isDir(join(root, 'packages'))) return true; + if (await isDir(join(root, 'apps'))) return true; + + // Check for "workspaces" 
field in package.json + const pkgText = await readTextFile(join(root, 'package.json')); + if (pkgText) { + try { + const pkg = JSON.parse(pkgText) as Record; + if ('workspaces' in pkg) return true; + } catch { /* ignore */ } + } + return false; +} + +// ── Public API ──────────────────────────────────────────────────────────────── + +/** + * Build a complete WorkspaceProfile for the given root directory. + * + * Layer 1 — conventions (analyzeConventions) + * Layer 2 — structural data from intel graph if available, else static detection + * + * Results are cached in `.jam/workspace-profile.json` keyed by a hash of + * key project files. + */ +export async function buildWorkspaceProfile( + root: string, + _adapter?: ProviderAdapter, +): Promise { + // Compute hash first so we can validate the cache + const hash = await computeProfileHash(root); + + // Check cache + const cached = loadCachedProfile(root); + if (cached) { + const cachedWithHash = cached as CachedProfile; + if (cachedWithHash._hash === hash) { + // Return without the internal _hash field + const { _hash: _discard, ...profile } = cachedWithHash; + return profile as WorkspaceProfile; + } + } + + // Layer 1: conventions + const conventions = await analyzeConventions(root); + + // Layer 2: structural data + let framework: string | undefined; + let entryPoints: string[] = []; + + // Try intel graph first + const graphPath = join(root, '.jam', 'intel', 'graph.json'); + const graphText = await readTextFile(graphPath); + if (graphText) { + try { + const graph = JSON.parse(graphText) as SerializedGraph; + framework = graph.frameworks?.[0]; + // Derive entry points from graph nodes of type 'file' whose paths match + // common entry point names + const candidateNames = new Set(['index.ts', 'main.ts', 'app.ts', 'main.py', 'main.go', 'main.rs']); + entryPoints = (graph.nodes ?? []) + .filter(n => n.filePath && candidateNames.has(n.filePath.split('/').pop() ?? '')) + .map(n => n.filePath!) 
+ .slice(0, 10); + } catch { /* fall through to static */ } + } + + // Static fallbacks when graph data is absent or incomplete + if (!framework) { + framework = await detectFramework(root); + } + if (entryPoints.length === 0) { + entryPoints = await detectEntryPoints(root); + } + + // These are always detected statically (graph doesn't encode them directly) + const srcLayout = await detectSrcLayout(root); + const monorepo = await detectMonorepo(root); + + const profile: WorkspaceProfile = { + ...conventions, + framework, + entryPoints, + srcLayout, + monorepo, + }; + + // Persist cache + await saveProfileCache(root, profile, hash); + + return profile; +} + +// ── Prompt formatter ────────────────────────────────────────────────────────── + +/** + * Format a WorkspaceProfile as a concise human-readable string suitable for + * injection into an agent system prompt. + */ +export function formatProfileForPrompt(profile: WorkspaceProfile): string { + const lang = profile.language === 'typescript' ? 'TypeScript' + : profile.language === 'javascript' ? 'JavaScript' + : profile.language === 'python' ? 'Python' + : profile.language === 'rust' ? 'Rust' + : profile.language === 'go' ? 'Go' + : profile.language; + + const frameworkPart = profile.framework ? `/${profile.framework}` : ''; + const header = `You are working in a ${lang}${frameworkPart} project.`; + + const style = profile.codeStyle; + const indentDesc = style.indent === 'tabs' ? 'tabs' : `${style.indentSize}-space indent`; + const quotesDesc = style.quotes === 'single' ? 'single quotes' : 'double quotes'; + const semiDesc = style.semicolons ? 
'semicolons' : 'no semicolons'; + + const lines: string[] = [ + header, + `- Style: ${indentDesc}, ${quotesDesc}, ${semiDesc}, ${style.namingConvention}`, + `- Files: ${profile.fileNaming} with ${profile.exportStyle} exports`, + `- Imports: ${profile.importStyle} paths`, + `- Errors: ${profile.errorHandling} pattern`, + `- Tests: ${profile.testFramework}, ${profile.testLocation} ${profile.testNaming} files, ${profile.testStyle} style`, + `- Run tests: ${profile.testCommand}`, + `- Commits: ${profile.commitConvention}${profile.commitConvention === 'conventional' ? ' (feat:, fix:, chore:)' : ''}`, + ]; + + if (profile.entryPoints.length > 0) { + lines.push(`- Entry points: ${profile.entryPoints.join(', ')}`); + } + if (profile.monorepo) { + lines.push('- Monorepo: yes'); + } + if (profile.linter) { + lines.push(`- Linter: ${profile.linter}`); + } + if (profile.formatter) { + lines.push(`- Formatter: ${profile.formatter}`); + } + + return lines.join('\n'); +} diff --git a/src/commands/go.ts b/src/commands/go.ts index d424144..e00e7d4 100644 --- a/src/commands/go.ts +++ b/src/commands/go.ts @@ -1,69 +1,193 @@ /** - * `jam go` — Claude Code-like interactive agent session. + * `jam go` — interactive agent console. * - * Like `jam chat` but with full write tools (write_file, apply_patch, - * run_command, git operations). Permission prompts before dangerous operations. + * Full-featured agent REPL with orchestrator-backed task execution, + * MCP tool integration, session memory, and slash commands. 
*/ +import * as readline from 'node:readline'; import { loadConfig, getActiveProfile } from '../config/loader.js'; import { createProvider } from '../providers/factory.js'; -import { createSession } from '../storage/history.js'; -import { getWorkspaceRoot } from '../utils/workspace.js'; -import { startChat } from '../ui/chat.js'; +import { Orchestrator } from '../agent/orchestrator.js'; +import { createProgressReporter } from '../agent/progress.js'; +import { ALL_TOOL_SCHEMAS, executeTool as executeBuiltinTool } from '../tools/all-tools.js'; import { createMcpManager } from '../mcp/manager.js'; +import { getWorkspaceRoot } from '../utils/workspace.js'; import { JamError } from '../utils/errors.js'; -import type { Message } from '../providers/base.js'; +import { renderMarkdown, printError } from '../ui/renderer.js'; +import type { CliOverrides } from '../config/schema.js'; -export interface GoCommandOptions { - profile?: string; - provider?: string; - model?: string; - baseUrl?: string; +export interface GoCommandOptions extends CliOverrides { name?: string; + auto?: boolean; + workers?: string; + image?: string[]; + noSandbox?: boolean; } export async function runGo(options: GoCommandOptions): Promise { try { - const config = await loadConfig(process.cwd(), { + const cliOverrides: CliOverrides = { profile: options.profile, provider: options.provider, model: options.model, baseUrl: options.baseUrl, - }); - + }; + const config = await loadConfig(process.cwd(), cliOverrides); const profile = getActiveProfile(config); const adapter = await createProvider(profile); - const workspaceRoot = await getWorkspaceRoot(process.cwd()); - const sessionName = - options.name ?? 
`Agent ${new Date().toLocaleString('en-US', { hour12: false })}`; - const session = await createSession(sessionName, workspaceRoot); - - let initialMessages: Message[] = []; - if (profile.systemPrompt) { - initialMessages = [{ role: 'system', content: profile.systemPrompt }]; - } + // MCP setup const mcpLog = (msg: string) => process.stderr.write(msg + '\n'); const mcpManager = await createMcpManager(config.mcpServers, mcpLog, config.mcpGroups); + const mcpSchemas = mcpManager.getToolSchemas(); + + // Merge MCP tool schemas with built-in tools + const toolSchemas = mcpSchemas.length > 0 + ? [...ALL_TOOL_SCHEMAS, ...mcpSchemas] + : ALL_TOOL_SCHEMAS; + + // Tool execution bridge: MCP tools routed to mcpManager, built-in tools to executeTool + const executeTool = async (name: string, args: Record): Promise => { + if (mcpManager.isOwnTool(name)) { + return mcpManager.executeTool(name, args); + } + return executeBuiltinTool(name, args, workspaceRoot); + }; + + // Create orchestrator + const orchestrator = new Orchestrator({ + adapter, + workspaceRoot, + toolSchemas, + executeTool, + }); + + const mode = options.auto + ? 'auto' as const + : (config.agent?.defaultMode ?? 'supervised'); + const maxWorkers = options.workers + ? parseInt(options.workers, 10) + : (config.agent?.maxWorkers ?? 3); + + // Print welcome + process.stderr.write('\njam go — interactive agent console\n'); + process.stderr.write(`Provider: ${profile.provider}, Model: ${profile.model ?? 
'default'}\n`); + process.stderr.write(`Mode: ${mode} | Workers: ${maxWorkers}\n`); + process.stderr.write('Type a task, or /stop /status /exit\n\n'); + + // Interactive readline loop + const rl = readline.createInterface({ + input: process.stdin, + output: process.stderr, + prompt: 'jam> ', + }); + + let currentAbort: AbortController | null = null; + + rl.prompt(); + + rl.on('line', (line) => { + void (async () => { + const input = line.trim(); + if (!input) { rl.prompt(); return; } + + // Handle slash commands + if (input === '/exit' || input === '/quit') { + await mcpManager.shutdown(); + rl.close(); + return; + } + + if (input === '/stop') { + if (currentAbort) { + currentAbort.abort(); + process.stderr.write('Stopping current task...\n'); + } else { + process.stderr.write('No task running.\n'); + } + rl.prompt(); + return; + } + + if (input === '/status') { + process.stderr.write(`Mode: ${mode} | Workers: ${maxWorkers}\n`); + process.stderr.write(`Provider: ${profile.provider} | Model: ${profile.model ?? 'default'}\n`); + process.stderr.write(`Workspace: ${workspaceRoot}\n`); + rl.prompt(); + return; + } + + if (input === '/help') { + process.stderr.write('Commands:\n'); + process.stderr.write(' /status — show current mode, provider, workspace\n'); + process.stderr.write(' /stop — abort the running task\n'); + process.stderr.write(' /exit — quit the agent console\n'); + process.stderr.write(' /help — show this help\n'); + process.stderr.write('\nAnything else is sent as a task to the orchestrator.\n'); + rl.prompt(); + return; + } + + // Ignore unrecognized slash commands + if (input.startsWith('/')) { + process.stderr.write(`Unknown command: ${input}. 
Type /help for available commands.\n`); + rl.prompt(); + return; + } + + // Execute task via orchestrator + currentAbort = new AbortController(); + const reporter = createProgressReporter({ quiet: false }); + + try { + const result = await orchestrator.execute(input, { + mode, + maxWorkers, + images: options.image, + signal: currentAbort.signal, + onProgress: (event) => reporter.onEvent(event), + }); + + // Render result + if (result.summary) { + try { + const rendered = await renderMarkdown(result.summary); + process.stdout.write(rendered); + } catch { + process.stdout.write(result.summary + '\n'); + } + } + + if (result.filesChanged.length > 0) { + process.stderr.write(`\nFiles changed: ${result.filesChanged.join(', ')}\n`); + } + } catch (err) { + if ((err as Error).name === 'AbortError') { + process.stderr.write('Task aborted.\n'); + } else { + const jamErr = JamError.fromUnknown(err); + process.stderr.write(`Error: ${jamErr.message}\n`); + if (jamErr.hint) process.stderr.write(`Hint: ${jamErr.hint}\n`); + } + } finally { + currentAbort = null; + } + + process.stderr.write('\n'); + rl.prompt(); + })(); + }); + + rl.on('close', () => { + process.stderr.write('\nBye!\n'); + process.exit(0); + }); - try { - await startChat({ - provider: adapter, - config, - sessionId: session.id, - initialMessages, - mcpManager, - enableWriteTools: true, - toolPolicy: config.toolPolicy, - toolAllowlist: config.toolAllowlist, - }); - } finally { - await mcpManager.shutdown(); - } } catch (err) { const jamErr = JamError.fromUnknown(err); - process.stderr.write(`Error: ${jamErr.message}\n`); + await printError(jamErr.message, jamErr.hint); process.exit(1); } } diff --git a/src/commands/run.ts b/src/commands/run.ts index 971a18b..cd7bb9a 100644 --- a/src/commands/run.ts +++ b/src/commands/run.ts @@ -1,8 +1,9 @@ import { createInterface } from 'node:readline/promises'; +import { readFile } from 'node:fs/promises'; import { join } from 'node:path'; import { loadConfig, getActiveProfile 
/**
 * Resolve the orchestrator worker count from the `--workers` CLI value.
 *
 * @param raw - Raw option string; an empty or missing value selects `fallback`.
 * @param fallback - Worker count to use when the flag was not supplied.
 * @returns A positive integer worker count.
 * @throws Error when `raw` is supplied but does not parse to an integer >= 1
 *   (previously `NaN`, `0` or negative values were passed straight through).
 */
function parseWorkerCount(raw: string | undefined, fallback: number): number {
  if (!raw) return fallback;
  const parsed = Number.parseInt(raw, 10);
  if (!Number.isInteger(parsed) || parsed < 1) {
    throw new Error(`Invalid --workers value "${raw}": expected a positive integer.`);
  }
  return parsed;
}

/**
 * Entry point for `jam run`: execute a one-shot task through the orchestrator.
 *
 * The instruction comes from the positional argument or, when that is absent,
 * from the file named by `--file`. Setting `JAM_LEGACY_RUN=1` delegates to
 * `legacyRun` instead of the orchestrator path.
 *
 * Progress and status go to stderr; the rendered summary (or the JSON result
 * when `--json` is set) goes to stdout. Any failure is reported via
 * `printError` and terminates the process with exit code 1.
 *
 * @param instruction - Inline task text, if given on the command line.
 * @param options - Parsed CLI options; this object is not mutated.
 */
export async function runRun(instruction: string | undefined, options: RunOptions): Promise<void> {
  // Support --file: read prompt from file if no inline instruction
  if (!instruction && options.file) {
    try {
      instruction = (await readFile(options.file, 'utf-8')).trim();
    } catch {
      await printError(`Cannot read file: ${options.file}`);
      process.exit(1);
    }
  }

  if (!instruction) {
    await printError('Provide an instruction. Usage: jam run "<instruction>"');
    process.exit(1);
  }

  // --auto implies --yes (auto-approve all writes). Work on a copy so the
  // caller's options object is left untouched.
  const runOptions: RunOptions = options.auto ? { ...options, yes: true } : options;

  // Legacy mode: JAM_LEGACY_RUN=1 preserves the old agentic loop
  if (process.env['JAM_LEGACY_RUN'] === '1') {
    return legacyRun(instruction, runOptions);
  }

  // ── New Orchestrator path (default) ──────────────────────────────────────
  try {
    const { Orchestrator } = await import('../agent/orchestrator.js');
    const { createProgressReporter } = await import('../agent/progress.js');

    const workspaceRoot = await getWorkspaceRoot();
    const rawConfig = await loadConfig(process.cwd(), runOptions);
    const config = runOptions.yes ? { ...rawConfig, toolPolicy: 'always' as const } : rawConfig;
    const profile = getActiveProfile(config);
    const adapter = await createProvider(profile);
    blockIfNoToolSupport(adapter, 'run');

    const stderrLog = (msg: string): void => {
      if (!runOptions.quiet) process.stderr.write(msg);
    };

    // Validate before any MCP server is started so a bad flag cannot leak one.
    const maxWorkers = parseWorkerCount(runOptions.workers, config.agent?.maxWorkers ?? 3);

    // One-shot runs fall back to 'auto' when no default mode is configured.
    const mode = runOptions.auto ? 'auto' : (config.agent?.defaultMode ?? 'auto');

    // NOTE(review): `runOptions.noSandbox` is parsed by the CLI but is not
    // consumed anywhere in this function — confirm where it should be wired in.

    // Connect to MCP servers (non-fatal if any fail)
    const mcpManager = await createMcpManager(config.mcpServers, stderrLog, config.mcpGroups);

    try {
      stderrLog(`Starting task: ${instruction}\n`);
      stderrLog(`Provider: ${profile.provider}, Model: ${profile.model ?? 'default'}\n`);

      // Merge MCP tool schemas with built-in tools
      const mcpSchemas = mcpManager.getToolSchemas();
      const allToolSchemas = mcpSchemas.length > 0
        ? [...ALL_TOOL_SCHEMAS, ...mcpSchemas]
        : ALL_TOOL_SCHEMAS;

      // Tool execution bridge: MCP-owned tools go to mcpManager, everything
      // else to the built-in executeTool (handles both read and write tools).
      const executeToolBridge = async (name: string, args: Record<string, unknown>) => {
        if (mcpManager.isOwnTool(name)) {
          return mcpManager.executeTool(name, args);
        }
        return executeTool(name, args, workspaceRoot);
      };

      const reporter = createProgressReporter({ quiet: runOptions.quiet, json: runOptions.json });

      const orchestrator = new Orchestrator({
        adapter,
        workspaceRoot,
        toolSchemas: allToolSchemas,
        executeTool: executeToolBridge,
      });

      const result = await orchestrator.execute(instruction, {
        mode,
        maxWorkers,
        images: runOptions.image,
        signal: AbortSignal.timeout(600000), // 10 minutes
        onProgress: (event) => reporter.onEvent(event),
      });

      // Render result
      if (runOptions.json) {
        printJsonResult({
          response: result.summary,
          usage: result.totalTokens
            ? {
                promptTokens: result.totalTokens.promptTokens,
                completionTokens: result.totalTokens.completionTokens,
                totalTokens: result.totalTokens.totalTokens,
              }
            : undefined,
        });
      } else {
        if (result.summary) {
          try {
            const rendered = await renderMarkdown(result.summary);
            process.stdout.write(rendered);
          } catch {
            // Markdown rendering is best-effort; fall back to the raw summary.
            process.stdout.write(result.summary + '\n');
          }
        }

        if (result.filesChanged.length > 0) {
          stderrLog(`\nFiles changed: ${result.filesChanged.join(', ')}\n`);
        }
      }
    } finally {
      // Runs on success and on failure. The outer catch calls process.exit(),
      // which would skip any cleanup placed after it, so shut down here.
      await mcpManager.shutdown();
    }

    stderrLog('\nTask complete.\n');
  } catch (err) {
    const jamErr = JamError.fromUnknown(err);
    await printError(jamErr.message, jamErr.hint);
    process.exit(1);
  }
}
/**
 * Zod schema for the `agent` section of the Jam configuration.
 * Every field declares a default, and the nested permission/sandbox schemas
 * are themselves defaulted from an empty object.
 */
export const AgentConfigSchema = z.object({
  // Parallel worker limit: integer in [1, 10], default 3.
  maxWorkers: z.number().int().min(1).max(10).default(3),
  // Agent mode used when the CLI does not pass --auto; default 'supervised'.
  defaultMode: z.enum(['supervised', 'auto']).default('supervised'),
  // Per-worker round budget: integer in [1, 50], default 20.
  maxRoundsPerWorker: z.number().int().min(1).max(50).default(20),
  // Permission overrides (`safe` / `dangerous` string lists, both default []).
  permissions: AgentPermissionsSchema.default({}),
  // Sandbox settings (filesystem scope, network policy, timeout).
  sandbox: AgentSandboxSchema.default({}),
});
/**
 * Accumulator for repeatable CLI options (passed as the option processor for
 * `--image`). Appends `val` to the running list and hands the same array back.
 *
 * @param val - The value from the current occurrence of the flag.
 * @param memo - Values gathered so far; a fresh array is used when omitted.
 * @returns `memo` itself (mutated in place) with `val` appended.
 */
function collect(val: string, memo: string[] = []): string[] {
  const gathered = memo;
  gathered[gathered.length] = val;
  return gathered;
}
analyzeConventions(ROOT); + expect(profile.packageManager).toBe('npm'); + }); + + it('detects vitest test framework', async () => { + const profile = await analyzeConventions(ROOT); + expect(profile.testFramework).toBe('vitest'); + }); + + it('detects code style', async () => { + const profile = await analyzeConventions(ROOT); + expect(profile.codeStyle.indent).toBe('spaces'); + expect(profile.codeStyle.indentSize).toBe(2); + expect(profile.codeStyle.quotes).toBe('single'); + expect(profile.codeStyle.semicolons).toBe(true); + }); + + it('detects test location as co-located', async () => { + const profile = await analyzeConventions(ROOT); + // jam-cli has co-located tests (*.test.ts next to source) + expect(profile.testNaming).toContain('.test.ts'); + }); + + it('detects conventional commit style', async () => { + const profile = await analyzeConventions(ROOT); + expect(profile.commitConvention).toBe('conventional'); + }); +}); diff --git a/src/intel/conventions.ts b/src/intel/conventions.ts new file mode 100644 index 0000000..9eb3d1a --- /dev/null +++ b/src/intel/conventions.ts @@ -0,0 +1,604 @@ +// src/intel/conventions.ts + +import { readFile, readdir, stat } from 'node:fs/promises'; +import { join, extname, basename } from 'node:path'; +import { exec } from 'node:child_process'; +import { promisify } from 'node:util'; +import type { WorkspaceProfile } from '../agent/types.js'; + +const execAsync = promisify(exec); + +/** Partial WorkspaceProfile with convention/style fields only. + * Structural fields (framework, entryPoints) come from intel graph. 
// ── Helpers ──────────────────────────────────────────────────────────────────

/** Read a file as UTF-8 text; resolves to `null` instead of throwing on any read error. */
async function readTextFile(path: string): Promise<string | null> {
  try {
    return await readFile(path, 'utf-8');
  } catch {
    return null;
  }
}

/** List the entry names of a directory; resolves to `[]` when it cannot be read. */
async function listDir(dir: string): Promise<string[]> {
  try {
    return await readdir(dir);
  } catch {
    return [];
  }
}

/** Whether `path` is a directory according to `stat`; `false` when it cannot be stat'ed. */
async function isDir(path: string): Promise<boolean> {
  try {
    return (await stat(path)).isDirectory();
  } catch {
    return false;
  }
}

/**
 * Collect up to `max` source files by walking `src/`, then `lib/`, then the
 * workspace root as a fallback.
 *
 * Within each directory, matching files are taken before subdirectories are
 * descended into. Test/spec files, `index.*` entry files and well-known
 * build/vendor directories are skipped.
 *
 * Each directory is visited at most once, so the root fallback no longer
 * re-adds files already collected from `src/` or `lib/`. Entries are processed
 * in sorted order so the sample is deterministic across platforms.
 *
 * @param root - Workspace root directory.
 * @param max - Maximum number of file paths to return.
 * @returns Paths of the sampled source files, without duplicates.
 */
async function collectSourceFiles(root: string, max: number): Promise<string[]> {
  const sourceExts = new Set(['.ts', '.tsx', '.js', '.jsx', '.py', '.go', '.rs']);
  const skipDirs = new Set(['.git', 'node_modules', '.jam', 'dist', 'build', '.next', '.cache', 'coverage']);
  // Prefer files that look like typical modules (not index or CLI entry files)
  const skipNames = new Set(['index.ts', 'index.js', 'index.tsx', 'index.jsx']);
  const testFilePattern = /\.(test|spec)\.[a-z]+$/;
  const files: string[] = [];
  const visitedDirs = new Set<string>();

  async function walk(dir: string): Promise<void> {
    if (files.length >= max || visitedDirs.has(dir)) return;
    visitedDirs.add(dir);

    const entries = (await listDir(dir)).sort();
    const dirFlags = await Promise.all(entries.map((entry) => isDir(join(dir, entry))));

    const fileNames: string[] = [];
    const subDirs: string[] = [];
    entries.forEach((entry, index) => {
      if (dirFlags[index]) {
        if (!skipDirs.has(entry)) subDirs.push(join(dir, entry));
      } else {
        fileNames.push(entry);
      }
    });

    // Add non-test source files from this directory first
    for (const name of fileNames) {
      if (files.length >= max) return;
      if (!sourceExts.has(extname(name))) continue;
      if (testFilePattern.test(name)) continue;
      if (skipNames.has(name)) continue;
      files.push(join(dir, name));
    }

    // Then recurse into subdirectories
    for (const subDir of subDirs) {
      if (files.length >= max) return;
      await walk(subDir);
    }
  }

  // Walk src/, then lib/, then root as fallback
  for (const candidate of ['src', 'lib', '.']) {
    if (files.length >= max) break;
    const fullDir = candidate === '.' ? root : join(root, candidate);
    if (!(await isDir(fullDir))) continue;
    await walk(fullDir);
  }

  return files.slice(0, max);
}
''; + if (packageManagerField.startsWith('yarn') || yarnLock !== null) { + packageManager = 'yarn'; + } else if (packageManagerField.startsWith('pnpm') || pnpmLock !== null) { + packageManager = 'pnpm'; + } else if (packageManagerField.startsWith('bun') || bunLock !== null) { + packageManager = 'bun'; + } + + // Test framework + if ('vitest' in allDeps) { + testFramework = 'vitest'; + } else if ('jest' in allDeps || '@jest/core' in allDeps) { + testFramework = 'jest'; + } else if ('mocha' in allDeps) { + testFramework = 'mocha'; + } + + // Test command — look at scripts + const testScript = scripts['test'] ?? ''; + if (testScript) { + testCommand = `${packageManager === 'npm' ? 'npm' : packageManager} run test`; + if (testScript.includes('vitest')) testCommand = 'npx vitest run'; + else if (testScript.includes('jest')) testCommand = 'npx jest'; + } + + // Linter + if ('eslint' in allDeps) linter = 'eslint'; + else if ('biome' in allDeps || '@biomejs/biome' in allDeps) linter = 'biome'; + else if ('oxlint' in allDeps) linter = 'oxlint'; + + // Formatter + if ('prettier' in allDeps) formatter = 'prettier'; + else if ('biome' in allDeps || '@biomejs/biome' in allDeps) formatter = formatter ?? 
// ── Code Style ────────────────────────────────────────────────────────────────

/** Per-file tallies of style signals; the caller sums these across sampled files. */
interface StyleVote {
  tabs: number;
  spaces: number;
  indentSizes: number[];
  singleQuote: number;
  doubleQuote: number;
  semiLines: number;
  noSemiLines: number;
  trailingCommaCount: number;
  noTrailingCommaCount: number;
  camelCase: number;
  snakeCase: number;
  pascalCase: number;
}

/**
 * Scan one file's text line by line and tally style evidence.
 *
 * All checks are line-based heuristics; nothing is parsed. Blank lines and
 * lines that start with a comment marker are ignored.
 *
 * @param content - Full text of a source file.
 * @returns Counts of indentation, quote, semicolon, trailing-comma and
 *   identifier-naming signals found in `content`.
 */
function analyzeFileStyle(content: string): StyleVote {
  const vote: StyleVote = {
    tabs: 0,
    spaces: 0,
    indentSizes: [],
    singleQuote: 0,
    doubleQuote: 0,
    semiLines: 0,
    noSemiLines: 0,
    trailingCommaCount: 0,
    noTrailingCommaCount: 0,
    camelCase: 0,
    snakeCase: 0,
    pascalCase: 0,
  };

  // Patterns are built once, outside the per-line loop.
  const templateLiteral = /`[^`]*`/g;
  // The class excludes the quote, a backslash and a newline. The previous
  // `[^'\\n]` excluded the letter "n", so strings such as 'node:path' were
  // never counted.
  const singleQuoted = /'[^'\\\n]{1,80}'/g;
  const doubleQuoted = /"[^"\\\n]{1,80}"/g;
  const lineComment = /\/\/.*$/;
  const camelDecl = /(?:const|let|var|function)\s+([a-z][a-zA-Z0-9]*[A-Z][a-zA-Z0-9]*)/;
  const snakeDecl = /(?:const|let|var|function)\s+([a-z][a-z0-9]*_[a-z][a-z0-9_]*)/;
  // Requires a lowercase letter and a full-identifier match so CONSTANT_CASE
  // names (e.g. MAX_SIZE) are not mistaken for PascalCase.
  const pascalDecl = /(?:const|let|var|function)\s+[A-Z][a-zA-Z0-9]*[a-z][a-zA-Z0-9]*\b/;
  // `key: value` shape, used to recognise the last property of an object literal.
  const propertyLine = /^(?!default\b)(?:[\w$]+|'[^']*'|"[^"]*"|\[.+\])\s*:\s*\S/;
  // Line endings that mean "this is not a complete statement".
  const nonStatementEndings = ['{', '}', ',', '(', '[', ':', '|', '&', '\\', ')'];
  const openerEndings = ['{', '[', '('];

  const lines = content.split('\n');
  for (const [i, line] of lines.entries()) {
    const trimmed = line.trimStart();

    // Skip empty lines and comment lines
    if (
      !trimmed ||
      trimmed.startsWith('//') ||
      trimmed.startsWith('/*') ||
      trimmed.startsWith('*') ||
      trimmed.startsWith('#')
    ) {
      continue;
    }

    // Indentation: any tab in the leading whitespace counts as tab-indented.
    if (line !== trimmed) {
      const leadingWhitespace = line.slice(0, line.length - trimmed.length);
      if (leadingWhitespace.includes('\t')) {
        vote.tabs++;
      } else {
        vote.spaces++;
        if (leadingWhitespace.length > 0) vote.indentSizes.push(leadingWhitespace.length);
      }
    }

    // Quotes: drop template literals first so quotes inside them are ignored.
    const withoutTemplates = trimmed.replace(templateLiteral, '');
    vote.singleQuote += (withoutTemplates.match(singleQuoted) ?? []).length;
    vote.doubleQuote += (withoutTemplates.match(doubleQuoted) ?? []).length;

    // Semicolons: only lines that look like complete statements can count as
    // "missing a semicolon" (openers, continuations and chains are excluded).
    const stripped = trimmed.replace(lineComment, '').trimEnd();
    if (stripped.endsWith(';')) {
      vote.semiLines++;
    } else if (
      stripped.length > 0 &&
      !stripped.startsWith('.') &&
      !nonStatementEndings.some((ending) => stripped.endsWith(ending))
    ) {
      vote.noSemiLines++;
    }

    // Trailing commas: look at the last line before a closing bracket.
    const nextLine = lines[i + 1];
    if (nextLine !== undefined) {
      const nextTrimmed = nextLine.trim();
      const closesWith = nextTrimmed.charAt(0);
      if (closesWith === '}' || closesWith === ']' || closesWith === ')') {
        if (stripped.endsWith(',')) {
          vote.trailingCommaCount++;
        } else if (
          stripped.length > 0 &&
          !stripped.endsWith(';') &&
          !openerEndings.some((ending) => stripped.endsWith(ending)) &&
          // Count only clear list items: anything before `]`, or a
          // `key: value` line before `}`. Lines before `)` are skipped because
          // conditions and es5-style argument lists never carry a trailing comma.
          (closesWith === ']' || (closesWith === '}' && propertyLine.test(stripped)))
        ) {
          vote.noTrailingCommaCount++;
        }
      }
    }

    // Naming convention: const/let/var/function declarations
    if (camelDecl.test(trimmed)) vote.camelCase++;
    if (snakeDecl.test(trimmed)) vote.snakeCase++;
    if (pascalDecl.test(trimmed)) vote.pascalCase++;
  }

  return vote;
}
// ── File Naming ───────────────────────────────────────────────────────────────

type FileNamingStyle = 'kebab' | 'camel' | 'pascal' | 'snake';

/**
 * Classify a bare file or directory name by naming convention.
 * Returns `null` when the name carries no evidence. A single lowercase word
 * such as `index` or `utils` is valid in every convention and used to be
 * counted as camelCase; all-caps names such as `README` are not PascalCase.
 */
function classifyFileName(name: string): FileNamingStyle | null {
  if (/^[a-z][a-z0-9]*(-[a-z0-9]+)+$/.test(name)) return 'kebab';
  if (/^[a-z][a-z0-9]*(_[a-z0-9]+)+$/.test(name)) return 'snake';
  if (/^[a-z][a-z0-9]*[A-Z][a-zA-Z0-9]*$/.test(name)) return 'camel';
  if (/^[A-Z][a-zA-Z0-9]*[a-z][a-zA-Z0-9]*$/.test(name)) return 'pascal';
  return null;
}

/**
 * Infer the dominant file naming convention from the direct entries of
 * `src/`, `lib/` and the workspace root.
 *
 * @param root - Workspace root directory.
 * @returns `'PascalCase'`, `'camelCase'`, `'snake_case'` or `'kebab-case'`.
 *   Ties keep the original precedence (Pascal, camel, snake, kebab). With no
 *   evidence at all the result is `'kebab-case'`; it used to be `'PascalCase'`
 *   because an all-zero tally matched the first comparison.
 */
async function detectFileNaming(root: string): Promise<string> {
  const dirs = ['src', 'lib', '.'];
  const counts: Record<FileNamingStyle, number> = { kebab: 0, camel: 0, pascal: 0, snake: 0 };

  for (const dir of dirs) {
    const fullDir = dir === '.' ? root : join(root, dir);
    const entries = await listDir(fullDir);
    for (const entry of entries) {
      // Use the part before the first dot so `file-lock.test.ts` is judged as
      // `file-lock`; dotfiles reduce to an empty name and are ignored.
      const stem = basename(entry, extname(entry)).split('.')[0] ?? '';
      const style = classifyFileName(stem);
      if (style !== null) counts[style]++;
    }
  }

  const max = Math.max(counts.kebab, counts.camel, counts.pascal, counts.snake);
  if (max === 0) return 'kebab-case';
  if (max === counts.pascal) return 'PascalCase';
  if (max === counts.camel) return 'camelCase';
  if (max === counts.snake) return 'snake_case';
  return 'kebab-case';
}
// ── Test Patterns ─────────────────────────────────────────────────────────────

/** Where tests live and how test files are named. */
interface TestInfo {
  testLocation: string;
  testNaming: string;
  testStyle: string;
}

/**
 * Look for `*.test.ts(x)` / `*.spec.ts(x)` files under `dir`, descending at
 * most `maxDepth` directory levels. The scan stops descending as soon as a
 * spec file is seen, because spec naming takes precedence in the caller.
 */
async function findColocatedTests(
  dir: string,
  maxDepth: number,
): Promise<{ hasTest: boolean; hasSpec: boolean }> {
  const skipDirs = new Set(['.git', 'node_modules', 'dist', 'build', 'coverage']);
  const found = { hasTest: false, hasSpec: false };

  async function walk(current: string, depth: number): Promise<void> {
    const possibleDirs: string[] = [];
    for (const entry of await listDir(current)) {
      if (entry.endsWith('.spec.ts') || entry.endsWith('.spec.tsx')) {
        found.hasSpec = true;
      } else if (entry.endsWith('.test.ts') || entry.endsWith('.test.tsx')) {
        found.hasTest = true;
      } else if (depth < maxDepth && extname(entry) === '' && !skipDirs.has(entry)) {
        // Only extension-less names are stat'ed, to avoid a stat per file.
        possibleDirs.push(join(current, entry));
      }
    }
    for (const candidate of possibleDirs) {
      if (found.hasSpec) return;
      if (await isDir(candidate)) await walk(candidate, depth + 1);
    }
  }

  await walk(dir, 0);
  return found;
}

/**
 * Detect how the workspace organises its tests.
 *
 * A dedicated test directory wins first: `tests`, `__tests__`, `test`, `spec`,
 * each checked at the root and then under `src/`. Otherwise test files living
 * next to source code under `src/` count as co-located. Nested source folders
 * are now searched too; previously only direct children of `src/` were
 * inspected, so a layout like `src/feature/x.test.ts` was reported as unknown.
 *
 * @param root - Workspace root directory.
 * @returns Test location, file naming pattern and style. `testNaming` defaults
 *   to `'*.test.ts'` when nothing is found.
 */
async function detectTestPatterns(root: string): Promise<TestInfo> {
  // Check for dedicated test directories
  const testDirs = ['tests', '__tests__', 'test', 'spec'];
  for (const dir of testDirs) {
    for (const location of [dir, `src/${dir}`]) {
      const fullPath = join(root, location);
      if (!(await isDir(fullPath))) continue;
      // Determine naming pattern inside
      const entries = await listDir(fullPath);
      const hasSpec = entries.some((entry) => entry.includes('.spec.'));
      return {
        testLocation: location,
        testNaming: hasSpec ? '*.spec.ts' : '*.test.ts',
        testStyle: 'dedicated-directory',
      };
    }
  }

  // Check for co-located tests anywhere under src/ (bounded depth)
  const colocated = await findColocatedTests(join(root, 'src'), 4);
  if (colocated.hasTest || colocated.hasSpec) {
    return {
      testLocation: 'co-located',
      testNaming: colocated.hasSpec ? '*.spec.ts' : '*.test.ts',
      testStyle: 'co-located',
    };
  }

  return {
    testLocation: 'unknown',
    testNaming: '*.test.ts',
    testStyle: 'unknown',
  };
}
// ── Main ──────────────────────────────────────────────────────────────────────

/**
 * Build the convention profile for a workspace.
 *
 * All detectors run concurrently against `root`; their results are flattened
 * into one object. `errorHandling`, `logging` and `configPattern` are not
 * detected here and are filled with fixed defaults.
 *
 * @param root - Workspace root directory to analyse.
 * @returns Tooling, code style, naming, import/export, test and git conventions.
 */
export async function analyzeConventions(root: string): Promise<ConventionProfile> {
  const detections = await Promise.all([
    detectTooling(root),
    detectCodeStyle(root),
    detectFileNaming(root),
    detectExportStyle(root),
    detectImportStyle(root),
    detectTestPatterns(root),
    detectGitConventions(root),
  ]);
  const [tooling, codeStyle, fileNaming, exportStyle, importStyle, testPatterns, gitInfo] = detections;

  const {
    language,
    packageManager,
    testFramework,
    testCommand,
    linter,
    formatter,
    typeChecker,
    buildTool,
  } = tooling;
  const { testLocation, testNaming, testStyle } = testPatterns;
  const { commitConvention, branchPattern } = gitInfo;

  return {
    language,
    packageManager,
    testFramework,
    testCommand,
    linter,
    formatter,
    typeChecker,
    buildTool,
    codeStyle,
    fileNaming,
    exportStyle,
    importStyle,
    testLocation,
    testNaming,
    testStyle,
    commitConvention,
    branchPattern,
    // Sensible defaults for fields not directly detectable here
    errorHandling: 'try-catch',
    logging: 'console',
    configPattern: 'config-file',
  };
}
small/embedded models. Undefined means no enforced limit. */ contextWindow?: number; + /** + * Whether this provider supports vision (image) inputs. + * Defaults to false when absent. + */ + supportsVision?: boolean; } export interface ProviderAdapter { diff --git a/src/utils/errors.ts b/src/utils/errors.ts index 1ad216d..d97c8f6 100644 --- a/src/utils/errors.ts +++ b/src/utils/errors.ts @@ -13,7 +13,17 @@ export type ErrorCode = | 'TOOL_DENIED' | 'TOOL_NOT_FOUND' | 'TOOL_EXEC_ERROR' - | 'UNKNOWN'; + | 'UNKNOWN' + | 'AGENT_PLAN_FAILED' + | 'AGENT_PLAN_CYCLE' + | 'AGENT_WORKER_TIMEOUT' + | 'AGENT_WORKER_CANCELLED' + | 'AGENT_FILE_LOCK_CONFLICT' + | 'AGENT_FILE_LOCK_TIMEOUT' + | 'AGENT_BUDGET_EXCEEDED' + | 'AGENT_SANDBOX_UNAVAILABLE' + | 'AGENT_RATE_LIMITED' + | 'AGENT_MERGE_CONFLICT'; /** * Actionable hints for each error code. @@ -64,6 +74,26 @@ export const ERROR_HINTS: Partial<Record<ErrorCode, string>> = { TOOL_EXEC_ERROR: 'A tool failed to execute. Check that git and other dependencies are installed.\n' + 'Run `jam doctor` for diagnostics.', + AGENT_PLAN_FAILED: + 'The AI could not generate a valid execution plan. Try simplifying your task or breaking it into smaller pieces.', + AGENT_PLAN_CYCLE: + 'The execution plan has circular dependencies. This is a bug — please report it.', + AGENT_WORKER_TIMEOUT: + 'A worker exceeded its round budget. Try increasing maxRoundsPerWorker in config.', + AGENT_WORKER_CANCELLED: + 'Worker was cancelled. This may be due to a dependency failure or user abort.', + AGENT_FILE_LOCK_CONFLICT: + 'Two workers tried to edit the same file simultaneously. The orchestrator resolved the conflict.', + AGENT_FILE_LOCK_TIMEOUT: + 'A file lock request timed out. Another worker may be stuck.', + AGENT_BUDGET_EXCEEDED: + 'Token budget exceeded. Reduce task scope or increase maxTotal in agent config.', + AGENT_SANDBOX_UNAVAILABLE: + 'OS sandbox not available. Running with permissions-only. Run jam doctor to check.', + AGENT_RATE_LIMITED: + 'Provider rate limit hit.
Workers paused automatically. Wait and retry.', + AGENT_MERGE_CONFLICT: + 'Workers produced conflicting file edits. Manual resolution may be needed.', }; export class JamError extends Error {