Skip to content

Commit ae70809

Browse files
committed
Release v0.2.7
1 parent 0276c85 commit ae70809

49 files changed

Lines changed: 4957 additions & 411 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

app/cli/bin/fuc.ts

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -116,16 +116,22 @@ program
116116

117117
program
118118
.command('ultracode <task>')
119-
.description('即时生成、即时执行、带任务账本和验收门的动态 workflow harness')
119+
.description('即时生成、即时执行、带任务账本、预算软停和验收门的动态 workflow harness')
120120
.option('-a, --adapter <adapter>', 'adapter override')
121121
.option('-m, --model <model>', 'model override (sonnet, opus, haiku, …)')
122122
.option('-p, --provider <id>', 'provider id (gateway routing)')
123123
.option('-o, --output <path>', 'write final result JSON to a file')
124124
.option('--interactive', 'enable terminal interaction')
125125
.option('--non-interactive', 'auto-skip interaction requests (default)')
126126
.option('--planner-only', 'only generate and persist harness.json, do not execute it')
127+
.option('--resume', 'resume from .fuc-run/<run-id>/result.json')
128+
.option('--from-harness <path>', 'reuse a saved harness.json and skip planning')
129+
.option('--trace', 'persist streaming events in events.jsonl')
127130
.option('--concurrency <n>', 'concurrency limit')
128131
.option('--max-retries <n>', 'max auto-retries per node')
132+
.option('--max-agent-calls <n>', 'override ultracode agent-call budget')
133+
.option('--max-rounds <n>', 'override ultracode repair-round budget')
134+
.option('--verify-command <command>', 'run a local verification command after ultracode; nonzero exit fails the run')
129135
.option('--timeout <seconds>', 'per-node timeout seconds')
130136
.option('--cwd <path>', 'working directory')
131137
.option('--run-id <id>', 'explicit run directory id under .fuc-run/')

app/cli/commands/ultracode.test.ts

Lines changed: 271 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -51,18 +51,249 @@ describe('fuc ultracode', () => {
5151
expect(result.success).toBe(true);
5252
expect(result.artifacts.verdict.pass).toBe(true);
5353
expect(result.artifacts.ledger.tasks[0].id).toBe('t1');
54+
expect(result.outputs.n_ledger).toBeTruthy();
55+
expect(result.nodeResults.n_ledger.status).toBe('success');
5456
expect(existsSync(join(runDir, 'request.json'))).toBe(true);
5557
expect(existsSync(join(runDir, 'harness.json'))).toBe(true);
5658
expect(existsSync(join(runDir, 'workflow.fuc.json'))).toBe(true);
5759
expect(existsSync(join(runDir, 'events.jsonl'))).toBe(true);
5860
expect(existsSync(join(runDir, 'status.json'))).toBe(true);
5961
expect(existsSync(join(runDir, 'result.json'))).toBe(true);
60-
expect(readFileSync(join(runDir, 'events.jsonl'), 'utf8')).toContain('node_success');
62+
const workflow = JSON.parse(readFileSync(join(runDir, 'workflow.fuc.json'), 'utf8'));
63+
expect(workflow.nodes.some((node: { id: string; type: string }) => node.id.includes('claims') && node.type === 'agent')).toBe(true);
64+
expect(workflow.nodes.some((node: { id: string; type: string }) => node.id.includes('verify') && node.type === 'parallel')).toBe(true);
65+
const events = readFileSync(join(runDir, 'events.jsonl'), 'utf8');
66+
expect(events).toContain('node_success');
67+
expect(events).not.toContain('stream_append');
6168
expect(calls.length).toBeGreaterThanOrEqual(6);
6269
});
70+
71+
it('soft-stops on budget exhaustion and still writes a partial result', async () => {
72+
const calls: string[] = [];
73+
const gateway = fakeUltracodeGateway(calls, { maxAgentCalls: 4, maxRounds: 2 });
74+
const code = await runUltracode('修复复杂失败并循环验收', {
75+
cwd: dir,
76+
runId: 'uc-budget',
77+
json: true,
78+
quiet: true,
79+
gateway,
80+
concurrency: '3',
81+
maxRetries: '0',
82+
});
83+
84+
expect(code).toBe(1);
85+
const result = JSON.parse(outBuf);
86+
const runDir = join(dir, '.fuc-run', 'uc-budget');
87+
expect(result.success).toBe(false);
88+
expect(result.budget.exhausted).toBe(true);
89+
expect(result.budget.spentAgentCalls).toBe(4);
90+
expect(result.artifacts.verdict.pass).toBe(false);
91+
expect(result.artifacts.verdict.gaps[0].reason).toContain('ULTRACODE_BUDGET_EXHAUSTED');
92+
expect(result.artifacts.report).toContain('预算已耗尽');
93+
expect(result.failedNodeId).toBeTruthy();
94+
expect(result.outputs.n_gate).toBeUndefined();
95+
expect(existsSync(join(runDir, 'result.json'))).toBe(true);
96+
expect(readFileSync(join(runDir, 'events.jsonl'), 'utf8')).toContain('budget_exhausted');
97+
});
98+
99+
it('resumes from persisted ultracode outputs without replanning completed work', async () => {
100+
const firstCalls: string[] = [];
101+
const firstGateway = fakeUltracodeGateway(firstCalls, { maxAgentCalls: 6, maxRounds: 2 });
102+
const firstCode = await runUltracode('修复复杂失败并循环验收', {
103+
cwd: dir,
104+
runId: 'uc-resume',
105+
json: true,
106+
quiet: true,
107+
gateway: firstGateway,
108+
concurrency: '3',
109+
maxRetries: '0',
110+
});
111+
expect(firstCode).toBe(1);
112+
outBuf = '';
113+
114+
const resumeCalls: string[] = [];
115+
const resumeGateway = fakeUltracodeGateway(resumeCalls, { maxAgentCalls: 20, maxRounds: 2 });
116+
const resumeCode = await runUltracode('修复复杂失败并循环验收', {
117+
cwd: dir,
118+
runId: 'uc-resume',
119+
resume: true,
120+
json: true,
121+
quiet: true,
122+
gateway: resumeGateway,
123+
concurrency: '3',
124+
maxRetries: '0',
125+
maxAgentCalls: '20',
126+
});
127+
128+
const result = JSON.parse(outBuf);
129+
expect(resumeCode).toBe(0);
130+
expect(result.success).toBe(true);
131+
expect(result.outputs.n_ledger).toBeTruthy();
132+
expect(resumeCalls.some((prompt) => prompt.includes('DYNAMIC_HARNESS'))).toBe(false);
133+
expect(resumeCalls.some((prompt) => prompt.includes('DYNAMIC_TASK_LEDGER'))).toBe(false);
134+
});
135+
136+
it('persists stream events only when trace is enabled', async () => {
137+
const calls: string[] = [];
138+
const gateway = fakeUltracodeGateway(calls);
139+
const code = await runUltracode('审查博客里的技术论断', {
140+
cwd: dir,
141+
runId: 'uc-trace',
142+
json: true,
143+
quiet: true,
144+
trace: true,
145+
gateway,
146+
concurrency: '3',
147+
maxRetries: '0',
148+
});
149+
150+
expect(code).toBe(0);
151+
const events = readFileSync(join(dir, '.fuc-run', 'uc-trace', 'events.jsonl'), 'utf8');
152+
expect(events).toContain('stream_append');
153+
});
154+
155+
it('skips repair rounds once an earlier acceptance gate passes', async () => {
156+
const calls: string[] = [];
157+
const gateway = fakeUltracodeGateway(calls, { maxAgentCalls: 20, maxRounds: 2 });
158+
const code = await runUltracode('修复复杂失败并循环验收', {
159+
cwd: dir,
160+
runId: 'uc-skip-repair',
161+
json: true,
162+
quiet: true,
163+
gateway,
164+
concurrency: '3',
165+
maxRetries: '0',
166+
});
167+
168+
expect(code).toBe(0);
169+
const repairCalls = calls.filter((prompt) => prompt.includes('返工轮次') || prompt.includes('第 2 轮返工'));
170+
expect(repairCalls).toHaveLength(0);
171+
const workflow = JSON.parse(readFileSync(join(dir, '.fuc-run', 'uc-skip-repair', 'workflow.fuc.json'), 'utf8'));
172+
expect(workflow.nodes.some((node: { id: string }) => node.id.includes('n_dyn_r2'))).toBe(true);
173+
});
174+
175+
it('records a planner_fallback event when planning yields no parseable spec', async () => {
176+
const calls: string[] = [];
177+
// A gateway whose planner output is NOT valid JSON ⇒ fallback spec.
178+
const gateway = fakeUltracodeGateway(calls, { maxAgentCalls: 20, maxRounds: 2 });
179+
const badPlanner: RunGateway = {
180+
...gateway,
181+
spawnCliAgent: async (prompt, adapter, opts) => {
182+
if (prompt.includes('DYNAMIC_HARNESS')) {
183+
opts.onProgress?.('chunk');
184+
return '抱歉,我无法生成规格,这只是一段普通说明文字。';
185+
}
186+
return gateway.spawnCliAgent(prompt, adapter, opts);
187+
},
188+
};
189+
const code = await runUltracode('审查博客里的技术论断', {
190+
cwd: dir,
191+
runId: 'uc-fallback',
192+
json: true,
193+
quiet: true,
194+
gateway: badPlanner,
195+
concurrency: '3',
196+
maxRetries: '0',
197+
});
198+
199+
// Fallback may or may not pass acceptance (it runs an inferred spec); the
200+
// point of this test is that the degraded planning is surfaced, not silent.
201+
expect([0, 1]).toContain(code);
202+
const events = readFileSync(join(dir, '.fuc-run', 'uc-fallback', 'events.jsonl'), 'utf8');
203+
expect(events).toContain('planner_fallback');
204+
});
205+
206+
it('reuses a saved harness.json with --from-harness and skips planning', async () => {
207+
// 1) plannerOnly run to produce harness.json.
208+
const planCalls: string[] = [];
209+
const planGateway = fakeUltracodeGateway(planCalls, { maxAgentCalls: 20, maxRounds: 2 });
210+
await runUltracode('审查博客里的技术论断', {
211+
cwd: dir,
212+
runId: 'uc-plan',
213+
json: true,
214+
quiet: true,
215+
gateway: planGateway,
216+
plannerOnly: true,
217+
});
218+
const harnessPath = join(dir, '.fuc-run', 'uc-plan', 'harness.json');
219+
expect(existsSync(harnessPath)).toBe(true);
220+
outBuf = '';
221+
222+
// 2) reuse it — the planner must not be invoked.
223+
const reuseCalls: string[] = [];
224+
const reuseGateway = fakeUltracodeGateway(reuseCalls, { maxAgentCalls: 20, maxRounds: 2 });
225+
const code = await runUltracode('审查博客里的技术论断', {
226+
cwd: dir,
227+
runId: 'uc-reuse',
228+
json: true,
229+
quiet: true,
230+
gateway: reuseGateway,
231+
fromHarness: harnessPath,
232+
concurrency: '3',
233+
maxRetries: '0',
234+
});
235+
236+
expect(code).toBe(0);
237+
expect(reuseCalls.some((p) => p.includes('DYNAMIC_HARNESS'))).toBe(false);
238+
const reuseEvents = readFileSync(join(dir, '.fuc-run', 'uc-reuse', 'events.jsonl'), 'utf8');
239+
expect(reuseEvents).toContain('planner_skipped');
240+
});
241+
242+
it('runs a graceful closing pass from the reserve when work budget exhausts', async () => {
243+
const calls: string[] = [];
244+
// maxAgentCalls 5 ⇒ closing reserve 2, work ceiling 3: work exhausts mid-run
245+
// and the gate/report run from the reserved pool instead of hard-aborting.
246+
const gateway = fakeUltracodeGateway(calls, { maxAgentCalls: 5, maxRounds: 1 });
247+
const code = await runUltracode('审查博客里的技术论断', {
248+
cwd: dir,
249+
runId: 'uc-closing',
250+
json: true,
251+
quiet: true,
252+
gateway,
253+
concurrency: '1',
254+
maxRetries: '0',
255+
});
256+
257+
expect([0, 1]).toContain(code);
258+
const events = readFileSync(join(dir, '.fuc-run', 'uc-closing', 'events.jsonl'), 'utf8');
259+
expect(events).toContain('work_budget_exhausted');
260+
expect(events).toContain('closing_pass');
261+
// The budget should NOT be fully exhausted (reserve absorbs the closing pass); only the reported ceiling is asserted below.
262+
const result = JSON.parse(outBuf);
263+
expect(result.budget.maxAgentCalls).toBe(5);
264+
});
265+
266+
it('runs verify-command and fails the run when the command exits nonzero', async () => {
267+
const calls: string[] = [];
268+
const gateway = fakeUltracodeGateway(calls);
269+
const code = await runUltracode('审查博客里的技术论断', {
270+
cwd: dir,
271+
runId: 'uc-verify-fail',
272+
json: true,
273+
quiet: true,
274+
gateway,
275+
concurrency: '3',
276+
maxRetries: '0',
277+
verifyCommand: 'node -e "process.exit(7)"',
278+
});
279+
280+
const result = JSON.parse(outBuf);
281+
const runDir = join(dir, '.fuc-run', 'uc-verify-fail');
282+
expect(code).toBe(1);
283+
expect(result.success).toBe(false);
284+
expect(result.verification.command).toBe('node -e "process.exit(7)"');
285+
expect(result.verification.exitCode).toBe(7);
286+
expect(result.artifacts.verdict.pass).toBe(false);
287+
expect(result.artifacts.verdict.gaps.some((gap: { taskId: string }) => gap.taskId === 'verification')).toBe(true);
288+
expect(existsSync(join(runDir, 'verification.json'))).toBe(true);
289+
expect(readFileSync(join(runDir, 'events.jsonl'), 'utf8')).toContain('verification_complete');
290+
});
63291
});
64292

65-
function fakeUltracodeGateway(calls: string[]): RunGateway {
293+
function fakeUltracodeGateway(
294+
calls: string[],
295+
plannerBudget: { maxAgentCalls: number; maxRounds: number } = { maxAgentCalls: 20, maxRounds: 2 },
296+
): RunGateway {
66297
const respond = async (
67298
prompt: string,
68299
_adapter: string,
@@ -75,8 +306,44 @@ function fakeUltracodeGateway(calls: string[]): RunGateway {
75306
objective: '审查博客里的技术论断',
76307
nonGoals: ['不要改写博客风格'],
77308
successCriteria: ['每条技术论断都有证据'],
78-
budget: { maxAgentCalls: 20, maxRounds: 2 },
79-
strategies: ['fan-out-and-synthesize', 'adversarial-verification'],
309+
budget: plannerBudget,
310+
strategies:
311+
plannerBudget.maxRounds > 1
312+
? ['loop-until-done', 'adversarial-verification']
313+
: ['fan-out-and-synthesize', 'adversarial-verification'],
314+
plan: [
315+
{
316+
id: 'claims',
317+
kind: 'agent',
318+
title: '识别论断',
319+
focus: '找出需要核验的技术论断',
320+
deliverable: '论断清单',
321+
acceptance: '至少列出一条论断',
322+
evidenceRequired: '原文片段',
323+
},
324+
{
325+
id: 'verify',
326+
kind: 'parallel',
327+
title: '并行核验',
328+
dependsOn: ['claims'],
329+
branches: [
330+
{
331+
title: '代码核验',
332+
focus: '对照代码库核验论断',
333+
deliverable: '核验结果',
334+
acceptance: '每条结论都有文件路径或原因',
335+
evidenceRequired: '文件路径',
336+
},
337+
{
338+
title: '反面检查',
339+
focus: '寻找证据不足和过度声称',
340+
deliverable: '风险清单',
341+
acceptance: '风险有原因',
342+
evidenceRequired: '复核记录',
343+
},
344+
],
345+
},
346+
],
80347
workerGroups: [
81348
{
82349
id: 't1',

0 commit comments

Comments (0)