@@ -51,18 +51,249 @@ describe('fuc ultracode', () => {
5151 expect ( result . success ) . toBe ( true ) ;
5252 expect ( result . artifacts . verdict . pass ) . toBe ( true ) ;
5353 expect ( result . artifacts . ledger . tasks [ 0 ] . id ) . toBe ( 't1' ) ;
54+ expect ( result . outputs . n_ledger ) . toBeTruthy ( ) ;
55+ expect ( result . nodeResults . n_ledger . status ) . toBe ( 'success' ) ;
5456 expect ( existsSync ( join ( runDir , 'request.json' ) ) ) . toBe ( true ) ;
5557 expect ( existsSync ( join ( runDir , 'harness.json' ) ) ) . toBe ( true ) ;
5658 expect ( existsSync ( join ( runDir , 'workflow.fuc.json' ) ) ) . toBe ( true ) ;
5759 expect ( existsSync ( join ( runDir , 'events.jsonl' ) ) ) . toBe ( true ) ;
5860 expect ( existsSync ( join ( runDir , 'status.json' ) ) ) . toBe ( true ) ;
5961 expect ( existsSync ( join ( runDir , 'result.json' ) ) ) . toBe ( true ) ;
60- expect ( readFileSync ( join ( runDir , 'events.jsonl' ) , 'utf8' ) ) . toContain ( 'node_success' ) ;
62+ const workflow = JSON . parse ( readFileSync ( join ( runDir , 'workflow.fuc.json' ) , 'utf8' ) ) ;
63+ expect ( workflow . nodes . some ( ( node : { id : string ; type : string } ) => node . id . includes ( 'claims' ) && node . type === 'agent' ) ) . toBe ( true ) ;
64+ expect ( workflow . nodes . some ( ( node : { id : string ; type : string } ) => node . id . includes ( 'verify' ) && node . type === 'parallel' ) ) . toBe ( true ) ;
65+ const events = readFileSync ( join ( runDir , 'events.jsonl' ) , 'utf8' ) ;
66+ expect ( events ) . toContain ( 'node_success' ) ;
67+ expect ( events ) . not . toContain ( 'stream_append' ) ;
6168 expect ( calls . length ) . toBeGreaterThanOrEqual ( 6 ) ;
6269 } ) ;
70+
it('soft-stops on budget exhaustion and still writes a partial result', async () => {
  // The fake gateway's planner spec carries a budget of 4 agent calls / 2 rounds,
  // which this scenario expects to be used up before the run can finish.
  const prompts: string[] = [];
  const exitCode = await runUltracode('修复复杂失败并循环验收', {
    cwd: dir,
    runId: 'uc-budget',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(prompts, { maxAgentCalls: 4, maxRounds: 2 }),
    concurrency: '3',
    maxRetries: '0',
  });

  // A budget stop is reported as a failing exit code...
  expect(exitCode).toBe(1);

  // ...but the JSON result printed to the captured output is still complete
  // enough to describe what happened.
  const parsed = JSON.parse(outBuf);
  expect(parsed.success).toBe(false);

  const { budget, artifacts } = parsed;
  expect(budget.exhausted).toBe(true);
  expect(budget.spentAgentCalls).toBe(4);
  expect(artifacts.verdict.pass).toBe(false);
  expect(artifacts.verdict.gaps[0].reason).toContain('ULTRACODE_BUDGET_EXHAUSTED');
  expect(artifacts.report).toContain('预算已耗尽');
  expect(parsed.failedNodeId).toBeTruthy();
  expect(parsed.outputs.n_gate).toBeUndefined();

  // The partial result and the budget event must also be persisted on disk.
  const artifactDir = join(dir, '.fuc-run', 'uc-budget');
  expect(existsSync(join(artifactDir, 'result.json'))).toBe(true);
  const eventLog = readFileSync(join(artifactDir, 'events.jsonl'), 'utf8');
  expect(eventLog).toContain('budget_exhausted');
});
98+
it('resumes from persisted ultracode outputs without replanning completed work', async () => {
  const task = '修复复杂失败并循环验收';
  const runId = 'uc-resume';

  // Pass 1: a planner budget of 6 agent calls; the run is expected to end with
  // a failing exit code, leaving persisted state behind for the resume.
  const initialPrompts: string[] = [];
  const initialExit = await runUltracode(task, {
    cwd: dir,
    runId,
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(initialPrompts, { maxAgentCalls: 6, maxRounds: 2 }),
    concurrency: '3',
    maxRetries: '0',
  });
  expect(initialExit).toBe(1);

  // Drop the first run's captured output so only the resumed run's JSON is parsed.
  outBuf = '';

  // Pass 2: same run id with `resume: true` and a raised agent-call limit.
  const resumedPrompts: string[] = [];
  const resumedExit = await runUltracode(task, {
    cwd: dir,
    runId,
    resume: true,
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(resumedPrompts, { maxAgentCalls: 20, maxRounds: 2 }),
    concurrency: '3',
    maxRetries: '0',
    maxAgentCalls: '20',
  });

  const parsed = JSON.parse(outBuf);
  expect(resumedExit).toBe(0);
  expect(parsed.success).toBe(true);
  expect(parsed.outputs.n_ledger).toBeTruthy();

  // Neither the harness-planning prompt nor the ledger prompt may be re-sent
  // during the resumed run.
  const resumedRunSaw = (marker: string): boolean =>
    resumedPrompts.some((text) => text.includes(marker));
  expect(resumedRunSaw('DYNAMIC_HARNESS')).toBe(false);
  expect(resumedRunSaw('DYNAMIC_TASK_LEDGER')).toBe(false);
});
135+
it('persists stream events only when trace is enabled', async () => {
  // With `trace: true` the persisted event log is expected to contain
  // stream_append entries.
  const prompts: string[] = [];
  const exitCode = await runUltracode('审查博客里的技术论断', {
    cwd: dir,
    runId: 'uc-trace',
    json: true,
    quiet: true,
    trace: true,
    gateway: fakeUltracodeGateway(prompts),
    concurrency: '3',
    maxRetries: '0',
  });

  expect(exitCode).toBe(0);

  const eventLogPath = join(dir, '.fuc-run', 'uc-trace', 'events.jsonl');
  const eventLog = readFileSync(eventLogPath, 'utf8');
  expect(eventLog).toContain('stream_append');
});
154+
it('skips repair rounds once an earlier acceptance gate passes', async () => {
  const prompts: string[] = [];
  const exitCode = await runUltracode('修复复杂失败并循环验收', {
    cwd: dir,
    runId: 'uc-skip-repair',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(prompts, { maxAgentCalls: 20, maxRounds: 2 }),
    concurrency: '3',
    maxRetries: '0',
  });

  expect(exitCode).toBe(0);

  // No prompt carrying either repair-round marker may have reached the gateway.
  const isRepairPrompt = (text: string): boolean =>
    text.includes('返工轮次') || text.includes('第 2 轮返工');
  expect(prompts.filter(isRepairPrompt)).toHaveLength(0);

  // The persisted workflow must still contain a round-2 node, even though no
  // repair prompt was sent for it.
  const workflowPath = join(dir, '.fuc-run', 'uc-skip-repair', 'workflow.fuc.json');
  const persisted = JSON.parse(readFileSync(workflowPath, 'utf8'));
  const hasRoundTwoNode = persisted.nodes.some((node: { id: string }) =>
    node.id.includes('n_dyn_r2'),
  );
  expect(hasRoundTwoNode).toBe(true);
});
174+
it('records a planner_fallback event when planning yields no parseable spec', async () => {
  const prompts: string[] = [];
  const baseGateway = fakeUltracodeGateway(prompts, { maxAgentCalls: 20, maxRounds: 2 });

  // Wrap the fake gateway so the planning prompt is answered with plain text
  // instead of JSON; every other prompt is delegated unchanged.
  const unparseablePlanner: RunGateway = {
    ...baseGateway,
    spawnCliAgent: async (prompt, adapter, opts) => {
      if (!prompt.includes('DYNAMIC_HARNESS')) {
        return baseGateway.spawnCliAgent(prompt, adapter, opts);
      }
      opts.onProgress?.('chunk');
      return '抱歉,我无法生成规格,这只是一段普通说明文字。';
    },
  };

  const exitCode = await runUltracode('审查博客里的技术论断', {
    cwd: dir,
    runId: 'uc-fallback',
    json: true,
    quiet: true,
    gateway: unparseablePlanner,
    concurrency: '3',
    maxRetries: '0',
  });

  // Either exit code is acceptable: the run proceeds on a fallback spec that
  // may or may not pass acceptance. What matters is that the degraded planning
  // is recorded in the event log rather than passing silently.
  expect([0, 1]).toContain(exitCode);

  const eventLogPath = join(dir, '.fuc-run', 'uc-fallback', 'events.jsonl');
  const eventLog = readFileSync(eventLogPath, 'utf8');
  expect(eventLog).toContain('planner_fallback');
});
205+
it('reuses a saved harness.json with --from-harness and skips planning', async () => {
  const task = '审查博客里的技术论断';
  const plannerBudget = { maxAgentCalls: 20, maxRounds: 2 };

  // Step 1: planner-only run, used purely to get a harness.json onto disk.
  const planningPrompts: string[] = [];
  await runUltracode(task, {
    cwd: dir,
    runId: 'uc-plan',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(planningPrompts, plannerBudget),
    plannerOnly: true,
  });
  const savedHarness = join(dir, '.fuc-run', 'uc-plan', 'harness.json');
  expect(existsSync(savedHarness)).toBe(true);

  // Clear the captured output of the planning run before the second run.
  outBuf = '';

  // Step 2: a fresh run id that is handed the saved harness.
  const reusePrompts: string[] = [];
  const exitCode = await runUltracode(task, {
    cwd: dir,
    runId: 'uc-reuse',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(reusePrompts, plannerBudget),
    fromHarness: savedHarness,
    concurrency: '3',
    maxRetries: '0',
  });

  expect(exitCode).toBe(0);

  // The planning prompt must never reach the gateway on the reuse run...
  const plannerInvoked = reusePrompts.some((text) => text.includes('DYNAMIC_HARNESS'));
  expect(plannerInvoked).toBe(false);

  // ...and the skip must be visible in the persisted event log.
  const reuseLog = readFileSync(join(dir, '.fuc-run', 'uc-reuse', 'events.jsonl'), 'utf8');
  expect(reuseLog).toContain('planner_skipped');
});
241+
it('runs a graceful closing pass from the reserve when work budget exhausts', async () => {
  // Per the original test note: maxAgentCalls 5 implies a closing reserve of 2
  // and a work ceiling of 3, so work runs out mid-run and the gate/report are
  // served from the reserved pool instead of hard-aborting.
  // NOTE(review): that 2/3 split is not visible in this file; confirm against
  // the budget implementation.
  const prompts: string[] = [];
  const exitCode = await runUltracode('审查博客里的技术论断', {
    cwd: dir,
    runId: 'uc-closing',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(prompts, { maxAgentCalls: 5, maxRounds: 1 }),
    concurrency: '1',
    maxRetries: '0',
  });

  // The verdict itself is not under test here, so both exit codes are accepted.
  expect([0, 1]).toContain(exitCode);

  const eventLogPath = join(dir, '.fuc-run', 'uc-closing', 'events.jsonl');
  const eventLog = readFileSync(eventLogPath, 'utf8');
  expect(eventLog).toContain('work_budget_exhausted');
  expect(eventLog).toContain('closing_pass');

  // NOTE(review): the intent is that the reserve absorbs the closing pass so the
  // overall budget is not fully exhausted; only the reported ceiling is asserted.
  const parsed = JSON.parse(outBuf);
  expect(parsed.budget.maxAgentCalls).toBe(5);
});
265+
it('runs verify-command and fails the run when the command exits nonzero', async () => {
  // A verification command that always exits with status 7.
  const verifyCommand = 'node -e "process.exit(7)"';

  const prompts: string[] = [];
  const exitCode = await runUltracode('审查博客里的技术论断', {
    cwd: dir,
    runId: 'uc-verify-fail',
    json: true,
    quiet: true,
    gateway: fakeUltracodeGateway(prompts),
    concurrency: '3',
    maxRetries: '0',
    verifyCommand,
  });

  const parsed = JSON.parse(outBuf);
  const artifactDir = join(dir, '.fuc-run', 'uc-verify-fail');

  expect(exitCode).toBe(1);
  expect(parsed.success).toBe(false);

  // The result echoes the command and its exit status.
  expect(parsed.verification.command).toBe(verifyCommand);
  expect(parsed.verification.exitCode).toBe(7);

  // The failed verification shows up as a verdict gap tagged 'verification'.
  const { verdict } = parsed.artifacts;
  expect(verdict.pass).toBe(false);
  const hasVerificationGap = verdict.gaps.some(
    (gap: { taskId: string }) => gap.taskId === 'verification',
  );
  expect(hasVerificationGap).toBe(true);

  // The outcome is persisted as its own file and logged as an event.
  expect(existsSync(join(artifactDir, 'verification.json'))).toBe(true);
  const eventLog = readFileSync(join(artifactDir, 'events.jsonl'), 'utf8');
  expect(eventLog).toContain('verification_complete');
});
63291} ) ;
64292
65- function fakeUltracodeGateway ( calls : string [ ] ) : RunGateway {
293+ function fakeUltracodeGateway (
294+ calls : string [ ] ,
295+ plannerBudget : { maxAgentCalls : number ; maxRounds : number } = { maxAgentCalls : 20 , maxRounds : 2 } ,
296+ ) : RunGateway {
66297 const respond = async (
67298 prompt : string ,
68299 _adapter : string ,
@@ -75,8 +306,44 @@ function fakeUltracodeGateway(calls: string[]): RunGateway {
75306 objective : '审查博客里的技术论断' ,
76307 nonGoals : [ '不要改写博客风格' ] ,
77308 successCriteria : [ '每条技术论断都有证据' ] ,
78- budget : { maxAgentCalls : 20 , maxRounds : 2 } ,
79- strategies : [ 'fan-out-and-synthesize' , 'adversarial-verification' ] ,
309+ budget : plannerBudget ,
310+ strategies :
311+ plannerBudget . maxRounds > 1
312+ ? [ 'loop-until-done' , 'adversarial-verification' ]
313+ : [ 'fan-out-and-synthesize' , 'adversarial-verification' ] ,
314+ plan : [
315+ {
316+ id : 'claims' ,
317+ kind : 'agent' ,
318+ title : '识别论断' ,
319+ focus : '找出需要核验的技术论断' ,
320+ deliverable : '论断清单' ,
321+ acceptance : '至少列出一条论断' ,
322+ evidenceRequired : '原文片段' ,
323+ } ,
324+ {
325+ id : 'verify' ,
326+ kind : 'parallel' ,
327+ title : '并行核验' ,
328+ dependsOn : [ 'claims' ] ,
329+ branches : [
330+ {
331+ title : '代码核验' ,
332+ focus : '对照代码库核验论断' ,
333+ deliverable : '核验结果' ,
334+ acceptance : '每条结论都有文件路径或原因' ,
335+ evidenceRequired : '文件路径' ,
336+ } ,
337+ {
338+ title : '反面检查' ,
339+ focus : '寻找证据不足和过度声称' ,
340+ deliverable : '风险清单' ,
341+ acceptance : '风险有原因' ,
342+ evidenceRequired : '复核记录' ,
343+ } ,
344+ ] ,
345+ } ,
346+ ] ,
80347 workerGroups : [
81348 {
82349 id : 't1' ,
0 commit comments