@@ -23,16 +23,19 @@ public class BatchedExecutorMtmd

     public static async Task Run()
     {
+        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
         var parameters = new ModelParams(UserSettings.GetModelPath());
         using var model = await LLamaWeights.LoadFromFileAsync(parameters);
-        var mtmdParams = MtmdContextParams.Default();
+        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
         mtmdParams.UseGpu = false;
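+        // Prefer the helper's configured marker, then llama.cpp's default, then a literal "<media>" fallback.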
         var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

-        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams);
+        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights

-        using var executor = new BatchedExecutor(model, parameters, mtmd);
+        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation

+        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
         var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
         var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
         var promptText = string.Concat(marker, promptSuffix);
@@ -42,32 +45,39 @@ public static async Task Run()

         var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

+        // A simple low-temperature sampler keeps the demo output close to deterministic.
         var sampler = new DefaultSamplingPipeline
         {
             Temperature = 0.1f
         };

+        // Stream decoded text to the console as soon as tokens arrive.
         var decoder = new StreamingTokenDecoder(executor.Context)
         {
             DecodeSpecialTokens = false
         };

         try
         {
+            // Each conversation tracks its own KV cache sequence IDs.
             var conversation = executor.Create();
-            conversation.QueueMedia(imagePath);
-            conversation.Prompt(promptText, addBos: true, special: true);
+            // Enqueue the image so MtmdHelper sees it.
+            conversation.QueueMedia(imagePath);
+            // Schedule the multimodal prompt.
+            conversation.Prompt(promptText, addBos: true, special: true);

             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
             Console.ResetColor();

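+            // Token budget: generation stops once this many tokens have been sampled.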
             var remaining = TokenCount;

+            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
             async Task<bool> ProcessNextAsync()
             {
                 var decodeResult = await executor.Infer();
-                if (decodeResult == DecodeResult.NoKvSlot)
+                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                 {
                     Console.ForegroundColor = ConsoleColor.Red;
                     Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
@@ -78,10 +88,11 @@ async Task<bool> ProcessNextAsync()
                 if (decodeResult != DecodeResult.Ok)
                     throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

-                if (!conversation.RequiresSampling)
+                if (!conversation.RequiresSampling) // another conversation may still be queued
                     return true;

-                var token = conversation.Sample(sampler);
+                var token = conversation.Sample(sampler); // pull logits (or -1 for mtmd chunk) and sample
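+                // Stop when the model emits an end-of-generation token.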
                 if (token.IsEndOfGeneration(vocab))
                     return false;

@@ -90,13 +101,13 @@ async Task<bool> ProcessNextAsync()
                 if (!string.IsNullOrEmpty(delta))
                     Console.Write(delta);

-                sampler.Accept(token);
-                conversation.Prompt(token);
+                sampler.Accept(token); // keep sampler state in sync
+                conversation.Prompt(token); // feed the accepted token back into the batch
                 remaining--;
                 return remaining > 0;
             }

-            while (remaining > 0 && await ProcessNextAsync())
+            while (remaining > 0 && await ProcessNextAsync()) // continue until EOS or the budget is reached
             {
             }
