@@ -23,16 +23,19 @@ public class BatchedExecutorMtmd

     public static async Task Run()
     {
+        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
         var parameters = new ModelParams(UserSettings.GetModelPath());
         using var model = await LLamaWeights.LoadFromFileAsync(parameters);
-        var mtmdParams = MtmdContextParams.Default();
+        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
         mtmdParams.UseGpu = false;
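+        // Prefer the helper's configured marker, then llama.cpp's default, then a literal "<media>" fallback.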
         var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

-        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams);
+        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights

-        using var executor = new BatchedExecutor(model, parameters, mtmd);
+        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation

+        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
         var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
         var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
         var promptText = string.Concat(marker, promptSuffix);
@@ -42,32 +45,39 @@ public static async Task Run()

         var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;

+        // A simple low-temperature sampler keeps the demo output close to deterministic.
         var sampler = new DefaultSamplingPipeline
         {
             Temperature = 0.1f
         };

+        // Stream decoded text to the console as soon as tokens arrive.
         var decoder = new StreamingTokenDecoder(executor.Context)
         {
             DecodeSpecialTokens = false
         };

         try
         {
+            // Each conversation tracks its own KV cache sequence IDs.
             var conversation = executor.Create();
-            conversation.QueueMedia(imagePath);
-            conversation.Prompt(promptText, addBos: true, special: true);
+            // Enqueue the image so MtmdHelper sees it.
+            conversation.QueueMedia(imagePath);
+            // Schedule the multimodal prompt.
+            conversation.Prompt(promptText, addBos: true, special: true);

             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
             Console.ResetColor();

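+            // Token budget: generation stops once this many tokens have been sampled.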
             var remaining = TokenCount;

+            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
             async Task<bool> ProcessNextAsync()
             {
                 var decodeResult = await executor.Infer();
-                if (decodeResult == DecodeResult.NoKvSlot)
+                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                 {
                     Console.ForegroundColor = ConsoleColor.Red;
                     Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
@@ -78,10 +88,11 @@ async Task<bool> ProcessNextAsync()
                 if (decodeResult != DecodeResult.Ok)
                     throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

-                if (!conversation.RequiresSampling)
+                if (!conversation.RequiresSampling) // another conversation may still be queued
                     return true;

-                var token = conversation.Sample(sampler);
+                var token = conversation.Sample(sampler); // pull logits (or -1 for mtmd chunk) and sample
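+                // Stop when the model emits an end-of-generation token.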
                 if (token.IsEndOfGeneration(vocab))
                     return false;

@@ -90,13 +101,13 @@ async Task<bool> ProcessNextAsync()
                 if (!string.IsNullOrEmpty(delta))
                     Console.Write(delta);

-                sampler.Accept(token);
-                conversation.Prompt(token);
+                sampler.Accept(token); // keep sampler state in sync
+                conversation.Prompt(token); // feed the accepted token back into the batch
                 remaining--;
                 return remaining > 0;
             }

-            while (remaining > 0 && await ProcessNextAsync())
+            while (remaining > 0 && await ProcessNextAsync()) // continue until EOS or the budget is reached
             {
             }
