Commit fcce175
Review Example
1 parent bd33d18 commit fcce175
LLama.Examples/Examples/BatchedExecutorMtmd.cs

Lines changed: 19 additions & 11 deletions
@@ -23,16 +23,18 @@ public class BatchedExecutorMtmd
 
     public static async Task Run()
     {
+        // Load the base LLM and its clip/mtmd sidecar weights so the executor has everything it needs.
         var parameters = new ModelParams(UserSettings.GetModelPath());
         using var model = await LLamaWeights.LoadFromFileAsync(parameters);
-        var mtmdParams = MtmdContextParams.Default();
+        var mtmdParams = MtmdContextParams.Default(); // reuse llama.cpp defaults for helper settings
         mtmdParams.UseGpu = false;
         var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";
 
-        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams);
+        using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams); // multimodal helper weights
 
-        using var executor = new BatchedExecutor(model, parameters, mtmd);
+        using var executor = new BatchedExecutor(model, parameters, mtmd); // drives batched token + chunk evaluation
 
+        // Prepend the media marker so the helper knows where to inject the encoded image tokens.
         var defaultPrompt = "\nUSER: Provide a full description of the image.\nASSISTANT: ";
         var promptSuffix = AnsiConsole.Ask("Prompt (or ENTER for default):", defaultPrompt);
         var promptText = string.Concat(marker, promptSuffix);
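
Assembled from the added and context lines in this hunk, the post-commit setup reads as one block roughly like the sketch below. UserSettings.GetModelPath() and UserSettings.GetMMProjPath() are the example project's own path helpers; everything else is taken directly from the diff.

// Sketch: post-commit setup sequence, reassembled from the hunk above.
var parameters = new ModelParams(UserSettings.GetModelPath());
using var model = await LLamaWeights.LoadFromFileAsync(parameters);

var mtmdParams = MtmdContextParams.Default(); // llama.cpp helper defaults
mtmdParams.UseGpu = false;                    // CPU-only projection keeps the demo portable
var marker = mtmdParams.MediaMarker ?? NativeApi.MtmdDefaultMarker() ?? "<media>";

using var mtmd = await SafeMtmdWeights.LoadFromFileAsync(UserSettings.GetMMProjPath(), model, mtmdParams);
using var executor = new BatchedExecutor(model, parameters, mtmd);

// The marker must appear in the prompt text so the helper knows where to
// splice the encoded image chunks into the token stream.
var promptText = marker + "\nUSER: Provide a full description of the image.\nASSISTANT: ";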
@@ -42,32 +44,38 @@ public static async Task Run()
 
         var vocab = executor.Context.NativeHandle.ModelHandle.Vocab;
 
+        // Simple low-temperature sampler keeps the demo deterministic-ish.
         var sampler = new DefaultSamplingPipeline
         {
             Temperature = 0.1f
         };
 
+        // Stream decoded text to the console as soon as tokens arrive.
         var decoder = new StreamingTokenDecoder(executor.Context)
         {
             DecodeSpecialTokens = false
         };
 
         try
         {
+            // Each conversation tracks its own KV cache sequence IDs.
             var conversation = executor.Create();
-            conversation.QueueMedia(imagePath);
-            conversation.Prompt(promptText, addBos: true, special: true);
+            // enqueue the image so MtmdHelper sees it
+            conversation.QueueMedia(imagePath);
+            // schedule multimodal prompt
+            conversation.Prompt(promptText, addBos: true, special: true);
 
             Console.ForegroundColor = ConsoleColor.Yellow;
             Console.WriteLine("Prompt queued with multimodal chunks. Generating response...\n");
             Console.ResetColor();
 
             var remaining = TokenCount;
 
+            // Run one decode/sampling/prompt cycle – mirrors the batched executor inner loop.
            async Task<bool> ProcessNextAsync()
            {
                var decodeResult = await executor.Infer();
-                if (decodeResult == DecodeResult.NoKvSlot)
+                if (decodeResult == DecodeResult.NoKvSlot) // KV cache exhausted – surface to the user
                {
                    Console.ForegroundColor = ConsoleColor.Red;
                    Console.WriteLine("Insufficient KV cache space for multimodal evaluation.");
@@ -78,10 +86,10 @@ async Task<bool> ProcessNextAsync()
                if (decodeResult != DecodeResult.Ok)
                    throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");
 
-                if (!conversation.RequiresSampling)
+                if (!conversation.RequiresSampling) // another conversation may still be queued
                    return true;
 
-                var token = conversation.Sample(sampler);
+                var token = conversation.Sample(sampler); // pull logits (or -1 for mtmd chunk) and sample
                if (token.IsEndOfGeneration(vocab))
                    return false;
 
@@ -90,13 +98,13 @@ async Task<bool> ProcessNextAsync()
                if (!string.IsNullOrEmpty(delta))
                    Console.Write(delta);
 
-                sampler.Accept(token);
-                conversation.Prompt(token);
+                sampler.Accept(token); // keep sampler state in sync
+                conversation.Prompt(token); // feed the accepted token back into the batch
                remaining--;
                return remaining > 0;
            }
 
-            while (remaining > 0 && await ProcessNextAsync())
+            while (remaining > 0 && await ProcessNextAsync()) // continue until EOS or budget is reached
            {
            }
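
Read end to end, the loop these hunks annotate reassembles into the sketch below. It is built from the added and context lines above, with two caveats: the decoder.Add(token) / decoder.Read() pair that produces delta lives in an elided region of the diff, so those two calls are an assumption, and the console error reporting in the NoKvSlot branch is trimmed to a plain return.

// Sketch: one-conversation generation loop, reassembled from the diff.
var conversation = executor.Create();           // owns its KV cache sequence
conversation.QueueMedia(imagePath);             // image is routed through the mtmd helper
conversation.Prompt(promptText, addBos: true, special: true);

var remaining = TokenCount;                     // token budget for the demo

async Task<bool> ProcessNextAsync()
{
    var decodeResult = await executor.Infer();  // evaluate queued tokens and media chunks
    if (decodeResult == DecodeResult.NoKvSlot)  // KV cache exhausted (error message trimmed here)
        return false;
    if (decodeResult != DecodeResult.Ok)
        throw new RuntimeError($"Failed to evaluate batch: {decodeResult}.");

    if (!conversation.RequiresSampling)         // nothing to sample on this pass
        return true;

    var token = conversation.Sample(sampler);   // sample the next token from the logits
    if (token.IsEndOfGeneration(vocab))
        return false;

    decoder.Add(token);                         // assumption: these two lines are elided in the diff
    var delta = decoder.Read();
    if (!string.IsNullOrEmpty(delta))
        Console.Write(delta);

    sampler.Accept(token);                      // keep sampler state in sync
    conversation.Prompt(token);                 // feed the accepted token back into the batch
    remaining--;
    return remaining > 0;
}

while (remaining > 0 && await ProcessNextAsync())
{
}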
