
Commit bbeec4b

continue[bot] and nate committed
fix(openai-adapters): Add fallback to stream.usage Promise for usage tokens
Vercel AI SDK's fullStream may emit a finish event with zero/invalid usage data in real API calls, even though tests show it working. This implements a hybrid approach:

1. convertVercelStream emits usage from the finish event if valid (>0 tokens)
2. Track whether usage was emitted during stream consumption
3. If no usage was emitted, fall back to awaiting the stream.usage Promise

This ensures tests pass (which have valid finish events) while also handling real API scenarios where finish events may have incomplete data.

Changes:
- vercelStreamConverter: only emit usage if tokens > 0
- OpenAI.ts: add hasEmittedUsage tracking + fallback
- Anthropic.ts: same approach, with cache token support

Co-authored-by: nate <[email protected]>
Generated with [Continue](https://continue.dev)
1 parent a89187b commit bbeec4b
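In short, the adapters now track whether the converted stream already produced a usage chunk and only fall back to the stream.usage Promise when it did not. A minimal sketch of that flow follows; it is an illustration, not the adapter code itself, and the import paths and the shape of the Vercel AI SDK streamText() result are assumed from the diffs below.

// Sketch of the hybrid usage-emission pattern described above.
// Import paths assume the package layout shown in this commit.
import { convertVercelStream } from "./vercelStreamConverter.js";
import { usageChatChunk } from "./util.js";

async function* streamWithUsageFallback(
  // Assumed shape of the Vercel AI SDK streamText() result used here.
  stream: { fullStream: AsyncIterable<any>; usage: Promise<any> },
  model: string,
) {
  let hasEmittedUsage = false;

  // Steps 1 + 2: consume the converted stream and remember whether the
  // converter already emitted a usage chunk from a valid finish event.
  for await (const chunk of convertVercelStream(stream.fullStream, { model })) {
    if (chunk.usage) {
      hasEmittedUsage = true;
    }
    yield chunk;
  }

  // Step 3: fallback only if no usage was emitted; stream.usage resolves
  // after fullStream has been fully consumed.
  if (!hasEmittedUsage) {
    const finalUsage = await stream.usage;
    if (finalUsage) {
      yield usageChatChunk({
        model,
        usage: {
          prompt_tokens: finalUsage.promptTokens,
          completion_tokens: finalUsage.completionTokens,
          total_tokens: finalUsage.totalTokens,
        },
      });
    }
  }
}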

3 files changed: +85 −34 lines

packages/openai-adapters/src/apis/Anthropic.ts

Lines changed: 25 additions & 20 deletions
@@ -665,32 +665,37 @@ export class AnthropicApi implements BaseLlmApi {
     });
 
     // Convert Vercel AI SDK stream to OpenAI format
-    // Note: We need to consume fullStream first, then await stream.usage Promise
+    let hasEmittedUsage = false;
     for await (const chunk of convertVercelStream(stream.fullStream as any, {
       model: body.model,
     })) {
+      if (chunk.usage) {
+        hasEmittedUsage = true;
+      }
      yield chunk;
     }
 
-    // Await final usage from stream.usage Promise with Anthropic-specific cache details
-    const finalUsage = await stream.usage;
-    if (finalUsage) {
-      const { usageChatChunk } = await import("../util.js");
-      yield usageChatChunk({
-        model: body.model,
-        usage: {
-          prompt_tokens: finalUsage.promptTokens,
-          completion_tokens: finalUsage.completionTokens,
-          total_tokens: finalUsage.totalTokens,
-          prompt_tokens_details: {
-            cached_tokens:
-              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-            cache_read_tokens:
-              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-            cache_write_tokens: 0,
-          } as any,
-        },
-      });
+    // Fallback: If fullStream didn't emit usage, get it from stream.usage Promise
+    if (!hasEmittedUsage) {
+      const finalUsage = await stream.usage;
+      if (finalUsage) {
+        const { usageChatChunk } = await import("../util.js");
+        yield usageChatChunk({
+          model: body.model,
+          usage: {
+            prompt_tokens: finalUsage.promptTokens,
+            completion_tokens: finalUsage.completionTokens,
+            total_tokens: finalUsage.totalTokens,
+            prompt_tokens_details: {
+              cached_tokens:
+                (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
+              cache_read_tokens:
+                (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
+              cache_write_tokens: 0,
+            } as any,
+          },
+        });
+      }
     }
   }
 
packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 17 additions & 12 deletions
@@ -334,24 +334,29 @@ export class OpenAIApi implements BaseLlmApi {
     });
 
     // Convert Vercel AI SDK stream to OpenAI format
-    // Note: We need to consume fullStream first, then await stream.usage Promise
+    let hasEmittedUsage = false;
     for await (const chunk of convertVercelStream(stream.fullStream as any, {
      model: modifiedBody.model,
     })) {
+      if (chunk.usage) {
+        hasEmittedUsage = true;
+      }
      yield chunk;
     }
 
-    // Await final usage from stream.usage Promise (resolves after fullStream completes)
-    const finalUsage = await stream.usage;
-    if (finalUsage) {
-      yield usageChatChunk({
-        model: modifiedBody.model,
-        usage: {
-          prompt_tokens: finalUsage.promptTokens,
-          completion_tokens: finalUsage.completionTokens,
-          total_tokens: finalUsage.totalTokens,
-        },
-      });
+    // Fallback: If fullStream didn't emit usage, get it from stream.usage Promise
+    if (!hasEmittedUsage) {
+      const finalUsage = await stream.usage;
+      if (finalUsage) {
+        yield usageChatChunk({
+          model: modifiedBody.model,
+          usage: {
+            prompt_tokens: finalUsage.promptTokens,
+            completion_tokens: finalUsage.completionTokens,
+            total_tokens: finalUsage.totalTokens,
+          },
+        });
+      }
     }
   }
 
   async completionNonStream(

packages/openai-adapters/src/vercelStreamConverter.ts

Lines changed: 43 additions & 2 deletions
@@ -121,8 +121,49 @@ export function convertVercelStreamPart(
       });
 
     case "finish":
-      // Don't emit usage from finish event - we'll get it from stream.usage Promise
-      // The finish event may have incomplete usage data, so we wait for the Promise
+      // Emit usage chunk at the end if usage data is present and valid
+      if (part.usage) {
+        const promptTokens =
+          typeof part.usage.promptTokens === "number"
+            ? part.usage.promptTokens
+            : 0;
+        const completionTokens =
+          typeof part.usage.completionTokens === "number"
+            ? part.usage.completionTokens
+            : 0;
+        const totalTokens =
+          typeof part.usage.totalTokens === "number"
+            ? part.usage.totalTokens
+            : promptTokens + completionTokens;
+
+        // Only emit usage chunk if we have meaningful token counts
+        if (promptTokens > 0 || completionTokens > 0) {
+          // Check for Anthropic-specific cache token details
+          const promptTokensDetails =
+            (part.usage as any).promptTokensDetails?.cachedTokens !== undefined
+              ? {
+                  cached_tokens:
+                    (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
+                  cache_read_tokens:
+                    (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
+                  cache_write_tokens: 0,
+                }
+              : undefined;
+
+          return usageChatChunk({
+            model,
+            usage: {
+              prompt_tokens: promptTokens,
+              completion_tokens: completionTokens,
+              total_tokens: totalTokens,
+              ...(promptTokensDetails
+                ? { prompt_tokens_details: promptTokensDetails as any }
+                : {}),
+            },
+          });
+        }
+      }
+      // If no valid usage data, don't emit a usage chunk
       return null;
 
     case "error":
