
Commit df143e7

continue[bot] and nate committed
fix(openai-adapters): Use stream.usage Promise exclusively for usage tokens
The Vercel AI SDK's fullStream finish event contains preliminary/incomplete usage data (often zeros). The authoritative usage is ONLY available via the stream.usage Promise, which resolves after the stream completes.

Changes:
- convertVercelStream: Skip finish event entirely (return null)
- OpenAI.ts: Always await stream.usage after consuming fullStream
- Anthropic.ts: Same approach, with cache token support
- Tests: Updated to reflect that the finish event doesn't emit usage

This is the correct architecture per the Vercel AI SDK design:
- fullStream: Stream events (text, tools, etc.); finish has no reliable usage
- stream.usage: Promise that resolves with complete usage after the stream ends

Co-authored-by: nate <[email protected]>
Generated with [Continue](https://continue.dev)
1 parent 6e656f9 commit df143e7
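
The ordering that makes this work: the SDK's stream.usage Promise settles only after fullStream has been fully consumed, so awaiting it after the relay loop cannot deadlock and always sees the final counts. Below is a minimal sketch of the pattern the adapters now share, assuming the Vercel AI SDK's streamText result shape (a fullStream async iterable plus a usage Promise carrying promptTokens/completionTokens/totalTokens); convertVercelStream and usageChatChunk are this package's own helpers, and the import paths are illustrative:

```typescript
import { streamText, type LanguageModel } from "ai";

// Illustrative import paths - the real helpers live in this package (see diffs below).
import { convertVercelStream } from "./vercelStreamConverter.js";
import { usageChatChunk } from "./util.js";

async function* chatWithUsage(model: LanguageModel, prompt: string, modelName: string) {
  const stream = streamText({ model, prompt });

  // 1) Relay text/tool-call chunks. The converter now returns null for the
  //    finish event, so no preliminary (often zeroed) usage chunk leaks out.
  for await (const chunk of convertVercelStream(stream.fullStream as any, {
    model: modelName,
  })) {
    yield chunk;
  }

  // 2) fullStream is exhausted, so stream.usage resolves with the final counts.
  const usage = await stream.usage;
  const promptTokens =
    typeof usage.promptTokens === "number" ? usage.promptTokens : 0;
  const completionTokens =
    typeof usage.completionTokens === "number" ? usage.completionTokens : 0;
  yield usageChatChunk({
    model: modelName,
    usage: {
      prompt_tokens: promptTokens,
      completion_tokens: completionTokens,
      total_tokens:
        typeof usage.totalTokens === "number"
          ? usage.totalTokens
          : promptTokens + completionTokens,
    },
  });
}
```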

File tree: 4 files changed (+64, -119 lines)


packages/openai-adapters/src/apis/Anthropic.ts

Lines changed: 32 additions & 38 deletions
```diff
@@ -665,50 +665,44 @@ export class AnthropicApi implements BaseLlmApi {
     });

     // Convert Vercel AI SDK stream to OpenAI format
-    let hasEmittedUsage = false;
     for await (const chunk of convertVercelStream(stream.fullStream as any, {
       model: body.model,
     })) {
-      if (chunk.usage) {
-        hasEmittedUsage = true;
-      }
       yield chunk;
     }

-    // Fallback: If fullStream didn't emit usage, get it from stream.usage Promise
-    if (!hasEmittedUsage) {
-      const finalUsage = await stream.usage;
-      if (finalUsage) {
-        const { usageChatChunk } = await import("../util.js");
-        const promptTokens =
-          typeof finalUsage.promptTokens === "number"
-            ? finalUsage.promptTokens
-            : 0;
-        const completionTokens =
-          typeof finalUsage.completionTokens === "number"
-            ? finalUsage.completionTokens
-            : 0;
-        const totalTokens =
-          typeof finalUsage.totalTokens === "number"
-            ? finalUsage.totalTokens
-            : promptTokens + completionTokens;
-
-        yield usageChatChunk({
-          model: body.model,
-          usage: {
-            prompt_tokens: promptTokens,
-            completion_tokens: completionTokens,
-            total_tokens: totalTokens,
-            prompt_tokens_details: {
-              cached_tokens:
-                (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-              cache_read_tokens:
-                (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-              cache_write_tokens: 0,
-            } as any,
-          },
-        });
-      }
+    // Get final usage from stream.usage Promise (finish event has incomplete data)
+    const finalUsage = await stream.usage;
+    if (finalUsage) {
+      const { usageChatChunk } = await import("../util.js");
+      const promptTokens =
+        typeof finalUsage.promptTokens === "number"
+          ? finalUsage.promptTokens
+          : 0;
+      const completionTokens =
+        typeof finalUsage.completionTokens === "number"
+          ? finalUsage.completionTokens
+          : 0;
+      const totalTokens =
+        typeof finalUsage.totalTokens === "number"
+          ? finalUsage.totalTokens
+          : promptTokens + completionTokens;
+
+      yield usageChatChunk({
+        model: body.model,
+        usage: {
+          prompt_tokens: promptTokens,
+          completion_tokens: completionTokens,
+          total_tokens: totalTokens,
+          prompt_tokens_details: {
+            cached_tokens:
+              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
+            cache_read_tokens:
+              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
+            cache_write_tokens: 0,
+          } as any,
+        },
+      });
     }
   }
```

packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 24 additions & 30 deletions
```diff
@@ -334,42 +334,36 @@ export class OpenAIApi implements BaseLlmApi {
     });

     // Convert Vercel AI SDK stream to OpenAI format
-    let hasEmittedUsage = false;
     for await (const chunk of convertVercelStream(stream.fullStream as any, {
       model: modifiedBody.model,
     })) {
-      if (chunk.usage) {
-        hasEmittedUsage = true;
-      }
       yield chunk;
     }

-    // Fallback: If fullStream didn't emit usage, get it from stream.usage Promise
-    if (!hasEmittedUsage) {
-      const finalUsage = await stream.usage;
-      if (finalUsage) {
-        const promptTokens =
-          typeof finalUsage.promptTokens === "number"
-            ? finalUsage.promptTokens
-            : 0;
-        const completionTokens =
-          typeof finalUsage.completionTokens === "number"
-            ? finalUsage.completionTokens
-            : 0;
-        const totalTokens =
-          typeof finalUsage.totalTokens === "number"
-            ? finalUsage.totalTokens
-            : promptTokens + completionTokens;
-
-        yield usageChatChunk({
-          model: modifiedBody.model,
-          usage: {
-            prompt_tokens: promptTokens,
-            completion_tokens: completionTokens,
-            total_tokens: totalTokens,
-          },
-        });
-      }
+    // Get final usage from stream.usage Promise (finish event has incomplete data)
+    const finalUsage = await stream.usage;
+    if (finalUsage) {
+      const promptTokens =
+        typeof finalUsage.promptTokens === "number"
+          ? finalUsage.promptTokens
+          : 0;
+      const completionTokens =
+        typeof finalUsage.completionTokens === "number"
+          ? finalUsage.completionTokens
+          : 0;
+      const totalTokens =
+        typeof finalUsage.totalTokens === "number"
+          ? finalUsage.totalTokens
+          : promptTokens + completionTokens;
+
+      yield usageChatChunk({
+        model: modifiedBody.model,
+        usage: {
+          prompt_tokens: promptTokens,
+          completion_tokens: completionTokens,
+          total_tokens: totalTokens,
+        },
+      });
     }
   }
   async completionNonStream(
```

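Both adapters now repeat the same typeof-guarded normalization of the resolved usage object. Not part of this commit, but a natural follow-up would be to factor it into a shared helper; a sketch under that assumption (hypothetical name, and a deliberately loose input type since the SDK's counts can be missing at runtime):

```typescript
// Hypothetical shared helper (not in this commit): mirrors the normalization
// duplicated in OpenAI.ts and Anthropic.ts above.
function normalizeVercelUsage(usage: {
  promptTokens?: number;
  completionTokens?: number;
  totalTokens?: number;
}) {
  const promptTokens =
    typeof usage.promptTokens === "number" ? usage.promptTokens : 0;
  const completionTokens =
    typeof usage.completionTokens === "number" ? usage.completionTokens : 0;
  const totalTokens =
    typeof usage.totalTokens === "number"
      ? usage.totalTokens
      : promptTokens + completionTokens;
  return {
    prompt_tokens: promptTokens,
    completion_tokens: completionTokens,
    total_tokens: totalTokens,
  };
}
```
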
packages/openai-adapters/src/test/vercelStreamConverter.test.ts

Lines changed: 6 additions & 11 deletions
```diff
@@ -78,7 +78,7 @@ describe("convertVercelStreamPart", () => {
     });
   });

-  test("converts finish to usage chunk", () => {
+  test("finish event returns null (usage comes from stream.usage Promise)", () => {
     const part: VercelStreamPart = {
       type: "finish",
       finishReason: "stop",
@@ -91,12 +91,8 @@ describe("convertVercelStreamPart", () => {

     const result = convertVercelStreamPart(part, options);

-    expect(result).not.toBeNull();
-    expect(result?.usage).toEqual({
-      prompt_tokens: 100,
-      completion_tokens: 50,
-      total_tokens: 150,
-    });
+    // Finish event should not emit usage - caller will use stream.usage Promise
+    expect(result).toBeNull();
   });

   test("throws error for error event", () => {
@@ -250,16 +246,15 @@ describe("convertVercelStream", () => {
       chunks.push(chunk);
     }

-    // Should only get chunks for: text-delta (2), tool-call (1), finish (1) = 4 chunks
-    // step-start and step-finish are filtered out
-    expect(chunks).toHaveLength(4);
+    // Should only get chunks for: text-delta (2), tool-call (1) = 3 chunks
+    // step-start, step-finish, and finish are filtered out (finish usage comes from stream.usage Promise)
+    expect(chunks).toHaveLength(3);

     expect(chunks[0].choices[0].delta.content).toBe("Hello ");
     expect(chunks[1].choices[0].delta.content).toBe("world");
     expect(chunks[2].choices[0].delta.tool_calls?.[0].function?.name).toBe(
       "test",
     );
-    expect(chunks[3].usage).toBeDefined();
   });

   test("throws error when stream contains error event", async () => {
```

packages/openai-adapters/src/vercelStreamConverter.ts

Lines changed: 2 additions & 40 deletions
```diff
@@ -121,46 +121,8 @@ export function convertVercelStreamPart(
       });

     case "finish":
-      // Emit usage chunk at the end if usage data is present
-      if (part.usage) {
-        const promptTokens =
-          typeof part.usage.promptTokens === "number"
-            ? part.usage.promptTokens
-            : 0;
-        const completionTokens =
-          typeof part.usage.completionTokens === "number"
-            ? part.usage.completionTokens
-            : 0;
-        const totalTokens =
-          typeof part.usage.totalTokens === "number"
-            ? part.usage.totalTokens
-            : promptTokens + completionTokens;
-
-        // Check for Anthropic-specific cache token details
-        const promptTokensDetails =
-          (part.usage as any).promptTokensDetails?.cachedTokens !== undefined
-            ? {
-                cached_tokens:
-                  (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
-                cache_read_tokens:
-                  (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
-                cache_write_tokens: 0,
-              }
-            : undefined;
-
-        return usageChatChunk({
-          model,
-          usage: {
-            prompt_tokens: promptTokens,
-            completion_tokens: completionTokens,
-            total_tokens: totalTokens,
-            ...(promptTokensDetails
-              ? { prompt_tokens_details: promptTokensDetails as any }
-              : {}),
-          },
-        });
-      }
-      // If no usage data in finish event, return null
+      // Don't emit usage from finish event - it may have incomplete/preliminary data
+      // Caller will use stream.usage Promise which has the final accurate usage
       return null;

     case "error":
```
