Commit 3d21467

continue[bot] and nate committed
fix(openai-adapters): Revert to using finish event usage from fullStream
After extensive testing, this reverts to the original approach, in which the finish event from fullStream emits usage. The stream.usage Promise was consistently resolving to undefined/NaN values, while the finish event does contain valid usage in the Vercel AI SDK fullStream. Previous test failures may have been due to timing/async issues that are now resolved by the proper API initialization from earlier commits.

Co-authored-by: nate <[email protected]>
Generated with [Continue](https://continue.dev)
1 parent: 79592f0 · commit: 3d21467
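For context, the two approaches differ only in where usage is read. Below is a minimal sketch, not the adapter's actual code, of the pattern this commit reverts to; it assumes a Vercel AI SDK streamText result whose fullStream emits a finish part carrying usage:

import { streamText } from "ai";

// Sketch of the reverted pattern: read usage from the finish part emitted
// on fullStream instead of awaiting the stream.usage Promise afterwards.
// The model argument is a stand-in for any configured LanguageModel.
async function* streamWithUsage(model: any, prompt: string) {
  const stream = streamText({ model, prompt });

  for await (const part of stream.fullStream) {
    if (part.type === "text-delta") {
      yield { content: part.textDelta };
    } else if (part.type === "finish") {
      // The finish part carries the final token counts.
      yield { usage: part.usage };
    }
  }

  // The abandoned approach awaited stream.usage here, which was observed
  // to resolve to undefined/NaN values.
}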

File tree

4 files changed: +57 −94 lines changed


packages/openai-adapters/src/apis/Anthropic.ts

Lines changed: 3 additions & 38 deletions
@@ -665,45 +665,10 @@ export class AnthropicApi implements BaseLlmApi {
     });

     // Convert Vercel AI SDK stream to OpenAI format
-    for await (const chunk of convertVercelStream(stream.fullStream as any, {
+    // The finish event in fullStream contains the usage data
+    yield* convertVercelStream(stream.fullStream as any, {
       model: body.model,
-    })) {
-      yield chunk;
-    }
-
-    // Get final usage from stream.usage Promise (finish event has incomplete data)
-    const finalUsage = await stream.usage;
-    if (finalUsage) {
-      const { usageChatChunk } = await import("../util.js");
-      const promptTokens =
-        typeof finalUsage.promptTokens === "number"
-          ? finalUsage.promptTokens
-          : 0;
-      const completionTokens =
-        typeof finalUsage.completionTokens === "number"
-          ? finalUsage.completionTokens
-          : 0;
-      const totalTokens =
-        typeof finalUsage.totalTokens === "number"
-          ? finalUsage.totalTokens
-          : promptTokens + completionTokens;
-
-      yield usageChatChunk({
-        model: body.model,
-        usage: {
-          prompt_tokens: promptTokens,
-          completion_tokens: completionTokens,
-          total_tokens: totalTokens,
-          prompt_tokens_details: {
-            cached_tokens:
-              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-            cache_read_tokens:
-              (finalUsage as any).promptTokensDetails?.cachedTokens ?? 0,
-            cache_write_tokens: 0,
-          } as any,
-        },
-      });
-    }
+    });
   }

   private getHeaders(): Record<string, string> {
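A side effect of the revert: the explicit for await loop collapses into yield* delegation, which forwards every chunk from the inner async iterable unchanged, including the new usage chunk from the finish event. A self-contained sketch of that equivalence:

async function* inner(): AsyncGenerator<number> {
  yield 1;
  yield 2;
}

// These two wrappers produce identical output; yield* simply delegates.
async function* withLoop() {
  for await (const chunk of inner()) {
    yield chunk; // explicit re-yield of each chunk
  }
}

async function* withDelegation() {
  yield* inner(); // forwards the same chunks, including the last one
}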

packages/openai-adapters/src/apis/OpenAI.ts

Lines changed: 3 additions & 48 deletions
@@ -334,55 +334,10 @@ export class OpenAIApi implements BaseLlmApi {
     });

     // Convert Vercel AI SDK stream to OpenAI format
-    for await (const chunk of convertVercelStream(stream.fullStream as any, {
+    // The finish event in fullStream contains the usage data
+    yield* convertVercelStream(stream.fullStream as any, {
       model: modifiedBody.model,
-    })) {
-      yield chunk;
-    }
-
-    // Get final usage from stream.usage Promise (finish event has incomplete data)
-    try {
-      const finalUsage = await stream.usage;
-      console.log("[OpenAI Vercel] stream.usage resolved:", {
-        finalUsage,
-        type: typeof finalUsage,
-        keys: finalUsage ? Object.keys(finalUsage) : [],
-      });
-
-      if (finalUsage) {
-        const promptTokens =
-          typeof finalUsage.promptTokens === "number"
-            ? finalUsage.promptTokens
-            : 0;
-        const completionTokens =
-          typeof finalUsage.completionTokens === "number"
-            ? finalUsage.completionTokens
-            : 0;
-        const totalTokens =
-          typeof finalUsage.totalTokens === "number"
-            ? finalUsage.totalTokens
-            : promptTokens + completionTokens;
-
-        console.log("[OpenAI Vercel] Emitting usage:", {
-          promptTokens,
-          completionTokens,
-          totalTokens,
-        });
-
-        yield usageChatChunk({
-          model: modifiedBody.model,
-          usage: {
-            prompt_tokens: promptTokens,
-            completion_tokens: completionTokens,
-            total_tokens: totalTokens,
-          },
-        });
-      } else {
-        console.warn("[OpenAI Vercel] stream.usage resolved to falsy value");
-      }
-    } catch (error) {
-      console.error("[OpenAI Vercel] Error awaiting stream.usage:", error);
-    }
+    });
   }

   async completionNonStream(
     body: CompletionCreateParamsNonStreaming,
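One caveat about the typeof guards deleted here (and reintroduced in the converter below): typeof NaN === "number" in JavaScript, so these ternaries normalize undefined fields but pass NaN through. If the NaN values mentioned in the commit message ever resurface, Number.isFinite would be the stricter guard; a quick sketch:

// typeof treats NaN as a number, so the ternary only catches undefined:
const promptTokens: number = NaN;
const viaTypeof = typeof promptTokens === "number" ? promptTokens : 0; // NaN
const viaIsFinite = Number.isFinite(promptTokens) ? promptTokens : 0; // 0
console.log(viaTypeof, viaIsFinite); // NaN 0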

packages/openai-adapters/src/test/vercelStreamConverter.test.ts

Lines changed: 11 additions & 6 deletions
@@ -78,7 +78,7 @@ describe("convertVercelStreamPart", () => {
     });
   });

-  test("finish event returns null (usage comes from stream.usage Promise)", () => {
+  test("converts finish to usage chunk", () => {
     const part: VercelStreamPart = {
       type: "finish",
       finishReason: "stop",
@@ -91,8 +91,12 @@ describe("convertVercelStreamPart", () => {

     const result = convertVercelStreamPart(part, options);

-    // Finish event should not emit usage - caller will use stream.usage Promise
-    expect(result).toBeNull();
+    expect(result).not.toBeNull();
+    expect(result?.usage).toEqual({
+      prompt_tokens: 100,
+      completion_tokens: 50,
+      total_tokens: 150,
+    });
   });

   test("throws error for error event", () => {
@@ -246,15 +250,16 @@ describe("convertVercelStream", () => {
       chunks.push(chunk);
     }

-    // Should only get chunks for: text-delta (2), tool-call (1) = 3 chunks
-    // step-start, step-finish, and finish are filtered out (finish usage comes from stream.usage Promise)
-    expect(chunks).toHaveLength(3);
+    // Should only get chunks for: text-delta (2), tool-call (1), finish (1) = 4 chunks
+    // step-start and step-finish are filtered out
+    expect(chunks).toHaveLength(4);

     expect(chunks[0].choices[0].delta.content).toBe("Hello ");
     expect(chunks[1].choices[0].delta.content).toBe("world");
     expect(chunks[2].choices[0].delta.tool_calls?.[0].function?.name).toBe(
       "test",
     );
+    expect(chunks[3].usage).toBeDefined();
   });

   test("throws error when stream contains error event", async () => {

packages/openai-adapters/src/vercelStreamConverter.ts

Lines changed: 40 additions & 2 deletions
@@ -121,8 +121,46 @@ export function convertVercelStreamPart(
       });

     case "finish":
-      // Don't emit usage from finish event - it may have incomplete/preliminary data
-      // Caller will use stream.usage Promise which has the final accurate usage
+      // Emit usage from finish event if available
+      // The finish event DOES contain the final usage in most cases
+      if (part.usage) {
+        const promptTokens =
+          typeof part.usage.promptTokens === "number"
+            ? part.usage.promptTokens
+            : 0;
+        const completionTokens =
+          typeof part.usage.completionTokens === "number"
+            ? part.usage.completionTokens
+            : 0;
+        const totalTokens =
+          typeof part.usage.totalTokens === "number"
+            ? part.usage.totalTokens
+            : promptTokens + completionTokens;
+
+        // Check for Anthropic-specific cache token details
+        const promptTokensDetails =
+          (part.usage as any).promptTokensDetails?.cachedTokens !== undefined
+            ? {
+                cached_tokens:
+                  (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
+                cache_read_tokens:
+                  (part.usage as any).promptTokensDetails.cachedTokens ?? 0,
+                cache_write_tokens: 0,
+              }
+            : undefined;
+
+        return usageChatChunk({
+          model,
+          usage: {
+            prompt_tokens: promptTokens,
+            completion_tokens: completionTokens,
+            total_tokens: totalTokens,
+            ...(promptTokensDetails
+              ? { prompt_tokens_details: promptTokensDetails as any }
+              : {}),
+          },
+        });
+      }
       return null;

     case "error":
