Commit 576ff71

feat: Add usage metrics handling to streaming chat completions and update tests (#10)

* feat: Add usage metrics handling to streaming chat completions and update tests
* docs: Add usage metrics handling to README.md

1 parent d3a7a96 · commit 576ff71

File tree

.devcontainer/devcontainer.json
README.md
src/client.ts
tests/client.test.ts

4 files changed: +76 −1

.devcontainer/devcontainer.json (+2 −1)

@@ -16,7 +16,8 @@
         "eamodio.gitlens",
         "VisualStudioExptTeam.vscodeintellicode",
         "christian-kohler.path-intellisense",
-        "christian-kohler.npm-intellisense"
+        "christian-kohler.npm-intellisense",
+        "orta.vscode-jest"
       ],
       "settings": {
         "terminal.integrated.defaultProfile.linux": "zsh",

README.md (+1 −0)

@@ -127,6 +127,7 @@ try {
       onOpen: () => console.log('Stream opened'),
       onContent: (content) => process.stdout.write(content),
       onChunk: (chunk) => console.log('Received chunk:', chunk.id),
+      onUsageMetrics: (metrics) => console.log('Usage metrics:', metrics),
       onFinish: () => console.log('\nStream completed'),
       onError: (error) => console.error('Stream error:', error),
     },
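
For orientation, here is a minimal end-to-end sketch of how a caller might wire up the new callback. The import path and constructor options are assumptions for illustration; only streamChatCompletion, stream_options.include_usage, and the onUsageMetrics callback come from this change.

// Hypothetical import path; the published package name may differ.
import { InferenceGatewayClient } from '@inference-gateway/sdk';

// Constructor options are assumed; the tests below target http://localhost:8080.
const client = new InferenceGatewayClient({ baseURL: 'http://localhost:8080' });

await client.streamChatCompletion(
  {
    model: 'gpt-4o',
    messages: [{ role: 'user', content: 'Hello' }], // the tests use a MessageRole enum here
    // Opt in to a trailing usage chunk, mirroring the test fixture below.
    stream_options: { include_usage: true },
  },
  {
    onContent: (content) => process.stdout.write(content),
    // Fires once, when the final chunk carrying token counts arrives.
    onUsageMetrics: (usage) => console.log('Total tokens:', usage.total_tokens),
  }
);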

src/client.ts (+6 −0)

@@ -1,6 +1,7 @@
 import type {
   Provider,
   SchemaChatCompletionMessageToolCall,
+  SchemaCompletionUsage,
   SchemaCreateChatCompletionRequest,
   SchemaCreateChatCompletionResponse,
   SchemaCreateChatCompletionStreamResponse,
@@ -15,6 +16,7 @@ interface ChatCompletionStreamCallbacks {
   onReasoning?: (reasoningContent: string) => void;
   onContent?: (content: string) => void;
   onTool?: (toolCall: SchemaChatCompletionMessageToolCall) => void;
+  onUsageMetrics?: (usage: SchemaCompletionUsage) => void;
   onFinish?: (
     response: SchemaCreateChatCompletionStreamResponse | null
   ) => void;
@@ -258,6 +260,10 @@ export class InferenceGatewayClient {
               JSON.parse(data);
             callbacks.onChunk?.(chunk);

+            if (chunk.usage && callbacks.onUsageMetrics) {
+              callbacks.onUsageMetrics(chunk.usage);
+            }
+
             const reasoning_content =
               chunk.choices[0]?.delta?.reasoning_content;
             if (reasoning_content !== undefined) {
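
The dispatch added in the last hunk is small but easy to misread out of context; the sketch below restates it with assumed stand-in types. SchemaCompletionUsage is a generated type in this codebase, so the interfaces here are illustrative only, with fields taken from the usage payload asserted in the tests.

// Assumed stand-ins for the generated schema types; field names match
// the usage object exercised in the tests below.
interface CompletionUsage {
  prompt_tokens: number;
  completion_tokens: number;
  total_tokens: number;
}

interface StreamChunk {
  choices: Array<{ delta?: { content?: string; reasoning_content?: string } }>;
  usage?: CompletionUsage; // present only on the trailing chunk when include_usage is set
}

// Mirrors the new guard: the usage payload rides on a final chunk whose
// choices array is empty, so it is checked independently of any delta.
function dispatchUsage(
  chunk: StreamChunk,
  onUsageMetrics?: (usage: CompletionUsage) => void
): void {
  if (chunk.usage && onUsageMetrics) {
    onUsageMetrics(chunk.usage);
  }
}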

tests/client.test.ts (+67 −0)

@@ -415,6 +415,73 @@ describe('InferenceGatewayClient', () => {

       expect(callbacks.onError).toHaveBeenCalledTimes(1);
     });
+
+    it('should handle streaming chat completions with usage metrics', async () => {
+      const mockRequest = {
+        model: 'gpt-4o',
+        messages: [{ role: MessageRole.user, content: 'Hello' }],
+        stream: true,
+        stream_options: {
+          include_usage: true,
+        },
+      };
+
+      const mockStream = new TransformStream();
+      const writer = mockStream.writable.getWriter();
+      const encoder = new TextEncoder();
+
+      mockFetch.mockResolvedValueOnce({
+        ok: true,
+        body: mockStream.readable,
+      });
+
+      const callbacks = {
+        onOpen: jest.fn(),
+        onChunk: jest.fn(),
+        onContent: jest.fn(),
+        onUsageMetrics: jest.fn(),
+        onFinish: jest.fn(),
+        onError: jest.fn(),
+      };
+
+      const streamPromise = client.streamChatCompletion(mockRequest, callbacks);
+
+      await writer.write(
+        encoder.encode(
+          'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
+            'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n' +
+            'data: [DONE]\n\n'
+        )
+      );
+
+      await writer.close();
+      await streamPromise;
+
+      expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
+      expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
+      expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
+      expect(callbacks.onContent).toHaveBeenCalledWith('!');
+      expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
+      expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
+        prompt_tokens: 10,
+        completion_tokens: 8,
+        total_tokens: 18,
+      });
+      expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
+      expect(mockFetch).toHaveBeenCalledWith(
+        'http://localhost:8080/v1/chat/completions',
+        expect.objectContaining({
+          method: 'POST',
+          body: JSON.stringify({
+            ...mockRequest,
+            stream: true,
+          }),
+        })
+      );
+    });
   });

   describe('proxy', () => {
