@@ -415,6 +415,73 @@ describe('InferenceGatewayClient', () => {
415
415
416
416
expect ( callbacks . onError ) . toHaveBeenCalledTimes ( 1 ) ;
417
417
} ) ;
418
+
419
it('should handle streaming chat completions with usage metrics', async () => {
  // Request opts into token-usage reporting via stream_options.include_usage.
  const request = {
    model: 'gpt-4o',
    messages: [{ role: MessageRole.user, content: 'Hello' }],
    stream: true,
    stream_options: {
      include_usage: true,
    },
  };

  // Fake server-sent-events body: the client reads from the readable side
  // while the test feeds bytes through the writable side.
  const sseStream = new TransformStream();
  const sseWriter = sseStream.writable.getWriter();
  const utf8 = new TextEncoder();

  mockFetch.mockResolvedValueOnce({
    ok: true,
    body: sseStream.readable,
  });

  const callbacks = {
    onOpen: jest.fn(),
    onChunk: jest.fn(),
    onContent: jest.fn(),
    onUsageMetrics: jest.fn(),
    onFinish: jest.fn(),
    onError: jest.fn(),
  };

  const streamPromise = client.streamChatCompletion(request, callbacks);

  // Five data chunks — assistant role, two content deltas ("Hello", "!"),
  // a stop chunk, and a usage-only chunk with empty choices — then the
  // [DONE] sentinel that terminates the stream.
  await sseWriter.write(
    utf8.encode(
      'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}]}\n\n' +
        'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}\n\n' +
        'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":null}]}\n\n' +
        'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}\n\n' +
        'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","created":1677652288,"model":"gpt-4o","choices":[],"usage":{"prompt_tokens":10,"completion_tokens":8,"total_tokens":18}}\n\n' +
        'data: [DONE]\n\n'
    )
  );

  await sseWriter.close();
  await streamPromise;

  // Lifecycle callbacks fired exactly once; every data chunk was surfaced.
  expect(callbacks.onOpen).toHaveBeenCalledTimes(1);
  expect(callbacks.onChunk).toHaveBeenCalledTimes(5);
  expect(callbacks.onContent).toHaveBeenCalledWith('Hello');
  expect(callbacks.onContent).toHaveBeenCalledWith('!');
  // The usage-only chunk is routed to onUsageMetrics with the parsed totals.
  expect(callbacks.onUsageMetrics).toHaveBeenCalledTimes(1);
  expect(callbacks.onUsageMetrics).toHaveBeenCalledWith({
    prompt_tokens: 10,
    completion_tokens: 8,
    total_tokens: 18,
  });
  expect(callbacks.onFinish).toHaveBeenCalledTimes(1);
  // The client POSTs the original request with stream forced to true.
  expect(mockFetch).toHaveBeenCalledWith(
    'http://localhost:8080/v1/chat/completions',
    expect.objectContaining({
      method: 'POST',
      body: JSON.stringify({
        ...request,
        stream: true,
      }),
    })
  );
});
418
485
} ) ;
419
486
420
487
describe ( 'proxy' , ( ) => {
0 commit comments