33// SPDX-License-Identifier: Apache-2.0
44import {
55 AudioFrame ,
6+ AudioMixer ,
67 AudioSource ,
78 LocalAudioTrack ,
89 type LocalTrackPublication ,
@@ -57,7 +58,7 @@ export interface BackgroundAudioPlayerOptions {
5758
5859 /**
5960 * Sound to play when the agent is thinking.
60- * TODO (Brian): Implement thinking sound when AudioMixer becomes available
61+ * Plays when the agent state changes to 'thinking' and stops when it changes to any other state.
6162 */
6263 thinkingSound ?: AudioSourceType | AudioConfig | AudioConfig [ ] ;
6364
@@ -113,15 +114,16 @@ export class PlayHandle {
113114 * This class handles playing ambient sounds and manages audio track publishing.
114115 * It supports:
115116 * - Continuous ambient sound playback with looping
117+ * - Thinking sound playback during agent processing
118+ * - Multiple simultaneous audio streams via AudioMixer
116119 * - Volume control and probability-based sound selection
117120 * - Integration with LiveKit rooms and agent sessions
118121 *
119- * Note: Thinking sound not yet supported
120- *
121122 * @example
122123 * ```typescript
123124 * const player = new BackgroundAudioPlayer({
124125 * ambientSound: { source: BuiltinAudioClip.OFFICE_AMBIENCE, volume: 0.8 },
126+ * thinkingSound: { source: BuiltinAudioClip.KEYBOARD_TYPING, volume: 0.6 },
125127 * });
126128 *
127129 * await player.start({ room, agentSession });
@@ -130,9 +132,12 @@ export class PlayHandle {
130132export class BackgroundAudioPlayer {
131133 private ambientSound ?: AudioSourceType | AudioConfig | AudioConfig [ ] ;
132134 private thinkingSound ?: AudioSourceType | AudioConfig | AudioConfig [ ] ;
135+ private streamTimeoutMs : number ;
133136
134137 private playTasks : Task < void > [ ] = [ ] ;
135138 private audioSource = new AudioSource ( 48000 , 1 , AUDIO_SOURCE_BUFFER_MS ) ;
139+ private audioMixer : AudioMixer ;
140+ private mixerTask ?: Task < void > ;
136141
137142 private room ?: Room ;
138143 private agentSession ?: AgentSession ;
@@ -148,15 +153,17 @@ export class BackgroundAudioPlayer {
148153 #logger = log ( ) ;
149154
150155 constructor ( options ?: BackgroundAudioPlayerOptions ) {
151- const { ambientSound, thinkingSound } = options || { } ;
156+ const { ambientSound, thinkingSound, streamTimeoutMs = 200 } = options || { } ;
152157
153158 this . ambientSound = ambientSound ;
154159 this . thinkingSound = thinkingSound ;
160+ this . streamTimeoutMs = streamTimeoutMs ;
155161
156- if ( this . thinkingSound ) {
157- this . #logger. warn ( 'thinkingSound is not yet supported' ) ;
158- // TODO: Implement thinking sound when AudioMixer becomes available
159- }
162+ this . audioMixer = new AudioMixer ( 48000 , 1 , {
163+ blocksize : 4800 ,
164+ capacity : 1 ,
165+ streamTimeoutMs : this . streamTimeoutMs ,
166+ } ) ;
160167 }
161168
162169 /**
@@ -282,7 +289,11 @@ export class BackgroundAudioPlayer {
282289
283290 // TODO (Brian): check job context is not fake
284291
285- // TODO (Brian): start audio mixer task
292+ // Start audio mixer task
293+ this . mixerTask = Task . from ( async ( ) => {
294+ await this . runMixerTask ( ) ;
295+ } ) ;
296+
286297 this . room . on ( 'reconnected' , this . onReconnected ) ;
287298
288299 this . agentSession ?. on ( AgentSessionEventTypes . AgentStateChanged , this . onAgentStateChanged ) ;
@@ -307,8 +318,12 @@ export class BackgroundAudioPlayer {
307318 await this . republishTask . cancelAndWait ( TASK_TIMEOUT_MS ) ;
308319 }
309320
310- // TODO (Brian): cancel audio mixer task and close audio mixer
321+ // Cancel audio mixer task and close audio mixer
322+ if ( this . mixerTask ) {
323+ await this . mixerTask . cancelAndWait ( TASK_TIMEOUT_MS ) ;
324+ }
311325
326+ await this . audioMixer . aclose ( ) ;
312327 await this . audioSource . close ( ) ;
313328
314329 this . agentSession ?. off ( AgentSessionEventTypes . AgentStateChanged , this . onAgentStateChanged ) ;
@@ -372,12 +387,43 @@ export class BackgroundAudioPlayer {
372387 return ;
373388 }
374389
375- // TODO (Brian): play thinking sound and assign to thinkingHandle
390+ const normalized = this . normalizeSoundSource ( this . thinkingSound ) ;
391+ if ( normalized ) {
392+ const { source, volume } = normalized ;
393+ const selectedSound : AudioConfig = { source, volume, probability : 1.0 } ;
394+ this . thinkingHandle = this . play ( selectedSound ) ;
395+ }
376396 } else {
377397 this . thinkingHandle ?. stop ( ) ;
378398 }
379399 } ;
380400
401+ private applyVolumeToFrame ( frame : AudioFrame , volume : number ) : AudioFrame {
402+ const int16Data = new Int16Array (
403+ frame . data . buffer ,
404+ frame . data . byteOffset ,
405+ frame . data . byteLength / 2 ,
406+ ) ;
407+ const float32Data = new Float32Array ( int16Data . length ) ;
408+
409+ for ( let i = 0 ; i < int16Data . length ; i ++ ) {
410+ float32Data [ i ] = int16Data [ i ] ! ;
411+ }
412+
413+ const volumeFactor = 10 ** Math . log10 ( volume ) ;
414+ for ( let i = 0 ; i < float32Data . length ; i ++ ) {
415+ float32Data [ i ] ! *= volumeFactor ;
416+ }
417+
418+ const outputData = new Int16Array ( float32Data . length ) ;
419+ for ( let i = 0 ; i < float32Data . length ; i ++ ) {
420+ const clipped = Math . max ( - 32768 , Math . min ( 32767 , float32Data [ i ] ! ) ) ;
421+ outputData [ i ] = Math . round ( clipped ) ;
422+ }
423+
424+ return new AudioFrame ( outputData , frame . sampleRate , frame . channels , frame . samplesPerChannel ) ;
425+ }
426+
381427 private async playTask ( {
382428 playHandle,
383429 sound,
@@ -395,57 +441,44 @@ export class BackgroundAudioPlayer {
395441 sound = getBuiltinAudioPath ( sound ) ;
396442 }
397443
444+ let audioStream : AsyncIterable < AudioFrame > ;
398445 if ( typeof sound === 'string' ) {
399- sound = loop
446+ audioStream = loop
400447 ? loopAudioFramesFromFile ( sound , { abortSignal : signal } )
401448 : audioFramesFromFile ( sound , { abortSignal : signal } ) ;
449+ } else {
450+ audioStream = sound ;
402451 }
403452
404- try {
405- for await ( const frame of sound ) {
406- if ( signal . aborted || playHandle . done ( ) ) break ;
453+ const applyVolume = this . applyVolumeToFrame . bind ( this ) ;
407454
408- let processedFrame : AudioFrame ;
409-
410- if ( volume !== 1.0 ) {
411- const int16Data = new Int16Array (
412- frame . data . buffer ,
413- frame . data . byteOffset ,
414- frame . data . byteLength / 2 ,
415- ) ;
416- const float32Data = new Float32Array ( int16Data . length ) ;
417-
418- for ( let i = 0 ; i < int16Data . length ; i ++ ) {
419- float32Data [ i ] = int16Data [ i ] ! ;
420- }
421-
422- const volumeFactor = 10 ** Math . log10 ( volume ) ;
423- for ( let i = 0 ; i < float32Data . length ; i ++ ) {
424- float32Data [ i ] ! *= volumeFactor ;
425- }
426-
427- const outputData = new Int16Array ( float32Data . length ) ;
428- for ( let i = 0 ; i < float32Data . length ; i ++ ) {
429- const clipped = Math . max ( - 32768 , Math . min ( 32767 , float32Data [ i ] ! ) ) ;
430- outputData [ i ] = Math . round ( clipped ) ;
431- }
432-
433- processedFrame = new AudioFrame (
434- outputData ,
435- frame . sampleRate ,
436- frame . channels ,
437- frame . samplesPerChannel ,
438- ) ;
439- } else {
440- processedFrame = frame ;
441- }
442-
443- // TODO (Brian): use AudioMixer to add/remove frame streams
444- await this . audioSource . captureFrame ( processedFrame ) ;
455+ async function * genWrapper ( ) : AsyncGenerator < AudioFrame > {
456+ for await ( const frame of audioStream ) {
457+ if ( signal . aborted || playHandle . done ( ) ) break ;
458+ yield volume !== 1.0 ? applyVolume ( frame , volume ) : frame ;
445459 }
460+ // TODO: the waitForPlayout() may be inaccurate by 400ms
461+ playHandle . _markPlayoutDone ( ) ;
462+ }
463+
464+ const gen = genWrapper ( ) ;
465+ try {
466+ this . audioMixer . addStream ( gen ) ;
467+ await playHandle . waitForPlayout ( ) ; // wait for playout or interruption
446468 } finally {
447- // TODO: the waitForPlayout() may be innaccurate by 400ms
469+ this . audioMixer . removeStream ( gen ) ;
448470 playHandle . _markPlayoutDone ( ) ;
471+
472+ // Close the generator if it was stopped early
473+ if ( playHandle . done ( ) ) {
474+ await gen . return ( undefined ) ;
475+ }
476+ }
477+ }
478+
479+ private async runMixerTask ( ) : Promise < void > {
480+ for await ( const frame of this . audioMixer ) {
481+ await this . audioSource . captureFrame ( frame ) ;
449482 }
450483 }
451484}
0 commit comments