Fix: Race Condition When Participant Disconnects During STT/TTS Processing (#861)

Devesh36 · web-flow · commit aed026cfb210 · 2025-11-25T04:33:03.000-08:00
diff --git a/.changeset/calm-rivers-flow.md b/.changeset/calm-rivers-flow.md
@@ -0,0 +1,9 @@
+---
+"@livekit/agents": patch
+"@livekit/agents-plugin-cartesia": patch
+"@livekit/agents-plugin-deepgram": patch
+"@livekit/agents-plugin-elevenlabs": patch
+"@livekit/agents-plugin-neuphonic": patch
+---
+
+Fix a race condition where STT/TTS processing could throw a "Queue is closed" error when a participant disconnects. These events are now logged as warnings instead of errors.
diff --git a/agents/src/inference/stt.ts b/agents/src/inference/stt.ts
@@ -459,52 +459,67 @@ export class SpeechStream<TModel extends STTModels> extends BaseSpeechStream {
   }
 
   private processTranscript(data: Record<string, any>, isFinal: boolean) {
+    // Check if queue is closed to avoid race condition during disconnect
+    if (this.queue.closed) return;
+
     const requestId = data.request_id ?? this.requestId;
     const text = data.transcript ?? '';
     const language = data.language ?? this.opts.language ?? 'en';
 
     if (!text && !isFinal) return;
 
-    // We'll have a more accurate way of detecting when speech started when we have VAD
-    if (!this.speaking) {
-      this.speaking = true;
-      this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
-    }
+    try {
+      // We'll have a more accurate way of detecting when speech started when we have VAD
+      if (!this.speaking) {
+        this.speaking = true;
+        this.queue.put({ type: SpeechEventType.START_OF_SPEECH });
+      }
 
-    const speechData: SpeechData = {
-      language,
-      startTime: data.start ?? 0,
-      endTime: data.duration ?? 0,
-      confidence: data.confidence ?? 1.0,
-      text,
-    };
+      const speechData: SpeechData = {
+        language,
+        startTime: data.start ?? 0,
+        endTime: data.duration ?? 0,
+        confidence: data.confidence ?? 1.0,
+        text,
+      };
+
+      if (isFinal) {
+        if (this.speechDuration > 0) {
+          this.queue.put({
+            type: SpeechEventType.RECOGNITION_USAGE,
+            requestId,
+            recognitionUsage: { audioDuration: this.speechDuration },
+          });
+          this.speechDuration = 0;
+        }
 
-    if (isFinal) {
-      if (this.speechDuration > 0) {
         this.queue.put({
-          type: SpeechEventType.RECOGNITION_USAGE,
+          type: SpeechEventType.FINAL_TRANSCRIPT,
           requestId,
-          recognitionUsage: { audioDuration: this.speechDuration },
+          alternatives: [speechData],
         });
-        this.speechDuration = 0;
-      }
-
-      this.queue.put({
-        type: SpeechEventType.FINAL_TRANSCRIPT,
-        requestId,
-        alternatives: [speechData],
-      });
 
-      if (this.speaking) {
-        this.speaking = false;
-        this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
+        if (this.speaking) {
+          this.speaking = false;
+          this.queue.put({ type: SpeechEventType.END_OF_SPEECH });
+        }
+      } else {
+        this.queue.put({
+          type: SpeechEventType.INTERIM_TRANSCRIPT,
+          requestId,
+          alternatives: [speechData],
+        });
+      }
+    } catch (e) {
+      if (e instanceof Error && e.message.includes('Queue is closed')) {
+        // Expected behavior on disconnect, log as warning
+        this.#logger.warn(
+          { err: e },
+          'Queue closed during transcript processing (expected during disconnect)',
+        );
+      } else {
+        this.#logger.error({ err: e }, 'Error putting transcript to queue');
       }
-    } else {
-      this.queue.put({
-        type: SpeechEventType.INTERIM_TRANSCRIPT,
-        requestId,
-        alternatives: [speechData],
-      });
     }
   }
 }
diff --git a/agents/src/stt/stt.ts b/agents/src/stt/stt.ts
@@ -257,7 +257,18 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
 
   protected async monitorMetrics() {
     for await (const event of this.queue) {
-      this.output.put(event);
+      if (!this.output.closed) {
+        try {
+          this.output.put(event);
+        } catch (e) {
+          if (e instanceof Error && e.message.includes('Queue is closed')) {
+            this.logger.warn(
+              { err: e },
+              'Queue closed during transcript processing (expected during disconnect)',
+            );
+          }
+        }
+      }
       if (event.type !== SpeechEventType.RECOGNITION_USAGE) continue;
       const metrics: STTMetrics = {
         type: 'stt_metrics',
@@ -270,7 +281,9 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
       };
       this.#stt.emit('metrics_collected', metrics);
     }
-    this.output.close();
+    if (!this.output.closed) {
+      this.output.close();
+    }
   }
 
   protected abstract run(): Promise<void>;
@@ -336,9 +349,9 @@ export abstract class SpeechStream implements AsyncIterableIterator<SpeechEvent>
 
   /** Close both the input and output of the STT stream */
   close() {
-    this.input.close();
-    this.queue.close();
-    this.output.close();
+    if (!this.input.closed) this.input.close();
+    if (!this.queue.closed) this.queue.close();
+    if (!this.output.closed) this.output.close();
     this.closed = true;
   }
 
diff --git a/plugins/cartesia/src/tts.ts b/plugins/cartesia/src/tts.ts
@@ -305,7 +305,14 @@ export class SynthesizeStream extends tts.SynthesizeStream {
         } catch (err) {
           // skip log error for normal websocket close
           if (err instanceof Error && !err.message.includes('WebSocket closed')) {
-            this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
+            if (err.message.includes('Queue is closed')) {
+              this.#logger.warn(
+                { err },
+                'Queue closed during transcript processing (expected during disconnect)',
+              );
+            } else {
+              this.#logger.error({ err }, 'Error in recvTask from Cartesia WebSocket');
+            }
           }
           clearTTSChunkTimeout();
           break;
diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts
@@ -315,7 +315,13 @@ export class SpeechStream extends stt.SpeechStream {
                 // It's also possible we receive a transcript without a SpeechStarted event.
                 if (this.#speaking) return;
                 this.#speaking = true;
-                this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                if (!this.queue.closed) {
+                  try {
+                    this.queue.put({ type: stt.SpeechEventType.START_OF_SPEECH });
+                  } catch (e) {
+                    // Deliberately ignored: put() can throw "Queue is closed" when a participant disconnects
+                  }
+                }
                 break;
               }
               // see this page:
diff --git a/plugins/elevenlabs/src/tts.ts b/plugins/elevenlabs/src/tts.ts
@@ -344,8 +344,10 @@ export class SynthesizeStream extends tts.SynthesizeStream {
             const json = JSON.parse(msg.toString());
             // remove the "audio" field from the json object when printing
             if ('audio' in json && json.audio !== null) {
-              const data = new Int8Array(Buffer.from(json.audio, 'base64'));
-              for (const frame of bstream.write(data)) {
+              const data = Buffer.from(json.audio, 'base64');
+              for (const frame of bstream.write(
+                data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength),
+              )) {
                 sendLastFrame(segmentId, false);
                 lastFrame = frame;
               }
@@ -367,7 +369,14 @@ export class SynthesizeStream extends tts.SynthesizeStream {
         } catch (err) {
           // skip log error for normal websocket close
           if (err instanceof Error && !err.message.includes('WebSocket closed')) {
-            this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');
+            if (err.message.includes('Queue is closed')) {
+              this.#logger.warn(
+                { err },
+                'Queue closed during transcript processing (expected during disconnect)',
+              );
+            } else {
+              this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');
+            }
           }
           break;
         }
diff --git a/plugins/neuphonic/src/tts.ts b/plugins/neuphonic/src/tts.ts
@@ -237,7 +237,14 @@ export class SynthesizeStream extends tts.SynthesizeStream {
           });
         } catch (err) {
           if (err instanceof Error && !err.message.includes('WebSocket closed prematurely')) {
-            this.#logger.error({ err }, 'Error in recvTask from Neuphonic WebSocket');
+            if (err.message.includes('Queue is closed')) {
+              this.#logger.warn(
+                { err },
+                'Queue closed during transcript processing (expected during disconnect)',
+              );
+            } else {
+              this.#logger.error({ err }, 'Error in recvTask from Neuphonic WebSocket');
+            }
           }
           break;
         }