From d0d7a16fa713e8cb77dcf75f99d387b76125df44 Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Fri, 7 Nov 2025 00:27:30 +0530
Subject: [PATCH 1/9] refactor: improve interruption detection word count logic

- Remove condition that skipped word count check for empty/undefined STT text
- Apply minInterruptionWords filtering uniformly to all speech scenarios
- Normalize undefined/null transcripts to empty string for consistent handling
- Update onEndOfTurn to use same splitWords logic as onVADInferenceDone
- Add comprehensive test suite with 23 test cases covering:
  * Empty and undefined transcript handling
  * Word count threshold logic
  * Punctuation and whitespace handling
  * Integration scenarios between both methods

This ensures consistent interruption behavior regardless of transcript content,
preventing unwanted interruptions from silence or very short utterances.

All 23 tests pass successfully.
---
 REFACTORING_SUMMARY.md                        | 189 ++++++++
 agents/src/voice/agent_activity.ts            |  37 +-
 .../src/voice/interruption_detection.test.ts  | 435 ++++++++++++++++++
 3 files changed, 653 insertions(+), 8 deletions(-)
 create mode 100644 REFACTORING_SUMMARY.md
 create mode 100644 agents/src/voice/interruption_detection.test.ts

diff --git a/REFACTORING_SUMMARY.md b/REFACTORING_SUMMARY.md
new file mode 100644
index 00000000..a7084ed5
--- /dev/null
+++ b/REFACTORING_SUMMARY.md
@@ -0,0 +1,189 @@
+# Interruption Detection Refactoring - Summary
+
+## Overview
+This document describes the refactoring of the interruption detection logic in the LiveKit Agents framework, specifically in the `AgentActivity` class.
+
+## Problem Statement
+Previously, the `minInterruptionWords` check was only applied when the STT text result was non-empty. This created inconsistent behavior:
+- Empty strings and undefined transcripts always allowed interruptions (bypassing word count validation)
+- Only non-empty transcripts were subject to the word count minimum threshold
+- This inconsistency could allow unwanted interruptions from silence or very short utterances
+
+## Solution
+The refactored logic ensures that **all interruptions are filtered based on word count**, including:
+- Empty strings (0 words)
+- Undefined/null transcripts (normalized to 0 words)
+- Short utterances (fewer than `minInterruptionWords`)
+- Exact matches (exactly `minInterruptionWords`)
+- Full speech (more than `minInterruptionWords`)
+
+## Changes Made
+
+### 1. File: `agents/src/voice/agent_activity.ts`
+
+#### Method: `onVADInferenceDone` (lines 613-653)
+**Before:**
+```typescript
+if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
+  const text = this.audioRecognition.currentTranscript;
+  
+  // Only checked if text was truthy
+  if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
+    return;
+  }
+}
+```
+
+**After:**
+```typescript
+if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
+  const text = this.audioRecognition.currentTranscript;
+  
+  // Normalize text: convert undefined/null to empty string for consistent word counting
+  const normalizedText = text ?? '';
+  const wordCount = splitWords(normalizedText, true).length;
+  
+  // Only allow interruption if word count meets or exceeds minInterruptionWords
+  if (wordCount < this.agentSession.options.minInterruptionWords) {
+    return;
+  }
+}
+```
+
+**Key Changes:**
+- Removed the `text &&` condition that skipped checking empty strings
+- Added explicit normalization: `text ?? ''` converts undefined/null to empty string
+- Calculate word count on normalized text for all cases
+- Apply the same threshold comparison uniformly
+
+#### Method: `onEndOfTurn` (lines 770-809)
+**Before:**
+```typescript
+if (
+  this.stt &&
+  this.turnDetection !== 'manual' &&
+  this._currentSpeech &&
+  this._currentSpeech.allowInterruptions &&
+  !this._currentSpeech.interrupted &&
+  this.agentSession.options.minInterruptionWords > 0 &&
+  info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
+) {
+  // avoid interruption if the new_transcript is too short
+  this.cancelPreemptiveGeneration();
+  this.logger.info('skipping user input, new_transcript is too short');
+  return false;
+}
+```
+
+**After:**
+```typescript
+if (
+  this.stt &&
+  this.turnDetection !== 'manual' &&
+  this._currentSpeech &&
+  this._currentSpeech.allowInterruptions &&
+  !this._currentSpeech.interrupted &&
+  this.agentSession.options.minInterruptionWords > 0
+) {
+  const wordCount = splitWords(info.newTranscript, true).length;
+  if (wordCount < this.agentSession.options.minInterruptionWords) {
+    // avoid interruption if the new_transcript contains fewer words than minInterruptionWords
+    this.cancelPreemptiveGeneration();
+    this.logger.info(
+      {
+        wordCount,
+        minInterruptionWords: this.agentSession.options.minInterruptionWords,
+      },
+      'skipping user input, word count below minimum interruption threshold',
+    );
+    return false;
+  }
+}
+```
+
+**Key Changes:**
+- Updated to use consistent `splitWords` function (was using `split(' ')` before)
+- Separated the word count check from the condition block for clarity
+- Added detailed logging with word count and threshold values
+- Ensures consistency with `onVADInferenceDone` logic
+
+### 2. File: `agents/src/voice/interruption_detection.test.ts` (NEW)
+Comprehensive unit test suite with 23 tests covering:
+
+#### Word Splitting Tests (8 tests)
+- Empty string handling
+- Single word detection
+- Multiple word counting
+- Punctuation handling
+- Multiple spaces between words
+- Whitespace-only strings
+- Leading/trailing whitespace
+
+#### Interruption Threshold Logic (5 tests)
+- Word count below threshold (should block)
+- Word count at threshold (should allow)
+- Word count above threshold (should allow)
+- Zero threshold behavior (check disabled)
+- High threshold behavior
+
+#### Undefined/Null Handling (4 tests)
+- Undefined normalization
+- Null normalization
+- Empty string preservation
+- Valid string preservation
+
+#### Integration Tests (6 tests)
+- Complete flow for empty string
+- Complete flow for undefined
+- Complete flow for single word
+- Complete flow for exact threshold match
+- Complete flow for exceeding threshold
+- Consistency between `onVADInferenceDone` and `onEndOfTurn`
+
+## Test Results
+```
+✓ |nodejs| agents/src/voice/interruption_detection.test.ts (23 tests) 4ms
+
+Test Files  1 passed (1)
+      Tests  23 passed (23)
+```
+
+All 23 tests pass successfully!
+
+## Impact
+
+### Behavioral Changes
+1. **Empty/Undefined Transcripts**: Now blocked by default when `minInterruptionWords > 0`
+   - Before: Allowed interruption
+   - After: Blocked (0 words < threshold)
+
+2. **Short Utterances**: Consistently blocked based on word count
+   - Before: Only blocked for non-empty strings
+   - After: All utterances checked uniformly
+
+3. **Word Counting Logic**: Now uses `splitWords()` consistently
+   - Before: `onEndOfTurn` used basic `split(' ')`
+   - After: Both methods use `splitWords()` with proper punctuation handling
+
+### Configuration
+- Applications can still disable word count checking by setting `minInterruptionWords: 0`
+- Default value remains `minInterruptionWords: 0` (check disabled by default)
+
+## Benefits
+1. **Consistency**: Uniform behavior across all code paths
+2. **Predictability**: No edge cases where empty speech bypasses word count check
+3. **Robustness**: Explicit normalization prevents undefined/null related issues
+4. **Maintainability**: Clear, well-documented code with comprehensive test coverage
+5. **Logging**: Enhanced debug information for troubleshooting interruption issues
+
+## Migration Guide
+No action required for most users. However, if your application relies on the previous behavior where empty speech could interrupt:
+- Set `minInterruptionWords: 0` explicitly to disable word count checking
+- Or adjust `minInterruptionWords` to accommodate shorter utterances
+
+## Files Modified
+- `agents/src/voice/agent_activity.ts` - Refactored interruption logic
+- `agents/src/voice/interruption_detection.test.ts` - NEW comprehensive test suite
+
+## Branch
+Created on branch: `mini-interruption`
diff --git a/agents/src/voice/agent_activity.ts b/agents/src/voice/agent_activity.ts
index 137b38dc..4a03460e 100644
--- a/agents/src/voice/agent_activity.ts
+++ b/agents/src/voice/agent_activity.ts
@@ -625,11 +625,21 @@ export class AgentActivity implements RecognitionHooks {
       return;
     }
 
+    // Refactored interruption word count check:
+    // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
+    // - Apply check to all STT results: empty string, undefined, or any length
+    // - This ensures consistent behavior across all interruption scenarios
     if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
       const text = this.audioRecognition.currentTranscript;
-
       // TODO(shubhra): better word splitting for multi-language
-      if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
+
+      // Normalize text: convert undefined/null to empty string for consistent word counting
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+
+      // Only allow interruption if word count meets or exceeds minInterruptionWords
+      // This applies to all cases: empty strings, partial speech, and full speech
+      if (wordCount < this.agentSession.options.minInterruptionWords) {
         return;
       }
     }
@@ -767,19 +777,30 @@ export class AgentActivity implements RecognitionHooks {
       return true;
     }
 
+    // Refactored interruption word count check for consistency with onVADInferenceDone:
+    // - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
+    // - Use consistent word splitting logic with splitWords (matching onVADInferenceDone pattern)
     if (
       this.stt &&
       this.turnDetection !== 'manual' &&
       this._currentSpeech &&
       this._currentSpeech.allowInterruptions &&
       !this._currentSpeech.interrupted &&
-      this.agentSession.options.minInterruptionWords > 0 &&
-      info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
+      this.agentSession.options.minInterruptionWords > 0
     ) {
-      // avoid interruption if the new_transcript is too short
-      this.cancelPreemptiveGeneration();
-      this.logger.info('skipping user input, new_transcript is too short');
-      return false;
+      const wordCount = splitWords(info.newTranscript, true).length;
+      if (wordCount < this.agentSession.options.minInterruptionWords) {
+        // avoid interruption if the new_transcript contains fewer words than minInterruptionWords
+        this.cancelPreemptiveGeneration();
+        this.logger.info(
+          {
+            wordCount,
+            minInterruptionWords: this.agentSession.options.minInterruptionWords,
+          },
+          'skipping user input, word count below minimum interruption threshold',
+        );
+        return false;
+      }
     }
 
     const oldTask = this._userTurnCompletedTask;
diff --git a/agents/src/voice/interruption_detection.test.ts b/agents/src/voice/interruption_detection.test.ts
new file mode 100644
index 00000000..6fe5b243
--- /dev/null
+++ b/agents/src/voice/interruption_detection.test.ts
@@ -0,0 +1,435 @@
+// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+/**
+ * @fileoverview Unit tests for interruption detection logic in AgentActivity.
+ *
+ * Tests the refactored minInterruptionWords check which ensures:
+ * - Consistent word count filtering across all speech scenarios
+ * - Proper handling of empty strings, undefined, and short speech
+ * - Interruptions are only allowed when word count >= minInterruptionWords
+ *
+ * Key test scenarios:
+ * 1. Empty string STT result - should be blocked (0 words < threshold)
+ * 2. Undefined STT result - should be blocked (0 words < threshold)
+ * 3. Fewer words than minimum - should be blocked (e.g., 1 word < 2 minimum)
+ * 4. Exactly minimum words - should be allowed (e.g., 2 words >= 2 minimum)
+ * 5. More than minimum words - should be allowed (e.g., 5 words >= 2 minimum)
+ */
+
+import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { splitWords } from '../tokenize/basic/word.js';
+
+/**
+ * Test Suite: Word Splitting and Counting Logic
+ *
+ * These tests verify that the word splitting function works correctly
+ * with various input formats that might be received from STT engines.
+ */
+describe('Interruption Detection - Word Counting', () => {
+  describe('Word Splitting Behavior', () => {
+    /**
+     * Test Case 1: Empty String
+     *
+     * Input: Empty string ""
+     * Expected: Word count = 0
+     * Implication: Should NOT allow interruption when minInterruptionWords > 0
+     */
+    it('should count empty string as 0 words', () => {
+      const text = '';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(0);
+    });
+
+    /**
+     * Test Case 2: Single Word
+     *
+     * Input: "hello"
+     * Expected: Word count = 1
+     * Implication: Should NOT allow interruption when minInterruptionWords >= 2
+     */
+    it('should count single word correctly', () => {
+      const text = 'hello';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(1);
+    });
+
+    /**
+     * Test Case 3: Two Words
+     *
+     * Input: "hello world"
+     * Expected: Word count = 2
+     * Implication: Should ALLOW interruption when minInterruptionWords = 2
+     */
+    it('should count two words correctly', () => {
+      const text = 'hello world';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(2);
+    });
+
+    /**
+     * Test Case 4: Multiple Words
+     *
+     * Input: "hello this is a full sentence"
+     * Expected: Word count = 6
+     * Implication: Should ALLOW interruption for any minInterruptionWords <= 6
+     */
+    it('should count multiple words correctly', () => {
+      const text = 'hello this is a full sentence';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(6);
+    });
+
+    /**
+     * Test Case 5: Punctuation Handling
+     *
+     * Input: "hello, world!"
+     * Expected: Word count = 2 (punctuation stripped)
+     * Implication: Punctuation should not artificially inflate word count
+     */
+    it('should handle punctuation correctly', () => {
+      const text = 'hello, world!';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(2);
+    });
+
+    /**
+     * Test Case 6: Extra Whitespace
+     *
+     * Input: "hello  world" (double space)
+     * Expected: Word count = 2 (multiple spaces treated as single separator)
+     * Implication: Robust handling of inconsistent spacing
+     */
+    it('should handle multiple spaces between words', () => {
+      const text = 'hello  world';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(2);
+    });
+
+    /**
+     * Test Case 7: Whitespace-Only String
+     *
+     * Input: "   " (only spaces)
+     * Expected: Word count = 0
+     * Implication: Should NOT allow interruption (functionally empty)
+     */
+    it('should count whitespace-only string as 0 words', () => {
+      const text = '   ';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(0);
+    });
+
+    /**
+     * Test Case 8: Leading and Trailing Whitespace
+     *
+     * Input: "  hello world  " (spaces before and after)
+     * Expected: Word count = 2 (whitespace trimmed)
+     * Implication: Edge whitespace should not affect word count
+     */
+    it('should handle leading and trailing whitespace', () => {
+      const text = '  hello world  ';
+      const wordCount = splitWords(text, true).length;
+      expect(wordCount).toBe(2);
+    });
+  });
+
+  describe('Interruption Threshold Logic', () => {
+    /**
+     * Test Case 9: Word Count Comparison - Below Threshold
+     *
+     * Scenario: minInterruptionWords = 2
+     * Input: "hello" (1 word)
+     * Check: 1 < 2 should be TRUE (block interruption)
+     */
+    it('should block interruption when word count is below threshold', () => {
+      const minInterruptionWords = 2;
+      const wordCount = 1;
+      const shouldBlock = wordCount < minInterruptionWords;
+      expect(shouldBlock).toBe(true);
+    });
+
+    /**
+     * Test Case 10: Word Count Comparison - At Threshold
+     *
+     * Scenario: minInterruptionWords = 2
+     * Input: "hello world" (2 words)
+     * Check: 2 < 2 should be FALSE (allow interruption)
+     */
+    it('should allow interruption when word count meets threshold', () => {
+      const minInterruptionWords = 2;
+      const wordCount = 2;
+      const shouldBlock = wordCount < minInterruptionWords;
+      expect(shouldBlock).toBe(false);
+    });
+
+    /**
+     * Test Case 11: Word Count Comparison - Above Threshold
+     *
+     * Scenario: minInterruptionWords = 2
+     * Input: "hello this is a test" (5 words)
+     * Check: 5 < 2 should be FALSE (allow interruption)
+     */
+    it('should allow interruption when word count exceeds threshold', () => {
+      const minInterruptionWords = 2;
+      const wordCount = 5;
+      const shouldBlock = wordCount < minInterruptionWords;
+      expect(shouldBlock).toBe(false);
+    });
+
+    /**
+     * Test Case 12: Zero Threshold (Disabled Check)
+     *
+     * Scenario: minInterruptionWords = 0 (check disabled)
+     * Input: "" (empty)
+     * Expected: Word count check should be skipped entirely
+     * Implication: When threshold is 0, any speech should allow interruption
+     */
+    it('should skip word count check when minInterruptionWords is 0', () => {
+      const minInterruptionWords = 0;
+      const wordCount = 0;
+      // When minInterruptionWords is 0, the check is not performed at all
+      const shouldPerformCheck = minInterruptionWords > 0;
+      expect(shouldPerformCheck).toBe(false);
+    });
+
+    /**
+     * Test Case 13: High Threshold
+     *
+     * Scenario: minInterruptionWords = 5
+     * Input: "hello world" (2 words)
+     * Check: 2 < 5 should be TRUE (block interruption)
+     */
+    it('should respect high minInterruptionWords threshold', () => {
+      const minInterruptionWords = 5;
+      const wordCount = 2;
+      const shouldBlock = wordCount < minInterruptionWords;
+      expect(shouldBlock).toBe(true);
+    });
+  });
+
+  describe('Undefined and Null Handling', () => {
+    /**
+     * Test Case 14: Undefined Normalization
+     *
+     * Behavior: undefined ?? '' converts undefined to empty string
+     * Expected: Normalized value is ""
+     * Implication: Undefined is treated as empty string (0 words)
+     */
+    it('should normalize undefined to empty string', () => {
+      const text: string | undefined = undefined;
+      const normalizedText = text ?? '';
+      expect(normalizedText).toBe('');
+    });
+
+    /**
+     * Test Case 15: Null Normalization
+     *
+     * Behavior: null ?? '' converts null to empty string
+     * Expected: Normalized value is ""
+     * Implication: Null is treated as empty string (0 words)
+     */
+    it('should normalize null to empty string', () => {
+      const text: string | null = null;
+      const normalizedText = text ?? '';
+      expect(normalizedText).toBe('');
+    });
+
+    /**
+     * Test Case 16: Empty String Pass-Through
+     *
+     * Behavior: '' ?? '' remains as empty string
+     * Expected: Normalized value is ""
+     * Implication: Empty string is preserved and counted as 0 words
+     */
+    it('should preserve empty string during normalization', () => {
+      const text = '';
+      const normalizedText = text ?? '';
+      expect(normalizedText).toBe('');
+    });
+
+    /**
+     * Test Case 17: Valid String Pass-Through
+     *
+     * Behavior: 'hello' ?? '' remains as 'hello'
+     * Expected: Normalized value is "hello"
+     * Implication: Valid strings are preserved during normalization
+     */
+    it('should preserve valid string during normalization', () => {
+      const text = 'hello';
+      const normalizedText = text ?? '';
+      expect(normalizedText).toBe('hello');
+    });
+  });
+
+  describe('Integration: Full Interruption Check Logic', () => {
+    /**
+     * Test Case 18: Complete Logic Flow - Empty String Should Block
+     *
+     * Scenario:
+     * - STT is available
+     * - minInterruptionWords = 2
+     * - currentTranscript = ""
+     *
+     * Expected Flow:
+     * 1. text = ""
+     * 2. normalizedText = "" ?? '' = ""
+     * 3. wordCount = splitWords("", true).length = 0
+     * 4. Check: 0 < 2 = true → BLOCK interruption
+     */
+    it('should block interruption for empty transcript with threshold 2', () => {
+      const text = '';
+      const minInterruptionWords = 2;
+
+      // Simulate refactored logic
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+      const shouldBlock = wordCount < minInterruptionWords;
+
+      expect(normalizedText).toBe('');
+      expect(wordCount).toBe(0);
+      expect(shouldBlock).toBe(true);
+    });
+
+    /**
+     * Test Case 19: Complete Logic Flow - Undefined Should Block
+     *
+     * Scenario:
+     * - STT is available
+     * - minInterruptionWords = 2
+     * - currentTranscript = undefined
+     *
+     * Expected Flow:
+     * 1. text = undefined
+     * 2. normalizedText = undefined ?? '' = ""
+     * 3. wordCount = splitWords("", true).length = 0
+     * 4. Check: 0 < 2 = true → BLOCK interruption
+     */
+    it('should block interruption for undefined transcript with threshold 2', () => {
+      const text: string | undefined = undefined;
+      const minInterruptionWords = 2;
+
+      // Simulate refactored logic
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+      const shouldBlock = wordCount < minInterruptionWords;
+
+      expect(normalizedText).toBe('');
+      expect(wordCount).toBe(0);
+      expect(shouldBlock).toBe(true);
+    });
+
+    /**
+     * Test Case 20: Complete Logic Flow - One Word Should Block
+     *
+     * Scenario:
+     * - STT is available
+     * - minInterruptionWords = 2
+     * - currentTranscript = "hello"
+     *
+     * Expected Flow:
+     * 1. text = "hello"
+     * 2. normalizedText = "hello" ?? '' = "hello"
+     * 3. wordCount = splitWords("hello", true).length = 1
+     * 4. Check: 1 < 2 = true → BLOCK interruption
+     */
+    it('should block interruption for single word with threshold 2', () => {
+      const text = 'hello';
+      const minInterruptionWords = 2;
+
+      // Simulate refactored logic
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+      const shouldBlock = wordCount < minInterruptionWords;
+
+      expect(normalizedText).toBe('hello');
+      expect(wordCount).toBe(1);
+      expect(shouldBlock).toBe(true);
+    });
+
+    /**
+     * Test Case 21: Complete Logic Flow - Exact Match Should Allow
+     *
+     * Scenario:
+     * - STT is available
+     * - minInterruptionWords = 2
+     * - currentTranscript = "hello world"
+     *
+     * Expected Flow:
+     * 1. text = "hello world"
+     * 2. normalizedText = "hello world" ?? '' = "hello world"
+     * 3. wordCount = splitWords("hello world", true).length = 2
+     * 4. Check: 2 < 2 = false → ALLOW interruption
+     */
+    it('should allow interruption when word count exactly meets threshold', () => {
+      const text = 'hello world';
+      const minInterruptionWords = 2;
+
+      // Simulate refactored logic
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+      const shouldBlock = wordCount < minInterruptionWords;
+
+      expect(normalizedText).toBe('hello world');
+      expect(wordCount).toBe(2);
+      expect(shouldBlock).toBe(false);
+    });
+
+    /**
+     * Test Case 22: Complete Logic Flow - Exceeding Threshold Should Allow
+     *
+     * Scenario:
+     * - STT is available
+     * - minInterruptionWords = 2
+     * - currentTranscript = "hello this is a full sentence"
+     *
+     * Expected Flow:
+     * 1. text = "hello this is a full sentence"
+     * 2. normalizedText = "hello this is a full sentence" ?? '' = "hello this is a full sentence"
+     * 3. wordCount = splitWords("hello this is a full sentence", true).length = 6
+     * 4. Check: 6 < 2 = false → ALLOW interruption
+     */
+    it('should allow interruption when word count exceeds threshold', () => {
+      const text = 'hello this is a full sentence';
+      const minInterruptionWords = 2;
+
+      // Simulate refactored logic
+      const normalizedText = text ?? '';
+      const wordCount = splitWords(normalizedText, true).length;
+      const shouldBlock = wordCount < minInterruptionWords;
+
+      expect(normalizedText).toBe('hello this is a full sentence');
+      expect(wordCount).toBe(6);
+      expect(shouldBlock).toBe(false);
+    });
+
+    /**
+     * Test Case 23: Consistency Between onVADInferenceDone and onEndOfTurn
+     *
+     * Both methods should use the same word-splitting logic and comparison.
+     * They should produce identical results for the same transcript and threshold.
+     *
+     * Scenario: Compare word counting in both contexts
+     */
+    it('should apply consistent word counting logic in both methods', () => {
+      const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
+      const threshold = 2;
+
+      transcripts.forEach((transcript) => {
+        // Simulate onVADInferenceDone logic
+        const text1 = transcript;
+        const normalizedText1 = text1 ?? '';
+        const wordCount1 = splitWords(normalizedText1, true).length;
+        const shouldBlock1 = wordCount1 < threshold;
+
+        // Simulate onEndOfTurn logic (which now uses splitWords directly)
+        const wordCount2 = splitWords(transcript, true).length;
+        const shouldBlock2 = wordCount2 < threshold;
+
+        // Results should be identical
+        expect(wordCount1).toBe(wordCount2);
+        expect(shouldBlock1).toBe(shouldBlock2);
+      });
+    });
+  });
+});

From 9039f2f951c0cca3074efc2050e1ead1f01fc8af Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Fri, 7 Nov 2025 00:55:01 +0530
Subject: [PATCH 2/9] refactor: apply minInterruptionWords check consistently;
 add comprehensive interruption detection tests

---
 .../src/voice/interruption_detection.test.ts  | 230 +-----------------
 1 file changed, 4 insertions(+), 226 deletions(-)

diff --git a/agents/src/voice/interruption_detection.test.ts b/agents/src/voice/interruption_detection.test.ts
index 6fe5b243..dd10df9d 100644
--- a/agents/src/voice/interruption_detection.test.ts
+++ b/agents/src/voice/interruption_detection.test.ts
@@ -3,130 +3,60 @@
 // SPDX-License-Identifier: Apache-2.0
 
 /**
- * @fileoverview Unit tests for interruption detection logic in AgentActivity.
+ * Unit tests for interruption detection logic in AgentActivity.
  *
  * Tests the refactored minInterruptionWords check which ensures:
  * - Consistent word count filtering across all speech scenarios
  * - Proper handling of empty strings, undefined, and short speech
- * - Interruptions are only allowed when word count >= minInterruptionWords
- *
- * Key test scenarios:
- * 1. Empty string STT result - should be blocked (0 words < threshold)
- * 2. Undefined STT result - should be blocked (0 words < threshold)
- * 3. Fewer words than minimum - should be blocked (e.g., 1 word < 2 minimum)
- * 4. Exactly minimum words - should be allowed (e.g., 2 words >= 2 minimum)
- * 5. More than minimum words - should be allowed (e.g., 5 words >= 2 minimum)
+ * - Interruptions allowed only when word count meets or exceeds minInterruptionWords threshold
  */
-
-import { describe, it, expect, beforeEach, vi } from 'vitest';
+import { describe, expect, it } from 'vitest';
 import { splitWords } from '../tokenize/basic/word.js';
 
-/**
- * Test Suite: Word Splitting and Counting Logic
- *
- * These tests verify that the word splitting function works correctly
- * with various input formats that might be received from STT engines.
- */
 describe('Interruption Detection - Word Counting', () => {
   describe('Word Splitting Behavior', () => {
-    /**
-     * Test Case 1: Empty String
-     *
-     * Input: Empty string ""
-     * Expected: Word count = 0
-     * Implication: Should NOT allow interruption when minInterruptionWords > 0
-     */
     it('should count empty string as 0 words', () => {
       const text = '';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(0);
     });
 
-    /**
-     * Test Case 2: Single Word
-     *
-     * Input: "hello"
-     * Expected: Word count = 1
-     * Implication: Should NOT allow interruption when minInterruptionWords >= 2
-     */
     it('should count single word correctly', () => {
       const text = 'hello';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(1);
     });
 
-    /**
-     * Test Case 3: Two Words
-     *
-     * Input: "hello world"
-     * Expected: Word count = 2
-     * Implication: Should ALLOW interruption when minInterruptionWords = 2
-     */
     it('should count two words correctly', () => {
       const text = 'hello world';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(2);
     });
 
-    /**
-     * Test Case 4: Multiple Words
-     *
-     * Input: "hello this is a full sentence"
-     * Expected: Word count = 6
-     * Implication: Should ALLOW interruption for any minInterruptionWords <= 6
-     */
     it('should count multiple words correctly', () => {
       const text = 'hello this is a full sentence';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(6);
     });
 
-    /**
-     * Test Case 5: Punctuation Handling
-     *
-     * Input: "hello, world!"
-     * Expected: Word count = 2 (punctuation stripped)
-     * Implication: Punctuation should not artificially inflate word count
-     */
     it('should handle punctuation correctly', () => {
       const text = 'hello, world!';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(2);
     });
 
-    /**
-     * Test Case 6: Extra Whitespace
-     *
-     * Input: "hello  world" (double space)
-     * Expected: Word count = 2 (multiple spaces treated as single separator)
-     * Implication: Robust handling of inconsistent spacing
-     */
     it('should handle multiple spaces between words', () => {
       const text = 'hello  world';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(2);
     });
 
-    /**
-     * Test Case 7: Whitespace-Only String
-     *
-     * Input: "   " (only spaces)
-     * Expected: Word count = 0
-     * Implication: Should NOT allow interruption (functionally empty)
-     */
     it('should count whitespace-only string as 0 words', () => {
       const text = '   ';
       const wordCount = splitWords(text, true).length;
       expect(wordCount).toBe(0);
     });
 
-    /**
-     * Test Case 8: Leading and Trailing Whitespace
-     *
-     * Input: "  hello world  " (spaces before and after)
-     * Expected: Word count = 2 (whitespace trimmed)
-     * Implication: Edge whitespace should not affect word count
-     */
     it('should handle leading and trailing whitespace', () => {
       const text = '  hello world  ';
       const wordCount = splitWords(text, true).length;
@@ -135,13 +65,6 @@ describe('Interruption Detection - Word Counting', () => {
   });
 
   describe('Interruption Threshold Logic', () => {
-    /**
-     * Test Case 9: Word Count Comparison - Below Threshold
-     *
-     * Scenario: minInterruptionWords = 2
-     * Input: "hello" (1 word)
-     * Check: 1 < 2 should be TRUE (block interruption)
-     */
     it('should block interruption when word count is below threshold', () => {
       const minInterruptionWords = 2;
       const wordCount = 1;
@@ -149,13 +72,6 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(true);
     });
 
-    /**
-     * Test Case 10: Word Count Comparison - At Threshold
-     *
-     * Scenario: minInterruptionWords = 2
-     * Input: "hello world" (2 words)
-     * Check: 2 < 2 should be FALSE (allow interruption)
-     */
     it('should allow interruption when word count meets threshold', () => {
       const minInterruptionWords = 2;
       const wordCount = 2;
@@ -163,43 +79,19 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(false);
     });
 
-    /**
-     * Test Case 11: Word Count Comparison - Above Threshold
-     *
-     * Scenario: minInterruptionWords = 2
-     * Input: "hello this is a test" (5 words)
-     * Check: 5 < 2 should be FALSE (allow interruption)
-     */
     it('should allow interruption when word count exceeds threshold', () => {
       const minInterruptionWords = 2;
-      const wordCount = 5;
+      const wordCount = 6;
       const shouldBlock = wordCount < minInterruptionWords;
       expect(shouldBlock).toBe(false);
     });
 
-    /**
-     * Test Case 12: Zero Threshold (Disabled Check)
-     *
-     * Scenario: minInterruptionWords = 0 (check disabled)
-     * Input: "" (empty)
-     * Expected: Word count check should be skipped entirely
-     * Implication: When threshold is 0, any speech should allow interruption
-     */
     it('should skip word count check when minInterruptionWords is 0', () => {
       const minInterruptionWords = 0;
-      const wordCount = 0;
-      // When minInterruptionWords is 0, the check is not performed at all
       const shouldPerformCheck = minInterruptionWords > 0;
       expect(shouldPerformCheck).toBe(false);
     });
 
-    /**
-     * Test Case 13: High Threshold
-     *
-     * Scenario: minInterruptionWords = 5
-     * Input: "hello world" (2 words)
-     * Check: 2 < 5 should be TRUE (block interruption)
-     */
     it('should respect high minInterruptionWords threshold', () => {
       const minInterruptionWords = 5;
       const wordCount = 2;
@@ -209,52 +101,24 @@ describe('Interruption Detection - Word Counting', () => {
   });
 
   describe('Undefined and Null Handling', () => {
-    /**
-     * Test Case 14: Undefined Normalization
-     *
-     * Behavior: undefined ?? '' converts undefined to empty string
-     * Expected: Normalized value is ""
-     * Implication: Undefined is treated as empty string (0 words)
-     */
     it('should normalize undefined to empty string', () => {
       const text: string | undefined = undefined;
       const normalizedText = text ?? '';
       expect(normalizedText).toBe('');
     });
 
-    /**
-     * Test Case 15: Null Normalization
-     *
-     * Behavior: null ?? '' converts null to empty string
-     * Expected: Normalized value is ""
-     * Implication: Null is treated as empty string (0 words)
-     */
     it('should normalize null to empty string', () => {
       const text: string | null = null;
       const normalizedText = text ?? '';
       expect(normalizedText).toBe('');
     });
 
-    /**
-     * Test Case 16: Empty String Pass-Through
-     *
-     * Behavior: '' ?? '' remains as empty string
-     * Expected: Normalized value is ""
-     * Implication: Empty string is preserved and counted as 0 words
-     */
     it('should preserve empty string during normalization', () => {
       const text = '';
       const normalizedText = text ?? '';
       expect(normalizedText).toBe('');
     });
 
-    /**
-     * Test Case 17: Valid String Pass-Through
-     *
-     * Behavior: 'hello' ?? '' remains as 'hello'
-     * Expected: Normalized value is "hello"
-     * Implication: Valid strings are preserved during normalization
-     */
     it('should preserve valid string during normalization', () => {
       const text = 'hello';
       const normalizedText = text ?? '';
@@ -263,25 +127,10 @@ describe('Interruption Detection - Word Counting', () => {
   });
 
   describe('Integration: Full Interruption Check Logic', () => {
-    /**
-     * Test Case 18: Complete Logic Flow - Empty String Should Block
-     *
-     * Scenario:
-     * - STT is available
-     * - minInterruptionWords = 2
-     * - currentTranscript = ""
-     *
-     * Expected Flow:
-     * 1. text = ""
-     * 2. normalizedText = "" ?? '' = ""
-     * 3. wordCount = splitWords("", true).length = 0
-     * 4. Check: 0 < 2 = true → BLOCK interruption
-     */
     it('should block interruption for empty transcript with threshold 2', () => {
       const text = '';
       const minInterruptionWords = 2;
 
-      // Simulate refactored logic
       const normalizedText = text ?? '';
       const wordCount = splitWords(normalizedText, true).length;
       const shouldBlock = wordCount < minInterruptionWords;
@@ -291,25 +140,10 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(true);
     });
 
-    /**
-     * Test Case 19: Complete Logic Flow - Undefined Should Block
-     *
-     * Scenario:
-     * - STT is available
-     * - minInterruptionWords = 2
-     * - currentTranscript = undefined
-     *
-     * Expected Flow:
-     * 1. text = undefined
-     * 2. normalizedText = undefined ?? '' = ""
-     * 3. wordCount = splitWords("", true).length = 0
-     * 4. Check: 0 < 2 = true → BLOCK interruption
-     */
     it('should block interruption for undefined transcript with threshold 2', () => {
       const text: string | undefined = undefined;
       const minInterruptionWords = 2;
 
-      // Simulate refactored logic
       const normalizedText = text ?? '';
       const wordCount = splitWords(normalizedText, true).length;
       const shouldBlock = wordCount < minInterruptionWords;
@@ -319,25 +153,10 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(true);
     });
 
-    /**
-     * Test Case 20: Complete Logic Flow - One Word Should Block
-     *
-     * Scenario:
-     * - STT is available
-     * - minInterruptionWords = 2
-     * - currentTranscript = "hello"
-     *
-     * Expected Flow:
-     * 1. text = "hello"
-     * 2. normalizedText = "hello" ?? '' = "hello"
-     * 3. wordCount = splitWords("hello", true).length = 1
-     * 4. Check: 1 < 2 = true → BLOCK interruption
-     */
     it('should block interruption for single word with threshold 2', () => {
       const text = 'hello';
       const minInterruptionWords = 2;
 
-      // Simulate refactored logic
       const normalizedText = text ?? '';
       const wordCount = splitWords(normalizedText, true).length;
       const shouldBlock = wordCount < minInterruptionWords;
@@ -347,25 +166,10 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(true);
     });
 
-    /**
-     * Test Case 21: Complete Logic Flow - Exact Match Should Allow
-     *
-     * Scenario:
-     * - STT is available
-     * - minInterruptionWords = 2
-     * - currentTranscript = "hello world"
-     *
-     * Expected Flow:
-     * 1. text = "hello world"
-     * 2. normalizedText = "hello world" ?? '' = "hello world"
-     * 3. wordCount = splitWords("hello world", true).length = 2
-     * 4. Check: 2 < 2 = false → ALLOW interruption
-     */
     it('should allow interruption when word count exactly meets threshold', () => {
       const text = 'hello world';
       const minInterruptionWords = 2;
 
-      // Simulate refactored logic
       const normalizedText = text ?? '';
       const wordCount = splitWords(normalizedText, true).length;
       const shouldBlock = wordCount < minInterruptionWords;
@@ -375,25 +179,10 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(false);
     });
 
-    /**
-     * Test Case 22: Complete Logic Flow - Exceeding Threshold Should Allow
-     *
-     * Scenario:
-     * - STT is available
-     * - minInterruptionWords = 2
-     * - currentTranscript = "hello this is a full sentence"
-     *
-     * Expected Flow:
-     * 1. text = "hello this is a full sentence"
-     * 2. normalizedText = "hello this is a full sentence" ?? '' = "hello this is a full sentence"
-     * 3. wordCount = splitWords("hello this is a full sentence", true).length = 6
-     * 4. Check: 6 < 2 = false → ALLOW interruption
-     */
     it('should allow interruption when word count exceeds threshold', () => {
       const text = 'hello this is a full sentence';
       const minInterruptionWords = 2;
 
-      // Simulate refactored logic
       const normalizedText = text ?? '';
       const wordCount = splitWords(normalizedText, true).length;
       const shouldBlock = wordCount < minInterruptionWords;
@@ -403,30 +192,19 @@ describe('Interruption Detection - Word Counting', () => {
       expect(shouldBlock).toBe(false);
     });
 
-    /**
-     * Test Case 23: Consistency Between onVADInferenceDone and onEndOfTurn
-     *
-     * Both methods should use the same word-splitting logic and comparison.
-     * They should produce identical results for the same transcript and threshold.
-     *
-     * Scenario: Compare word counting in both contexts
-     */
     it('should apply consistent word counting logic in both methods', () => {
       const transcripts = ['', 'hello', 'hello world', 'this is a longer sentence'];
       const threshold = 2;
 
       transcripts.forEach((transcript) => {
-        // Simulate onVADInferenceDone logic
         const text1 = transcript;
         const normalizedText1 = text1 ?? '';
         const wordCount1 = splitWords(normalizedText1, true).length;
         const shouldBlock1 = wordCount1 < threshold;
 
-        // Simulate onEndOfTurn logic (which now uses splitWords directly)
         const wordCount2 = splitWords(transcript, true).length;
         const shouldBlock2 = wordCount2 < threshold;
 
-        // Results should be identical
         expect(wordCount1).toBe(wordCount2);
         expect(shouldBlock1).toBe(shouldBlock2);
       });

From a87516a9dad127736a8a80ea602c7e7ecb458798 Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Fri, 7 Nov 2025 00:59:16 +0530
Subject: [PATCH 3/9] refactor: apply minInterruptionWords check consistently;
 add comprehensive interruption detection tests

- Refactored onVADInferenceDone() to normalize undefined/null text and apply word count check consistently
- Refactored onEndOfTurn() to use splitWords() for consistent word splitting with onVADInferenceDone()
- Added 23 comprehensive unit tests covering all scenarios: empty strings, undefined, short/long speech, thresholds
- All tests passing (23/23)
- Added SPDX headers to REFACTORING_SUMMARY.md for REUSE compliance
---
 REFACTORING_SUMMARY.md | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/REFACTORING_SUMMARY.md b/REFACTORING_SUMMARY.md
index a7084ed5..25544319 100644
--- a/REFACTORING_SUMMARY.md
+++ b/REFACTORING_SUMMARY.md
@@ -1,3 +1,9 @@
+<!--
+SPDX-FileCopyrightText: 2025 LiveKit, Inc.
+
+SPDX-License-Identifier: Apache-2.0
+-->
+
 # Interruption Detection Refactoring - Summary
 
 ## Overview

From b3e511e42ed9bf9090caa2a90ff39cad001b0492 Mon Sep 17 00:00:00 2001
From: Devesh <deveshrathod047@gmail.com>
Date: Mon, 10 Nov 2025 22:34:30 +0530
Subject: [PATCH 4/9] Delete REFACTORING_SUMMARY.md

---
 REFACTORING_SUMMARY.md | 195 -----------------------------------------
 1 file changed, 195 deletions(-)
 delete mode 100644 REFACTORING_SUMMARY.md

diff --git a/REFACTORING_SUMMARY.md b/REFACTORING_SUMMARY.md
deleted file mode 100644
index 25544319..00000000
--- a/REFACTORING_SUMMARY.md
+++ /dev/null
@@ -1,195 +0,0 @@
-<!--
-SPDX-FileCopyrightText: 2025 LiveKit, Inc.
-
-SPDX-License-Identifier: Apache-2.0
--->
-
-# Interruption Detection Refactoring - Summary
-
-## Overview
-This document describes the refactoring of the interruption detection logic in the LiveKit Agents framework, specifically in the `AgentActivity` class.
-
-## Problem Statement
-Previously, the `minInterruptionWords` check was only applied when the STT text result was non-empty. This created inconsistent behavior:
-- Empty strings and undefined transcripts always allowed interruptions (bypassing word count validation)
-- Only non-empty transcripts were subject to the word count minimum threshold
-- This inconsistency could allow unwanted interruptions from silence or very short utterances
-
-## Solution
-The refactored logic ensures that **all interruptions are filtered based on word count**, including:
-- Empty strings (0 words)
-- Undefined/null transcripts (normalized to 0 words)
-- Short utterances (fewer than `minInterruptionWords`)
-- Exact matches (exactly `minInterruptionWords`)
-- Full speech (more than `minInterruptionWords`)
-
-## Changes Made
-
-### 1. File: `agents/src/voice/agent_activity.ts`
-
-#### Method: `onVADInferenceDone` (lines 613-653)
-**Before:**
-```typescript
-if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
-  const text = this.audioRecognition.currentTranscript;
-  
-  // Only checked if text was truthy
-  if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {
-    return;
-  }
-}
-```
-
-**After:**
-```typescript
-if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
-  const text = this.audioRecognition.currentTranscript;
-  
-  // Normalize text: convert undefined/null to empty string for consistent word counting
-  const normalizedText = text ?? '';
-  const wordCount = splitWords(normalizedText, true).length;
-  
-  // Only allow interruption if word count meets or exceeds minInterruptionWords
-  if (wordCount < this.agentSession.options.minInterruptionWords) {
-    return;
-  }
-}
-```
-
-**Key Changes:**
-- Removed the `text &&` condition that skipped checking empty strings
-- Added explicit normalization: `text ?? ''` converts undefined/null to empty string
-- Calculate word count on normalized text for all cases
-- Apply the same threshold comparison uniformly
-
-#### Method: `onEndOfTurn` (lines 770-809)
-**Before:**
-```typescript
-if (
-  this.stt &&
-  this.turnDetection !== 'manual' &&
-  this._currentSpeech &&
-  this._currentSpeech.allowInterruptions &&
-  !this._currentSpeech.interrupted &&
-  this.agentSession.options.minInterruptionWords > 0 &&
-  info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
-) {
-  // avoid interruption if the new_transcript is too short
-  this.cancelPreemptiveGeneration();
-  this.logger.info('skipping user input, new_transcript is too short');
-  return false;
-}
-```
-
-**After:**
-```typescript
-if (
-  this.stt &&
-  this.turnDetection !== 'manual' &&
-  this._currentSpeech &&
-  this._currentSpeech.allowInterruptions &&
-  !this._currentSpeech.interrupted &&
-  this.agentSession.options.minInterruptionWords > 0
-) {
-  const wordCount = splitWords(info.newTranscript, true).length;
-  if (wordCount < this.agentSession.options.minInterruptionWords) {
-    // avoid interruption if the new_transcript contains fewer words than minInterruptionWords
-    this.cancelPreemptiveGeneration();
-    this.logger.info(
-      {
-        wordCount,
-        minInterruptionWords: this.agentSession.options.minInterruptionWords,
-      },
-      'skipping user input, word count below minimum interruption threshold',
-    );
-    return false;
-  }
-}
-```
-
-**Key Changes:**
-- Updated to use consistent `splitWords` function (was using `split(' ')` before)
-- Separated the word count check from the condition block for clarity
-- Added detailed logging with word count and threshold values
-- Ensures consistency with `onVADInferenceDone` logic
-
-### 2. File: `agents/src/voice/interruption_detection.test.ts` (NEW)
-Comprehensive unit test suite with 23 tests covering:
-
-#### Word Splitting Tests (8 tests)
-- Empty string handling
-- Single word detection
-- Multiple word counting
-- Punctuation handling
-- Multiple spaces between words
-- Whitespace-only strings
-- Leading/trailing whitespace
-
-#### Interruption Threshold Logic (5 tests)
-- Word count below threshold (should block)
-- Word count at threshold (should allow)
-- Word count above threshold (should allow)
-- Zero threshold behavior (check disabled)
-- High threshold behavior
-
-#### Undefined/Null Handling (4 tests)
-- Undefined normalization
-- Null normalization
-- Empty string preservation
-- Valid string preservation
-
-#### Integration Tests (6 tests)
-- Complete flow for empty string
-- Complete flow for undefined
-- Complete flow for single word
-- Complete flow for exact threshold match
-- Complete flow for exceeding threshold
-- Consistency between `onVADInferenceDone` and `onEndOfTurn`
-
-## Test Results
-```
-✓ |nodejs| agents/src/voice/interruption_detection.test.ts (23 tests) 4ms
-
-Test Files  1 passed (1)
-      Tests  23 passed (23)
-```
-
-All 23 tests pass successfully!
-
-## Impact
-
-### Behavioral Changes
-1. **Empty/Undefined Transcripts**: Now blocked by default when `minInterruptionWords > 0`
-   - Before: Allowed interruption
-   - After: Blocked (0 words < threshold)
-
-2. **Short Utterances**: Consistently blocked based on word count
-   - Before: Only blocked for non-empty strings
-   - After: All utterances checked uniformly
-
-3. **Word Counting Logic**: Now uses `splitWords()` consistently
-   - Before: `onEndOfTurn` used basic `split(' ')`
-   - After: Both methods use `splitWords()` with proper punctuation handling
-
-### Configuration
-- Applications can still disable word count checking by setting `minInterruptionWords: 0`
-- Default value remains `minInterruptionWords: 0` (check disabled by default)
-
-## Benefits
-1. **Consistency**: Uniform behavior across all code paths
-2. **Predictability**: No edge cases where empty speech bypasses word count check
-3. **Robustness**: Explicit normalization prevents undefined/null related issues
-4. **Maintainability**: Clear, well-documented code with comprehensive test coverage
-5. **Logging**: Enhanced debug information for troubleshooting interruption issues
-
-## Migration Guide
-No action required for most users. However, if your application relies on the previous behavior where empty speech could interrupt:
-- Set `minInterruptionWords: 0` explicitly to disable word count checking
-- Or adjust `minInterruptionWords` to accommodate shorter utterances
-
-## Files Modified
-- `agents/src/voice/agent_activity.ts` - Refactored interruption logic
-- `agents/src/voice/interruption_detection.test.ts` - NEW comprehensive test suite
-
-## Branch
-Created on branch: `mini-interruption`

From 9fed052dce8e961da2f5faf76e8e46fb7fe4b953 Mon Sep 17 00:00:00 2001
From: Devesh <deveshrathod047@gmail.com>
Date: Mon, 10 Nov 2025 22:43:51 +0530
Subject: [PATCH 5/9] Delete interruption threshold logic tests

Removed tests for interruption threshold logic.
---
 .../src/voice/interruption_detection.test.ts  | 35 -------------------
 1 file changed, 35 deletions(-)

diff --git a/agents/src/voice/interruption_detection.test.ts b/agents/src/voice/interruption_detection.test.ts
index dd10df9d..335cf07c 100644
--- a/agents/src/voice/interruption_detection.test.ts
+++ b/agents/src/voice/interruption_detection.test.ts
@@ -64,41 +64,6 @@ describe('Interruption Detection - Word Counting', () => {
     });
   });
 
-  describe('Interruption Threshold Logic', () => {
-    it('should block interruption when word count is below threshold', () => {
-      const minInterruptionWords = 2;
-      const wordCount = 1;
-      const shouldBlock = wordCount < minInterruptionWords;
-      expect(shouldBlock).toBe(true);
-    });
-
-    it('should allow interruption when word count meets threshold', () => {
-      const minInterruptionWords = 2;
-      const wordCount = 2;
-      const shouldBlock = wordCount < minInterruptionWords;
-      expect(shouldBlock).toBe(false);
-    });
-
-    it('should allow interruption when word count exceeds threshold', () => {
-      const minInterruptionWords = 2;
-      const wordCount = 6;
-      const shouldBlock = wordCount < minInterruptionWords;
-      expect(shouldBlock).toBe(false);
-    });
-
-    it('should skip word count check when minInterruptionWords is 0', () => {
-      const minInterruptionWords = 0;
-      const shouldPerformCheck = minInterruptionWords > 0;
-      expect(shouldPerformCheck).toBe(false);
-    });
-
-    it('should respect high minInterruptionWords threshold', () => {
-      const minInterruptionWords = 5;
-      const wordCount = 2;
-      const shouldBlock = wordCount < minInterruptionWords;
-      expect(shouldBlock).toBe(true);
-    });
-  });
 
   describe('Undefined and Null Handling', () => {
     it('should normalize undefined to empty string', () => {

From 1808876a2c7ec828a82f03fcac9550b6e0397981 Mon Sep 17 00:00:00 2001
From: Devesh <deveshrathod047@gmail.com>
Date: Mon, 10 Nov 2025 22:46:16 +0530
Subject: [PATCH 6/9] Remove undefined and null handling tests

Removed tests for undefined and null handling normalization.
---
 .../src/voice/interruption_detection.test.ts  | 26 -------------------
 1 file changed, 26 deletions(-)

diff --git a/agents/src/voice/interruption_detection.test.ts b/agents/src/voice/interruption_detection.test.ts
index 335cf07c..bf32f38c 100644
--- a/agents/src/voice/interruption_detection.test.ts
+++ b/agents/src/voice/interruption_detection.test.ts
@@ -65,32 +65,6 @@ describe('Interruption Detection - Word Counting', () => {
   });
 
 
-  describe('Undefined and Null Handling', () => {
-    it('should normalize undefined to empty string', () => {
-      const text: string | undefined = undefined;
-      const normalizedText = text ?? '';
-      expect(normalizedText).toBe('');
-    });
-
-    it('should normalize null to empty string', () => {
-      const text: string | null = null;
-      const normalizedText = text ?? '';
-      expect(normalizedText).toBe('');
-    });
-
-    it('should preserve empty string during normalization', () => {
-      const text = '';
-      const normalizedText = text ?? '';
-      expect(normalizedText).toBe('');
-    });
-
-    it('should preserve valid string during normalization', () => {
-      const text = 'hello';
-      const normalizedText = text ?? '';
-      expect(normalizedText).toBe('hello');
-    });
-  });
-
   describe('Integration: Full Interruption Check Logic', () => {
     it('should block interruption for empty transcript with threshold 2', () => {
       const text = '';

From 75e094082c961afd1a731183c28c6bc2f6a93a2e Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Tue, 11 Nov 2025 18:17:36 +0530
Subject: [PATCH 7/9] refactor: remove unnecessary test in interruption
 detection tests

---
 agents/src/voice/interruption_detection.test.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/agents/src/voice/interruption_detection.test.ts b/agents/src/voice/interruption_detection.test.ts
index bf32f38c..63d3ec9a 100644
--- a/agents/src/voice/interruption_detection.test.ts
+++ b/agents/src/voice/interruption_detection.test.ts
@@ -64,7 +64,6 @@ describe('Interruption Detection - Word Counting', () => {
     });
   });
 
-
   describe('Integration: Full Interruption Check Logic', () => {
     it('should block interruption for empty transcript with threshold 2', () => {
       const text = '';

From fa61663ca99b6210c3d8d2b2ca4214337f594780 Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Wed, 12 Nov 2025 18:28:27 +0530
Subject: [PATCH 8/9] refactor: implement persistent WebSocket connection for
 ElevenLabs TTS

- Replace per-call WebSocket creation with single persistent connection
- Implement WebSocketManager class for multi-stream API support
- Add context-based routing for multiplexed concurrent requests
- Implement async send/recv loops for efficient message handling
- Add graceful connection draining and lifecycle management
- Port Python _Connection class pattern to TypeScript

This allows multiple synthesize() calls to reuse the same WebSocket connection,
reducing latency and resource overhead.
---
 plugins/elevenlabs/src/tts.ts | 649 ++++++++++++++++++++++++++--------
 1 file changed, 503 insertions(+), 146 deletions(-)

diff --git a/plugins/elevenlabs/src/tts.ts b/plugins/elevenlabs/src/tts.ts
index 31793f98..6951a92c 100644
--- a/plugins/elevenlabs/src/tts.ts
+++ b/plugins/elevenlabs/src/tts.ts
@@ -1,6 +1,17 @@
 // SPDX-FileCopyrightText: 2024 LiveKit, Inc.
 //
 // SPDX-License-Identifier: Apache-2.0
+
+/**
+ * REFACTORED: Persistent WebSocket Connection for ElevenLabs TTS
+ *
+ * Key improvements:
+ * - Single persistent WebSocket per TTS instance (multi-stream API)
+ * - Multiple TTS requests multiplexed via context IDs
+ * - Efficient send/recv loops with proper lifecycle management
+ * - Graceful connection draining when connection is replaced
+ */
+
 import {
   AsyncIterableQueue,
   AudioByteStream,
@@ -10,11 +21,11 @@ import {
   tts,
 } from '@livekit/agents';
 import type { AudioFrame } from '@livekit/rtc-node';
-import { URL } from 'node:url';
 import { type RawData, WebSocket } from 'ws';
 import type { TTSEncoding, TTSModels } from './models.js';
 
 const DEFAULT_INACTIVITY_TIMEOUT = 300;
+const AUTHORIZATION_HEADER = 'xi-api-key';
 
 type Voice = {
   id: string;
@@ -43,7 +54,7 @@ const DEFAULT_VOICE: Voice = {
 };
 
 const API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';
-const AUTHORIZATION_HEADER = 'xi-api-key';
+
 
 export interface TTSOptions {
   apiKey?: string;
@@ -73,8 +84,378 @@ const defaultTTSOptions: TTSOptions = {
   syncAlignment: true,
 };
 
+// ============================================================================
+// WebSocket Connection Manager - Manages persistent connection with multi-stream support
+// ============================================================================
+
+interface StreamContext {
+  contextId: string;
+  eos: boolean;
+  audioBuffer: Int8Array[];
+}
+
+interface SynthesizeContent {
+  type: 'synthesize';
+  contextId: string;
+  text: string;
+  flush: boolean;
+}
+
+interface CloseContext {
+  type: 'close';
+  contextId: string;
+}
+
+type QueueMessage = SynthesizeContent | CloseContext;
+
+/**
+ * Manages a single persistent WebSocket connection for multi-stream TTS.
+ * Allows multiple synthesize requests to share one connection via context IDs.
+ */
+class WebSocketManager {
+  private ws: WebSocket | null = null;
+  private opts: TTSOptions;
+  private logger = log();
+  private inputQueue = new AsyncIterableQueue<QueueMessage>();
+  private contextData = new Map<string, StreamContext>();
+  private activeContexts = new Set<string>();
+  private sendTask: Promise<void> | null = null;
+  private recvTask: Promise<void> | null = null;
+  private closed = false;
+  private isCurrent = true;
+
+  constructor(opts: TTSOptions) {
+    this.opts = opts;
+  }
+
+  async connect(): Promise<void> {
+    if (this.ws || this.closed) {
+      return;
+    }
+
+    const url = this.buildMultiStreamUrl();
+    const headers = {
+      [AUTHORIZATION_HEADER]: this.opts.apiKey!,
+    };
+
+    this.ws = new WebSocket(url, { headers });
+
+    // Wait for connection to open
+    await new Promise<void>((resolve, reject) => {
+      if (!this.ws) {
+        reject(new Error('WebSocket not initialized'));
+        return;
+      }
+
+      const ws = this.ws;
+      let resolved = false;
+
+      const openHandler = () => {
+        resolved = true;
+        ws.removeListener('open', openHandler);
+        ws.removeListener('error', errorHandler);
+        resolve();
+      };
+
+      const errorHandler = (error: Error) => {
+        if (!resolved) {
+          ws.removeListener('open', openHandler);
+          reject(new Error(`WebSocket connection failed: ${error.message}`));
+        }
+      };
+
+      ws.on('open', openHandler);
+      ws.on('error', errorHandler);
+    });
+
+    // Start send and recv loops
+    this.sendTask = this.sendLoop();
+    this.recvTask = this.recvLoop();
+  }
+
+  registerContext(contextId: string): void {
+    if (!this.contextData.has(contextId)) {
+      this.contextData.set(contextId, {
+        contextId,
+        eos: false,
+        audioBuffer: [],
+      });
+    }
+  }
+
+  sendContent(contextId: string, text: string, flush: boolean = false): void {
+    if (this.closed || !this.ws || this.ws.readyState !== 1) {
+      throw new Error('WebSocket connection is closed');
+    }
+
+    this.inputQueue.put({
+      type: 'synthesize',
+      contextId,
+      text,
+      flush,
+    });
+  }
+
+  closeContext(contextId: string): void {
+    if (this.closed || !this.ws || this.ws.readyState !== 1) {
+      throw new Error('WebSocket connection is closed');
+    }
+
+    this.inputQueue.put({
+      type: 'close',
+      contextId,
+    });
+  }
+
+  getContextAudio(contextId: string): Int8Array[] | null {
+    return this.contextData.get(contextId)?.audioBuffer ?? null;
+  }
+
+  isContextEOS(contextId: string): boolean {
+    return this.contextData.get(contextId)?.eos ?? false;
+  }
+
+  markNonCurrent(): void {
+    this.isCurrent = false;
+  }
+
+  get isClosed(): boolean {
+    return this.closed;
+  }
+
+  async close(): Promise<void> {
+    if (this.closed) {
+      return;
+    }
+
+    this.closed = true;
+    this.inputQueue.close();
+
+    this.contextData.clear();
+    this.activeContexts.clear();
+
+    if (this.ws) {
+      this.ws.close();
+      this.ws = null;
+    }
+
+    if (this.sendTask) {
+      try {
+        await this.sendTask;
+      } catch {
+        // Expected when queue closes
+      }
+    }
+
+    if (this.recvTask) {
+      try {
+        await this.recvTask;
+      } catch {
+        // Expected when connection closes
+      }
+    }
+  }
+
+  private buildMultiStreamUrl(): string {
+    const baseURL = this.opts.baseURL
+      .replace('https://', 'wss://')
+      .replace('http://', 'ws://')
+      .replace(/\/$/, '');
+
+    const voiceId = this.opts.voice.id;
+    let urlStr = `${baseURL}/text-to-speech/${voiceId}/multi-stream-input?`;
+
+    const params: string[] = [];
+    params.push(`model_id=${this.opts.modelID}`);
+    params.push(`output_format=${this.opts.encoding}`);
+    params.push(`enable_ssml_parsing=${this.opts.enableSsmlParsing}`);
+    params.push(`sync_alignment=${this.opts.syncAlignment}`);
+    params.push(`inactivity_timeout=${this.opts.inactivityTimeout}`);
+
+    if (this.opts.streamingLatency !== undefined) {
+      params.push(`optimize_streaming_latency=${this.opts.streamingLatency}`);
+    }
+
+    if (this.opts.autoMode !== undefined) {
+      params.push(`auto_mode=${this.opts.autoMode}`);
+    }
+
+    if (this.opts.languageCode) {
+      params.push(`language_code=${this.opts.languageCode}`);
+    }
+
+    urlStr += params.join('&');
+    return urlStr;
+  }
+
+  private async sendLoop(): Promise<void> {
+    try {
+      for await (const msg of this.inputQueue) {
+        if (!this.ws || this.ws.readyState !== 1) {
+          break;
+        }
+
+        if (msg.type === 'synthesize') {
+          const isNewContext = !this.activeContexts.has(msg.contextId);
+
+          // If not current and new context, ignore (connection is draining)
+          if (!this.isCurrent && isNewContext) {
+            continue;
+          }
+
+          if (isNewContext) {
+            const voiceSettings = this.opts.voice.settings || {};
+            const initPkt = {
+              text: ' ',
+              voice_settings: voiceSettings,
+              context_id: msg.contextId,
+              ...(this.opts.chunkLengthSchedule && {
+                generation_config: {
+                  chunk_length_schedule: this.opts.chunkLengthSchedule,
+                },
+              }),
+            };
+
+            this.ws.send(JSON.stringify(initPkt));
+            this.activeContexts.add(msg.contextId);
+          }
+
+          const textPkt = {
+            text: msg.text + ' ',
+            context_id: msg.contextId,
+          };
+
+          this.ws.send(JSON.stringify(textPkt));
+
+          if (msg.flush) {
+            const flushPkt = {
+              text: '',
+              context_id: msg.contextId,
+            };
+            this.ws.send(JSON.stringify(flushPkt));
+          }
+        } else if (msg.type === 'close') {
+          if (this.activeContexts.has(msg.contextId)) {
+            const closePkt = {
+              context_id: msg.contextId,
+              close_context: true,
+            };
+            this.ws.send(JSON.stringify(closePkt));
+            this.activeContexts.delete(msg.contextId);
+          }
+        }
+      }
+    } catch (error) {
+      this.logger.error({ error }, 'Error in send loop');
+    } finally {
+      if (!this.closed) {
+        await this.close();
+      }
+    }
+  }
+
+  private async recvLoop(): Promise<void> {
+    try {
+      while (!this.closed && this.ws && this.ws.readyState === 1) {
+        const msg = await new Promise<RawData>((resolve, reject) => {
+          if (!this.ws) {
+            reject(new Error('WebSocket not available'));
+            return;
+          }
+
+          const ws = this.ws;
+          let resolved = false;
+
+          const messageHandler = (data: RawData) => {
+            if (!resolved) {
+              resolved = true;
+              ws.removeListener('message', messageHandler);
+              ws.removeListener('close', closeHandler);
+              ws.removeListener('error', errorHandler);
+              resolve(data);
+            }
+          };
+
+          const closeHandler = () => {
+            if (!resolved) {
+              resolved = true;
+              ws.removeListener('message', messageHandler);
+              ws.removeListener('error', errorHandler);
+              reject(new Error('WebSocket closed'));
+            }
+          };
+
+          const errorHandler = (error: Error) => {
+            if (!resolved) {
+              resolved = true;
+              ws.removeListener('message', messageHandler);
+              ws.removeListener('close', closeHandler);
+              reject(error);
+            }
+          };
+
+          ws.on('message', messageHandler);
+          ws.on('close', closeHandler);
+          ws.on('error', errorHandler);
+        });
+
+        try {
+          const data = JSON.parse(msg.toString()) as Record<string, unknown>;
+          const contextId = (data.contextId || data.context_id) as string | undefined;
+
+          if (!contextId || !this.contextData.has(contextId)) {
+            continue;
+          }
+
+          const context = this.contextData.get(contextId)!;
+
+          if (data.error) {
+            this.logger.error({ contextId, error: data.error }, 'ElevenLabs error');
+            this.contextData.delete(contextId);
+            continue;
+          }
+
+          if (data.audio) {
+            const audioBuffer = Buffer.from(data.audio as string, 'base64');
+            const audioArray = new Int8Array(audioBuffer);
+            context.audioBuffer.push(audioArray);
+          }
+
+          if (data.isFinal) {
+            context.eos = true;
+            this.activeContexts.delete(contextId);
+
+            if (!this.isCurrent && this.activeContexts.size === 0) {
+              this.logger.debug('No active contexts, shutting down');
+              break;
+            }
+          }
+        } catch (parseError) {
+          this.logger.warn({ parseError }, 'Failed to parse message');
+        }
+      }
+    } catch (error) {
+      this.logger.error({ error }, 'Recv loop error');
+      for (const context of this.contextData.values()) {
+        context.eos = true;
+      }
+    } finally {
+      if (!this.closed) {
+        await this.close();
+      }
+    }
+  }
+}
+
+// ============================================================================
+// TTS Implementation
+// ============================================================================
+
 export class TTS extends tts.TTS {
   #opts: TTSOptions;
+  #logger = log();
+  #connection: WebSocketManager | null = null;
+  #connectionLock: Promise<void> | null = null;
   label = 'elevenlabs.TTS';
 
   constructor(opts: Partial<TTSOptions> = {}) {
@@ -117,6 +498,38 @@ export class TTS extends tts.TTS {
       });
   }
 
+  async getCurrentConnection(): Promise<WebSocketManager> {
+    // Wait for any ongoing connection attempt
+    if (this.#connectionLock) {
+      await this.#connectionLock;
+      if (this.#connection && !this.#connection.isClosed) {
+        return this.#connection;
+      }
+    }
+
+    // Create new lock for this connection attempt
+    const newConnectionLock = (async () => {
+      // Mark old connection as non-current if it exists
+      if (this.#connection && !this.#connection.isClosed) {
+        this.#connection.markNonCurrent();
+      }
+
+      // Create and connect new manager
+      const manager = new WebSocketManager(this.#opts);
+      await manager.connect();
+      this.#connection = manager;
+    })();
+
+    this.#connectionLock = newConnectionLock;
+    try {
+      await newConnectionLock;
+    } finally {
+      this.#connectionLock = null;
+    }
+
+    return this.#connection!;
+  }
+
   synthesize(): tts.ChunkedStream {
     throw new Error('Chunked responses are not supported on ElevenLabs TTS');
   }
@@ -124,123 +537,86 @@ export class TTS extends tts.TTS {
   stream(): tts.SynthesizeStream {
     return new SynthesizeStream(this, this.#opts);
   }
+
+  async aclose(): Promise<void> {
+    if (this.#connection) {
+      await this.#connection.close();
+      this.#connection = null;
+    }
+  }
 }
 
 export class SynthesizeStream extends tts.SynthesizeStream {
   #opts: TTSOptions;
   #logger = log();
+  #tts: TTS;
+  #contextId: string;
+  #connection: WebSocketManager | null = null;
   label = 'elevenlabs.SynthesizeStream';
-  readonly streamURL: URL;
 
   constructor(tts: TTS, opts: TTSOptions) {
     super(tts);
+    this.#tts = tts;
     this.#opts = opts;
-    this.closed = false;
-
-    // add trailing slash to URL if needed
-    const baseURL = opts.baseURL + (opts.baseURL.endsWith('/') ? '' : '/');
-
-    this.streamURL = new URL(`text-to-speech/${opts.voice.id}/stream-input`, baseURL);
-    const params = {
-      model_id: opts.modelID,
-      output_format: opts.encoding,
-      enable_ssml_parsing: `${opts.enableSsmlParsing}`,
-      sync_alignment: `${opts.syncAlignment}`,
-      ...(opts.autoMode !== undefined && { auto_mode: `${opts.autoMode}` }),
-      ...(opts.languageCode && { language_code: opts.languageCode }),
-      ...(opts.inactivityTimeout && { inactivity_timeout: `${opts.inactivityTimeout}` }),
-      ...(opts.streamingLatency && { optimize_streaming_latency: `${opts.streamingLatency}` }),
-    };
-    Object.entries(params).forEach(([k, v]) => this.streamURL.searchParams.append(k, v));
-    this.streamURL.protocol = this.streamURL.protocol.replace('http', 'ws');
+    this.#contextId = shortuuid();
   }
 
   protected async run() {
-    const segments = new AsyncIterableQueue<tokenize.WordStream>();
-
-    const tokenizeInput = async () => {
-      let stream: tokenize.WordStream | null = null;
-      for await (const text of this.input) {
-        if (this.abortController.signal.aborted) {
-          break;
+    try {
+      // Get persistent connection
+      this.#connection = await this.#tts.getCurrentConnection();
+      this.#connection.registerContext(this.#contextId);
+
+      const segments = new AsyncIterableQueue<tokenize.WordStream>();
+
+      const tokenizeInput = async () => {
+        let stream: tokenize.WordStream | null = null;
+        for await (const text of this.input) {
+          if (this.abortController.signal.aborted) {
+            break;
+          }
+          if (text === SynthesizeStream.FLUSH_SENTINEL) {
+            stream?.endInput();
+            stream = null;
+          } else {
+            if (!stream) {
+              stream = this.#opts.wordTokenizer.stream();
+              segments.put(stream);
+            }
+            stream.pushText(text);
+          }
         }
-        if (text === SynthesizeStream.FLUSH_SENTINEL) {
-          stream?.endInput();
-          stream = null;
-        } else {
-          if (!stream) {
-            stream = this.#opts.wordTokenizer.stream();
-            segments.put(stream);
+        segments.close();
+      };
+
+      const runStream = async () => {
+        for await (const stream of segments) {
+          if (this.abortController.signal.aborted) {
+            break;
           }
-          stream.pushText(text);
+          await this.runSynthesis(stream);
+          this.queue.put(SynthesizeStream.END_OF_STREAM);
         }
-      }
-      segments.close();
-    };
+      };
 
-    const runStream = async () => {
-      for await (const stream of segments) {
-        if (this.abortController.signal.aborted) {
-          break;
+      await Promise.all([tokenizeInput(), runStream()]);
+    } finally {
+      if (this.#connection) {
+        try {
+          this.#connection.closeContext(this.#contextId);
+        } catch {
+          // Connection may be closed
         }
-        await this.#runWS(stream);
-        this.queue.put(SynthesizeStream.END_OF_STREAM);
       }
-    };
-
-    await Promise.all([tokenizeInput(), runStream()]);
+    }
   }
 
-  async #runWS(stream: tokenize.WordStream, maxRetry = 3) {
-    let retries = 0;
-    let ws: WebSocket;
-    while (true) {
-      ws = new WebSocket(this.streamURL, {
-        headers: { [AUTHORIZATION_HEADER]: this.#opts.apiKey },
-      });
-
-      ws.on('error', (error) => {
-        this.abortController.abort();
-        this.#logger.error({ error }, 'Error connecting to ElevenLabs');
-      });
-
-      try {
-        await new Promise((resolve, reject) => {
-          ws.on('open', resolve);
-          ws.on('error', (error) => reject(error));
-          ws.on('close', (code) => reject(`WebSocket returned ${code}`));
-        });
-        break;
-      } catch (e) {
-        if (retries >= maxRetry) {
-          throw new Error(`failed to connect to ElevenLabs after ${retries} attempts: ${e}`);
-        }
-
-        const delay = Math.min(retries * 5, 5);
-        retries++;
-
-        this.#logger.warn(
-          `failed to connect to ElevenLabs, retrying in ${delay} seconds: ${e} (${retries}/${maxRetry})`,
-        );
-        await new Promise((resolve) => setTimeout(resolve, delay * 1000));
-      }
+  private async runSynthesis(stream: tokenize.WordStream): Promise<void> {
+    if (!this.#connection) {
+      throw new Error('Connection not established');
     }
 
-    const requestId = shortuuid();
-    const segmentId = shortuuid();
-
-    ws.send(
-      JSON.stringify({
-        text: ' ',
-        voice_settings: this.#opts.voice.settings,
-        ...(this.#opts.chunkLengthSchedule && {
-          generation_config: {
-            chunk_length_schedule: this.#opts.chunkLengthSchedule,
-          },
-        }),
-      }),
-    );
-    let eosSent = false;
+    const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
 
     const sendTask = async () => {
       let xmlContent: string[] = [];
@@ -260,74 +636,55 @@ export class SynthesizeStream extends tts.SynthesizeStream {
           }
         }
 
-        ws.send(JSON.stringify({ text: text + ' ' })); //  must always end with a space
+        this.#connection!.sendContent(this.#contextId, text, false);
       }
 
       if (xmlContent.length) {
-        this.#logger.warn('ElevenLabs stream ended with incomplete XML content');
+        this.#logger.warn('Stream ended with incomplete XML content');
       }
 
-      // no more tokens, mark eos
-      ws.send(JSON.stringify({ text: '' }));
-      eosSent = true;
+      // Signal end of stream
+      this.#connection!.sendContent(this.#contextId, '', true);
     };
 
     let lastFrame: AudioFrame | undefined;
     const sendLastFrame = (segmentId: string, final: boolean) => {
       if (lastFrame) {
-        this.queue.put({ requestId, segmentId, frame: lastFrame, final });
+        this.queue.put({
+          requestId: this.#contextId,
+          segmentId,
+          frame: lastFrame,
+          final,
+        });
         lastFrame = undefined;
       }
     };
 
     const listenTask = async () => {
-      let finalReceived = false;
-      const bstream = new AudioByteStream(sampleRateFromFormat(this.#opts.encoding), 1);
-      while (!this.closed && !this.abortController.signal.aborted) {
-        try {
-          await new Promise<RawData>((resolve, reject) => {
-            ws.removeAllListeners();
-            ws.on('message', (data) => resolve(data));
-            ws.on('close', (code, reason) => {
-              if (!eosSent) {
-                this.#logger.error(`WebSocket closed with code ${code}: ${reason}`);
-              }
-              if (!finalReceived) {
-                reject(new Error('WebSocket closed'));
-              }
-            });
-          }).then((msg) => {
-            const json = JSON.parse(msg.toString());
-            // remove the "audio" field from the json object when printing
-            if ('audio' in json && json.audio !== null) {
-              const data = new Int8Array(Buffer.from(json.audio, 'base64'));
-              for (const frame of bstream.write(data)) {
-                sendLastFrame(segmentId, false);
-                lastFrame = frame;
-              }
-            } else if (json.isFinal) {
-              finalReceived = true;
-              for (const frame of bstream.flush()) {
-                sendLastFrame(segmentId, false);
-                lastFrame = frame;
-              }
-              sendLastFrame(segmentId, true);
-              this.queue.put(SynthesizeStream.END_OF_STREAM);
-
-              if (segmentId === requestId || this.abortController.signal.aborted) {
-                ws.close();
-                return;
-              }
-            }
-          });
-        } catch (err) {
-          // skip log error for normal websocket close
-          if (err instanceof Error && !err.message.includes('WebSocket closed')) {
-            this.#logger.error({ err }, 'Error in listenTask from ElevenLabs WebSocket');
+      // Wait for EOS and collect audio
+      while (!this.#connection!.isContextEOS(this.#contextId)) {
+        await new Promise((resolve) => setTimeout(resolve, 10));
+      }
+
+      // Get all audio buffers and process
+      const audioBuffers = this.#connection!.getContextAudio(this.#contextId);
+      if (audioBuffers) {
+        for (const buffer of audioBuffers) {
+          for (const frame of bstream.write(buffer)) {
+            sendLastFrame(this.#contextId, false);
+            lastFrame = frame;
           }
-          break;
         }
       }
+
+      // Flush remaining frames
+      for (const frame of bstream.flush()) {
+        sendLastFrame(this.#contextId, false);
+        lastFrame = frame;
+      }
+
+      sendLastFrame(this.#contextId, true);
+      this.queue.put(SynthesizeStream.END_OF_STREAM);
     };
 
     await Promise.all([sendTask(), listenTask()]);

From 72f6f3142ffbf08a7abacc74f7601ec738a50a9d Mon Sep 17 00:00:00 2001
From: Devesh36 <142524747+Devesh36@users.noreply.github.com>
Date: Wed, 12 Nov 2025 18:36:29 +0530
Subject: [PATCH 9/9] refactor: implement persistent WebSocket connection for
 ElevenLabs TTS

---
 plugins/elevenlabs/src/tts.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/plugins/elevenlabs/src/tts.ts b/plugins/elevenlabs/src/tts.ts
index 6951a92c..34e905b7 100644
--- a/plugins/elevenlabs/src/tts.ts
+++ b/plugins/elevenlabs/src/tts.ts
@@ -11,7 +11,6 @@
  * - Efficient send/recv loops with proper lifecycle management
  * - Graceful connection draining when connection is replaced
  */
-
 import {
   AsyncIterableQueue,
   AudioByteStream,
@@ -55,7 +54,6 @@ const DEFAULT_VOICE: Voice = {
 
 const API_BASE_URL_V1 = 'https://api.elevenlabs.io/v1/';
 
-
 export interface TTSOptions {
   apiKey?: string;
   voice: Voice;