Skip to content
37 changes: 29 additions & 8 deletions agents/src/voice/agent_activity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -625,11 +625,21 @@ export class AgentActivity implements RecognitionHooks {
return;
}

// Refactored interruption word count check:
// - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
// - Apply check to all STT results: empty string, undefined, or any length
// - This ensures consistent behavior across all interruption scenarios
if (this.stt && this.agentSession.options.minInterruptionWords > 0 && this.audioRecognition) {
const text = this.audioRecognition.currentTranscript;

// TODO(shubhra): better word splitting for multi-language
if (text && splitWords(text, true).length < this.agentSession.options.minInterruptionWords) {

// Normalize text: convert undefined/null to empty string for consistent word counting
const normalizedText = text ?? '';
const wordCount = splitWords(normalizedText, true).length;

// Only allow interruption if word count meets or exceeds minInterruptionWords
// This applies to all cases: empty strings, partial speech, and full speech
if (wordCount < this.agentSession.options.minInterruptionWords) {
return;
}
}
Expand Down Expand Up @@ -767,19 +777,30 @@ export class AgentActivity implements RecognitionHooks {
return true;
}

// Refactored interruption word count check for consistency with onVADInferenceDone:
// - Always apply minInterruptionWords filtering when STT is available and minInterruptionWords > 0
// - Use consistent word splitting logic with splitWords (matching onVADInferenceDone pattern)
if (
this.stt &&
this.turnDetection !== 'manual' &&
this._currentSpeech &&
this._currentSpeech.allowInterruptions &&
!this._currentSpeech.interrupted &&
this.agentSession.options.minInterruptionWords > 0 &&
info.newTranscript.split(' ').length < this.agentSession.options.minInterruptionWords
this.agentSession.options.minInterruptionWords > 0
) {
// avoid interruption if the new_transcript is too short
this.cancelPreemptiveGeneration();
this.logger.info('skipping user input, new_transcript is too short');
return false;
const wordCount = splitWords(info.newTranscript, true).length;
if (wordCount < this.agentSession.options.minInterruptionWords) {
// avoid interruption if the new_transcript contains fewer words than minInterruptionWords
this.cancelPreemptiveGeneration();
this.logger.info(
{
wordCount,
minInterruptionWords: this.agentSession.options.minInterruptionWords,
},
'skipping user input, word count below minimum interruption threshold',
);
return false;
}
}

const oldTask = this._userTurnCompletedTask;
Expand Down
151 changes: 151 additions & 0 deletions agents/src/voice/interruption_detection.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// SPDX-FileCopyrightText: 2025 LiveKit, Inc.
//
// SPDX-License-Identifier: Apache-2.0

/**
 * Unit tests for the word-counting rule behind the minInterruptionWords check.
 *
 * These tests do not instantiate AgentActivity. They exercise `splitWords` directly and
 * re-state the threshold comparison (`wordCount < minInterruptionWords`) locally, covering:
 * - Word counts for empty, whitespace-only, punctuated, and multi-word transcripts
 * - Normalization of an undefined transcript to an empty string before counting
 * - Interruptions allowed only when word count meets or exceeds the minInterruptionWords threshold
 */
import { describe, expect, it } from 'vitest';
import { splitWords } from '../tokenize/basic/word.js';

describe('Interruption Detection - Word Counting', () => {
  /** A transcript value as handled by the checks below; it may be absent. */
  type Transcript = string | undefined;

  /** Maps an absent transcript to the empty string so it can be word-counted. */
  const normalizeTranscript = (transcript: Transcript): string => transcript ?? '';

  /** Length of the `splitWords(text, true)` result for the normalized transcript. */
  const countWords = (transcript: Transcript): number =>
    splitWords(normalizeTranscript(transcript), true).length;

  /**
   * Local re-statement of the threshold comparison under test:
   * an interruption is blocked when the word count is strictly below the minimum.
   */
  const shouldBlockInterruption = (
    transcript: Transcript,
    minInterruptionWords: number,
  ): boolean => countWords(transcript) < minInterruptionWords;

  describe('Word Splitting Behavior', () => {
    const cases: ReadonlyArray<{ title: string; text: string; expected: number }> = [
      { title: 'should count empty string as 0 words', text: '', expected: 0 },
      { title: 'should count single word correctly', text: 'hello', expected: 1 },
      { title: 'should count two words correctly', text: 'hello world', expected: 2 },
      {
        title: 'should count multiple words correctly',
        text: 'hello this is a full sentence',
        expected: 6,
      },
      { title: 'should handle punctuation correctly', text: 'hello, world!', expected: 2 },
      // The input must really contain a run of spaces; with a single space this case
      // would only repeat the two-word case above.
      {
        title: 'should handle multiple spaces between words',
        text: 'hello   world',
        expected: 2,
      },
      { title: 'should count whitespace-only string as 0 words', text: ' ', expected: 0 },
      {
        title: 'should handle leading and trailing whitespace',
        text: ' hello world ',
        expected: 2,
      },
    ];

    for (const { title, text, expected } of cases) {
      it(title, () => {
        expect(splitWords(text, true).length).toBe(expected);
      });
    }
  });

  describe('Integration: Full Interruption Check Logic', () => {
    const minInterruptionWords = 2;

    const cases: ReadonlyArray<{
      title: string;
      text: Transcript;
      normalized: string;
      wordCount: number;
      shouldBlock: boolean;
    }> = [
      {
        title: 'should block interruption for empty transcript with threshold 2',
        text: '',
        normalized: '',
        wordCount: 0,
        shouldBlock: true,
      },
      {
        title: 'should block interruption for undefined transcript with threshold 2',
        text: undefined,
        normalized: '',
        wordCount: 0,
        shouldBlock: true,
      },
      {
        title: 'should block interruption for single word with threshold 2',
        text: 'hello',
        normalized: 'hello',
        wordCount: 1,
        shouldBlock: true,
      },
      {
        title: 'should allow interruption when word count exactly meets threshold',
        text: 'hello world',
        normalized: 'hello world',
        wordCount: 2,
        shouldBlock: false,
      },
      {
        title: 'should allow interruption when word count exceeds threshold',
        text: 'hello this is a full sentence',
        normalized: 'hello this is a full sentence',
        wordCount: 6,
        shouldBlock: false,
      },
    ];

    for (const { title, text, normalized, wordCount, shouldBlock } of cases) {
      it(title, () => {
        expect(normalizeTranscript(text)).toBe(normalized);
        expect(countWords(text)).toBe(wordCount);
        expect(shouldBlockInterruption(text, minInterruptionWords)).toBe(shouldBlock);
      });
    }

    it('should apply consistent word counting logic in both methods', () => {
      const threshold = 2;
      // Expected outcomes are pinned so the test can fail; comparing the two paths
      // only against each other would pass for any splitWords behavior.
      const expectations: ReadonlyArray<{ transcript: string; shouldBlock: boolean }> = [
        { transcript: '', shouldBlock: true },
        { transcript: 'hello', shouldBlock: true },
        { transcript: 'hello world', shouldBlock: false },
        { transcript: 'this is a longer sentence', shouldBlock: false },
      ];

      for (const { transcript, shouldBlock } of expectations) {
        // Path 1: normalize first, then count.
        const normalizedCount = countWords(transcript);
        // Path 2: count the raw transcript directly.
        const directCount = splitWords(transcript, true).length;

        expect(normalizedCount).toBe(directCount);
        expect(normalizedCount < threshold).toBe(shouldBlock);
        expect(directCount < threshold).toBe(shouldBlock);
      }
    });
  });
});
Loading