diff --git a/agents/src/llm/chat_context.ts b/agents/src/llm/chat_context.ts index 138959490..769b85799 100644 --- a/agents/src/llm/chat_context.ts +++ b/agents/src/llm/chat_context.ts @@ -498,7 +498,6 @@ export class ChatContext { async toProviderFormat(format: ProviderFormat, injectDummyUserMessage: boolean = true) { return await toChatCtx(format, this, injectDummyUserMessage); } - /** * Internal helper used by `truncate` & `addMessage` to find the correct * insertion index for a timestamp so the list remains sorted. diff --git a/plugins/deepgram/src/stt.ts b/plugins/deepgram/src/stt.ts index bc4b1d4a4..612bed173 100644 --- a/plugins/deepgram/src/stt.ts +++ b/plugins/deepgram/src/stt.ts @@ -35,6 +35,13 @@ export interface STTOptions { dictation: boolean; diarize: boolean; numerals: boolean; + /** + * Enable eager end-of-turn detection for preemptive generation. + * When set to a value between 0.3-0.9, Deepgram will emit EagerEndOfTurn events + * when it detects a pause in speech, allowing the agent to start generating responses + * preemptively. + */ + eagerEotThreshold?: number; } const defaultSTTOptions: STTOptions = { @@ -161,6 +168,7 @@ export class SpeechStream extends stt.SpeechStream { keyterm: this.#opts.keyterm, profanity_filter: this.#opts.profanityFilter, language: this.#opts.language, + eager_eot_threshold: this.#opts.eagerEotThreshold, }; Object.entries(params).forEach(([k, v]) => { if (v !== undefined) { @@ -326,6 +334,29 @@ export class SpeechStream extends stt.SpeechStream { break; } + case 'EagerEndOfTurn': { + // Deepgram has detected a pause in speech, but the user is technically + // still speaking. Send a preflight event to enable preemptive generation. + const metadata = json['metadata']; + const requestId = metadata['request_id']; + this.#requestId = requestId; + + const alternatives = liveTranscriptionToSpeechData(this.#opts.language!, json); + + if (alternatives[0] && alternatives[0].text) { + this.#logger.debug( + { transcript: alternatives[0].text, confidence: alternatives[0].confidence }, + 'received eager end-of-turn event', + ); + + this.queue.put({ + type: stt.SpeechEventType.PREFLIGHT_TRANSCRIPT, + alternatives: [alternatives[0], ...alternatives.slice(1)], + }); + } + + break; + } case 'Metadata': { break; }