Skip to content

Commit 018d61d

Browse files
added elevenlabs stt support (#222)
* added elevenlabs stt support
* review comment fix
1 parent 35d68f8 commit 018d61d

6 files changed

Lines changed: 94 additions & 6 deletions

File tree

wavefront/client/src/config/voice-providers.ts

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -220,7 +220,7 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
220220
},
221221
},
222222
stt: {
223-
providers: ['deepgram', 'sarvam'] as const,
223+
providers: ['deepgram', 'sarvam', 'elevenlabs'] as const,
224224
configs: {
225225
deepgram: {
226226
name: 'Deepgram',
@@ -325,6 +325,33 @@ export const VOICE_PROVIDERS_CONFIG: VoiceProvidersConfig = {
325325
},
326326
},
327327
},
328+
elevenlabs: {
329+
name: 'ElevenLabs',
330+
badge: {
331+
bg: 'bg-purple-100',
332+
text: 'text-purple-800',
333+
},
334+
parameters: {
335+
model: {
336+
type: 'string',
337+
default: 'scribe_v2_realtime',
338+
description: 'ElevenLabs STT model',
339+
options: ['scribe_v2_realtime'],
340+
},
341+
language: {
342+
type: 'string',
343+
default: '',
344+
description: 'Language code (ISO-639-1, e.g., en, hi)',
345+
placeholder: 'en',
346+
},
347+
sample_rate: {
348+
type: 'number',
349+
default: 8000,
350+
description: 'Audio sample rate in Hz',
351+
placeholder: '8000',
352+
},
353+
},
354+
},
328355
},
329356
},
330357
};

wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/CreateSttConfigDialog.tsx

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import { Input } from '@app/components/ui/input';
2222
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from '@app/components/ui/select';
2323
import { Textarea } from '@app/components/ui/textarea';
2424
import { VOICE_PROVIDERS_CONFIG, getProviderConfig } from '@app/config/voice-providers';
25+
import { SttProvider } from '@app/types/stt-config';
2526
import { extractErrorMessage } from '@app/lib/utils';
2627
import { useNotifyStore } from '@app/store';
2728
import { zodResolver } from '@hookform/resolvers/zod';
@@ -32,7 +33,7 @@ import { z } from 'zod';
3233
const createSttConfigSchema = z.object({
3334
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
3435
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
35-
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
36+
provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
3637
api_key: z.string().min(1, 'API key is required'),
3738
});
3839

@@ -76,7 +77,7 @@ const CreateSttConfigDialog: React.FC<CreateSttConfigDialogProps> = ({ isOpen, o
7677
await floConsoleService.sttConfigService.createSttConfig({
7778
display_name: data.display_name.trim(),
7879
description: data.description?.trim() || null,
79-
provider: data.provider as 'deepgram',
80+
provider: data.provider as SttProvider,
8081
api_key: data.api_key.trim(),
8182
});
8283
notifySuccess('STT configuration created successfully');

wavefront/client/src/pages/apps/[appId]/voice-agents/stt-configs/EditSttConfigDialog.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import { z } from 'zod';
3333
const updateSttConfigSchema = z.object({
3434
display_name: z.string().min(1, 'Display name is required').max(100, 'Display name must be 100 characters or less'),
3535
description: z.string().max(500, 'Description must be 500 characters or less').optional(),
36-
provider: z.enum(['deepgram', 'sarvam'] as [string, ...string[]]),
36+
provider: z.enum(['deepgram', 'sarvam', 'elevenlabs'] as [string, ...string[]]),
3737
api_key: z.string().optional(),
3838
});
3939

wavefront/client/src/types/stt-config.ts

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { IApiResponse } from '@app/lib/axios';
22

3-
export type SttProvider = 'deepgram' | 'sarvam';
3+
export type SttProvider = 'deepgram' | 'sarvam' | 'elevenlabs';
44

55
export interface SttConfig {
66
id: string;
@@ -60,3 +60,10 @@ export interface SarvamSttParameters {
6060
vad_signals?: boolean;
6161
high_vad_sensitivity?: boolean;
6262
}
63+
64+
// ElevenLabs STT specific parameters
65+
export interface ElevenLabsSttParameters {
66+
model?: string; // default: 'scribe_v2_realtime'
67+
language?: string; // ISO-639-1 code e.g. 'en', 'hi'
68+
sample_rate?: number; // default: 8000
69+
}

wavefront/server/apps/call_processing/call_processing/services/stt_service.py

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""
22
STT (Speech-to-Text) service factory
33
4-
Supports multiple providers: Deepgram, AssemblyAI, Whisper, Google, Azure
4+
Supports multiple providers: Deepgram, Sarvam, ElevenLabs
55
"""
66

77
from typing import Dict, Any
@@ -10,6 +10,7 @@
1010
# Pipecat STT services
1111
from pipecat.services.deepgram.stt import DeepgramSTTService
1212
from pipecat.services.sarvam.stt import SarvamSTTService
13+
from pipecat.services.elevenlabs.stt import ElevenLabsRealtimeSTTService
1314

1415
# Pipecat language enum
1516
from pipecat.transcriptions.language import Language
@@ -57,6 +58,8 @@ def create_stt_service(stt_config: Dict[str, Any]):
5758
return STTServiceFactory._create_deepgram_stt(api_key, parameters)
5859
elif provider == 'sarvam':
5960
return STTServiceFactory._create_sarvam_stt(api_key, parameters)
61+
elif provider == 'elevenlabs':
62+
return STTServiceFactory._create_elevenlabs_stt(api_key, parameters)
6063
elif provider == 'assemblyai':
6164
return STTServiceFactory._create_assemblyai_stt(api_key, parameters)
6265
elif provider == 'whisper':
@@ -162,6 +165,55 @@ def _create_sarvam_stt(api_key: str, parameters: Dict[str, Any]):
162165
params=input_params,
163166
)
164167

168+
# Mapping of ISO-639-1 (two-letter) language codes to the ISO-639-3 (three-letter) codes used by ElevenLabs
169+
ELEVENLABS_LANGUAGE_MAP = {
170+
'en': 'eng',
171+
'hi': 'hin',
172+
'ta': 'tam',
173+
'te': 'tel',
174+
'kn': 'kan',
175+
'ml': 'mal',
176+
'gu': 'guj',
177+
'bn': 'ben',
178+
'mr': 'mar',
179+
'pa': 'pan',
180+
'or': 'ori',
181+
}
182+
183+
@staticmethod
184+
def _create_elevenlabs_stt(api_key: str, parameters: Dict[str, Any]):
185+
"""Create ElevenLabs Realtime STT service (WebSocket streaming, scribe_v2_realtime)"""
186+
params_dict = {}
187+
188+
# Map language code to ElevenLabs ISO-639-3 code
189+
if 'language' in parameters and parameters['language']:
190+
lang_code = parameters['language']
191+
elevenlabs_lang = STTServiceFactory.ELEVENLABS_LANGUAGE_MAP.get(lang_code)
192+
if elevenlabs_lang:
193+
params_dict['language_code'] = elevenlabs_lang
194+
else:
195+
logger.warning(
196+
f"Unknown ElevenLabs language '{lang_code}', skipping (auto-detect will be used)"
197+
)
198+
199+
model = parameters.get('model', 'scribe_v2_realtime')
200+
sample_rate = parameters.get('sample_rate', 8000)
201+
202+
input_params = (
203+
ElevenLabsRealtimeSTTService.InputParams(**params_dict)
204+
if params_dict
205+
else None
206+
)
207+
208+
logger.info(f'ElevenLabs STT config: model={model}, sample_rate={sample_rate}')
209+
210+
return ElevenLabsRealtimeSTTService(
211+
api_key=api_key,
212+
model=model,
213+
sample_rate=sample_rate,
214+
params=input_params,
215+
)
216+
165217
@staticmethod
166218
def _create_assemblyai_stt(api_key: str, parameters: Dict[str, Any]):
167219
"""Create AssemblyAI STT service"""

wavefront/server/modules/voice_agents_module/voice_agents_module/models/stt_schemas.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ class SttProvider(str, Enum):
1515
GOOGLE = 'google'
1616
AZURE = 'azure'
1717
SARVAM = 'sarvam'
18+
ELEVENLABS = 'elevenlabs'
1819

1920

2021
class CreateSttConfigPayload(BaseModel):

0 commit comments

Comments
 (0)