|
1 | 1 | """ |
2 | 2 | STT (Speech-to-Text) service factory |
3 | 3 |
|
4 | | -Supports multiple providers: Deepgram, AssemblyAI, Whisper, Google, Azure |
| 4 | +Supports multiple providers: Deepgram, Sarvam, ElevenLabs |
5 | 5 | """ |
6 | 6 |
|
7 | 7 | from typing import Dict, Any |
|
10 | 10 | # Pipecat STT services |
11 | 11 | from pipecat.services.deepgram.stt import DeepgramSTTService |
12 | 12 | from pipecat.services.sarvam.stt import SarvamSTTService |
| 13 | +from pipecat.services.elevenlabs.stt import ElevenLabsRealtimeSTTService |
13 | 14 |
|
14 | 15 | # Pipecat language enum |
15 | 16 | from pipecat.transcriptions.language import Language |
@@ -57,6 +58,8 @@ def create_stt_service(stt_config: Dict[str, Any]): |
57 | 58 | return STTServiceFactory._create_deepgram_stt(api_key, parameters) |
58 | 59 | elif provider == 'sarvam': |
59 | 60 | return STTServiceFactory._create_sarvam_stt(api_key, parameters) |
| 61 | + elif provider == 'elevenlabs': |
| 62 | + return STTServiceFactory._create_elevenlabs_stt(api_key, parameters) |
60 | 63 | elif provider == 'assemblyai': |
61 | 64 | return STTServiceFactory._create_assemblyai_stt(api_key, parameters) |
62 | 65 | elif provider == 'whisper': |
@@ -162,6 +165,55 @@ def _create_sarvam_stt(api_key: str, parameters: Dict[str, Any]): |
162 | 165 | params=input_params, |
163 | 166 | ) |
164 | 167 |
|
# Two-letter language codes (as supplied in the STT config's 'language'
# parameter) mapped to the three-letter ISO-639-3 codes that are passed to
# ElevenLabs as `language_code`. Languages not covered by this table are not
# forwarded: _create_elevenlabs_stt logs a warning and sets no language_code.
ELEVENLABS_LANGUAGE_MAP = {
    'en': 'eng',  # English
    'hi': 'hin',  # Hindi
    'ta': 'tam',  # Tamil
    'te': 'tel',  # Telugu
    'kn': 'kan',  # Kannada
    'ml': 'mal',  # Malayalam
    'gu': 'guj',  # Gujarati
    'bn': 'ben',  # Bengali
    'mr': 'mar',  # Marathi
    'pa': 'pan',  # Punjabi
    'or': 'ori',  # Odia
}
| 182 | + |
@staticmethod
def _create_elevenlabs_stt(api_key: str, parameters: Dict[str, Any]):
    """Create ElevenLabs Realtime STT service (WebSocket streaming, scribe_v2_realtime).

    Args:
        api_key: ElevenLabs API key, passed through to the service unchanged.
        parameters: Provider options read from the STT config:
            'language'    - optional language code. Matched against
                            ELEVENLABS_LANGUAGE_MAP after trimming whitespace,
                            lower-casing and dropping any region subtag, so
                            'hi', 'HI', 'hi-IN' and 'hi_IN' all resolve to
                            'hin'. A value that is already one of the mapped
                            ISO-639-3 codes (e.g. 'eng') is used as is.
            'model'       - model name, defaults to 'scribe_v2_realtime'.
            'sample_rate' - input sample rate, defaults to 8000.

    Returns:
        An ElevenLabsRealtimeSTTService built from the values above. `params`
        is None when no language could be resolved, in which case a warning
        is logged if a language was requested but not recognised.
    """
    language_map = STTServiceFactory.ELEVENLABS_LANGUAGE_MAP
    params_dict = {}

    lang_code = parameters.get('language')
    if lang_code:
        elevenlabs_lang = None
        if isinstance(lang_code, str):
            # Configs shared with other providers may carry a different
            # casing or a region subtag ('en-US', 'hi_IN'); only the primary
            # subtag is relevant for the lookup.
            primary = lang_code.strip().lower().replace('_', '-').split('-', 1)[0]
            elevenlabs_lang = language_map.get(primary)
            if elevenlabs_lang is None and primary in language_map.values():
                # Already expressed as one of the target ISO-639-3 codes.
                elevenlabs_lang = primary
        else:
            # Non-string values keep the plain exact-match lookup.
            elevenlabs_lang = language_map.get(lang_code)

        if elevenlabs_lang:
            params_dict['language_code'] = elevenlabs_lang
        else:
            logger.warning(
                f"Unknown ElevenLabs language '{lang_code}', skipping (auto-detect will be used)"
            )

    model = parameters.get('model', 'scribe_v2_realtime')
    sample_rate = parameters.get('sample_rate', 8000)

    # Only build InputParams when there is something to put in it; otherwise
    # pass None for `params`.
    input_params = (
        ElevenLabsRealtimeSTTService.InputParams(**params_dict)
        if params_dict
        else None
    )

    logger.info(f'ElevenLabs STT config: model={model}, sample_rate={sample_rate}')

    return ElevenLabsRealtimeSTTService(
        api_key=api_key,
        model=model,
        sample_rate=sample_rate,
        params=input_params,
    )
| 216 | + |
165 | 217 | @staticmethod |
166 | 218 | def _create_assemblyai_stt(api_key: str, parameters: Dict[str, Any]): |
167 | 219 | """Create AssemblyAI STT service""" |
|
0 commit comments