|
11 | 11 | from pipecat.services.elevenlabs.tts import ElevenLabsTTSService |
12 | 12 | from pipecat.services.deepgram.tts import DeepgramTTSService |
13 | 13 | from pipecat.services.cartesia.tts import CartesiaTTSService |
| 14 | +from pipecat.services.sarvam.tts import SarvamTTSService |
14 | 15 |
|
15 | 16 | # Language for params |
16 | 17 | from pipecat.transcriptions.language import Language |
@@ -62,6 +63,8 @@ def create_tts_service(tts_config: Dict[str, Any]): |
62 | 63 | return TTSServiceFactory._create_deepgram_tts(api_key, voice_id, parameters) |
63 | 64 | elif provider == 'cartesia': |
64 | 65 | return TTSServiceFactory._create_cartesia_tts(api_key, voice_id, parameters) |
| 66 | + elif provider == 'sarvam': |
| 67 | + return TTSServiceFactory._create_sarvam_tts(api_key, voice_id, parameters) |
65 | 68 | else: |
66 | 69 | raise ValueError(f'Unsupported TTS provider: {provider}') |
67 | 70 |
|
@@ -162,3 +165,55 @@ def _create_cartesia_tts(api_key: str, voice_id: str, parameters: Dict[str, Any] |
162 | 165 | return CartesiaTTSService( |
163 | 166 | api_key=api_key, voice_id=voice_id, model=model, params=input_params |
164 | 167 | ) |
| 168 | + |
# Short language code -> pipecat Language member used for Sarvam.
# Each code maps to the member named "<CODE>_IN" (e.g. 'hi' -> Language.HI_IN),
# so the table is derived from the list of codes instead of being spelled
# out entry by entry.
SARVAM_LANGUAGE_MAP = {
    short_code: getattr(Language, f'{short_code.upper()}_IN')
    for short_code in (
        'bn',
        'en',
        'gu',
        'hi',
        'kn',
        'ml',
        'mr',
        'or',
        'pa',
        'ta',
        'te',
    )
}
| 183 | + |
@staticmethod
def _create_sarvam_tts(api_key: str, voice_id: str, parameters: Dict[str, Any]):
    """Build a SarvamTTSService from the generic TTS configuration values.

    Args:
        api_key: API key forwarded unchanged to the service.
        voice_id: Voice identifier forwarded unchanged to the service.
        parameters: Provider options. Recognised keys are ``model``
            (defaults to ``'bulbul:v2'``), ``language`` (a short code looked
            up in ``SARVAM_LANGUAGE_MAP``), and the passthrough values
            ``pitch``, ``pace``, ``loudness``, ``enable_preprocessing`` and
            ``temperature``.

    Returns:
        The constructed SarvamTTSService. Its ``params`` argument is None
        when none of the optional settings ended up being applied.
    """
    overrides = {}

    # A missing or falsy language is ignored; an unrecognised code is
    # reported and dropped rather than treated as an error.
    lang_code = parameters.get('language')
    if lang_code:
        resolved_language = TTSServiceFactory.SARVAM_LANGUAGE_MAP.get(lang_code)
        if not resolved_language:
            logger.warning(f"Unknown Sarvam language '{lang_code}', skipping")
        else:
            overrides['language'] = resolved_language

    # These settings are copied verbatim whenever the key is present,
    # whatever its value.
    passthrough_keys = (
        'pitch',
        'pace',
        'loudness',
        'enable_preprocessing',
        'temperature',
    )
    for key in passthrough_keys:
        if key in parameters:
            overrides[key] = parameters[key]

    # Only build an InputParams object when something was actually set.
    if overrides:
        input_params = SarvamTTSService.InputParams(**overrides)
    else:
        input_params = None

    model = parameters.get('model', 'bulbul:v2')
    logger.info(f'Sarvam TTS config: voice={voice_id}, model={model}')

    return SarvamTTSService(
        api_key=api_key,
        voice_id=voice_id,
        model=model,
        params=input_params,
    )
0 commit comments