diff --git a/docs.json b/docs.json index d5f8193b..3517921c 100644 --- a/docs.json +++ b/docs.json @@ -193,7 +193,8 @@ "pages": [ "server/services/s2s/aws", "server/services/s2s/gemini", - "server/services/s2s/openai" + "server/services/s2s/openai", + "server/services/s2s/pinch" ] }, { diff --git a/server/services/s2s/pinch.mdx b/server/services/s2s/pinch.mdx new file mode 100644 index 00000000..4ee2f049 --- /dev/null +++ b/server/services/s2s/pinch.mdx @@ -0,0 +1,138 @@ +--- +title: "Pinch" +description: "Real-time translation service implementation using Pinch's speech-to-speech API" +--- + +## Overview + +`PinchAudioService` provides real-time speech translation with synchronized audio output and transcription capabilities. The service translates spoken audio from one language to another while maintaining natural conversation flow through streaming audio processing. + +The service provides: +- **Real-time Translation**: Stream audio input and receive translated audio output with minimal latency +- **Dual Transcription**: Both source language transcription and translated text output +- **Voice Synthesis**: Natural-sounding translated speech with customizable voice parameters +- **Streaming Architecture**: Optimized for low-latency conversational applications + +## Installation + +To use `PinchAudioService`, install the required dependencies: + +```bash +pip install "pipecat-ai[pinch]" +``` + +You'll also need to set up your Pinch API token as an environment variable: `PINCH_API_TOKEN`. + + + Get your API token by creating an account at [Pinch](https://www.startpinch.com/). 
+ + +## Frames + +### Input + + + Raw PCM audio data for speech input (16-bit, 16kHz, mono) + + +### Output + + + Final transcription of the source language speech + + + + Real-time partial transcription updates during speech + + + + Translated text output in the target language + + + + Text being synthesized to speech in the target language + + + + Translated audio stream chunks (16-bit PCM) + + +## Configuration + +### Constructor Parameters + + + Pinch API authentication token + + + + HTTP client session for WebSocket connections and API requests + + + + Session configuration object. Defaults to English → Spanish translation with female voice + + +### Session Configuration + +The `PinchSessionRequest` object configures the translation session: + + + Input language code (e.g., "en" for English). See supported languages below + + + + Output language code (e.g., "es" for Spanish). See supported languages below + + + + Voice characteristic for synthesized speech. Options: "female", "male" + + + + Whether to generate translated audio output. Default: `True` + + + + Whether to output transcription frames. Default: `True` + + + + Audio sample rate in Hz. Default: `16000` + + +## Language Support + +Pinch supports real-time translation between a growing number of language pairs. We are constantly adding new languages and improving translation quality. +For a complete list of supported languages and available translation pairs, please visit the [Pinch documentation](https://www.startpinch.com/). 
+ +## Usage Example + +```python +from pipecat.transports.pinch.api import PinchSessionRequest +from pipecat.services.pinch import PinchAudioService + +pinch_api_key = os.getenv("PINCH_API_TOKEN") + +# Configure session +session_request = PinchSessionRequest( + source_language="en", + target_language="es", + voice_type="female", + enable_audio_output=True +) + +# Create Pinch audio streaming service +pinch_service = PinchAudioService( + api_token=pinch_api_key, + session=session, + session_request=session_request +) + +# Create pipeline +pipeline = Pipeline([ + transport.input(), # Audio input + pinch_service, # Translation service + transport.output(), # Audio output +]) +``` diff --git a/server/services/supported-services.mdx b/server/services/supported-services.mdx index 32b1c236..c7201556 100644 --- a/server/services/supported-services.mdx +++ b/server/services/supported-services.mdx @@ -112,6 +112,7 @@ Speech-to-Speech services are multi-modal LLM services that take in audio, video | [AWS Nova Sonic](/server/services/s2s/aws) | `pip install "pipecat-ai[aws-nova-sonic]"` | | [Gemini Multimodal Live](/server/services/s2s/gemini) | `pip install "pipecat-ai[google]"` | | [OpenAI Realtime](/server/services/s2s/openai) | `pip install "pipecat-ai[openai]"` | +| [Pinch](/server/services/s2s/pinch) | `pip install "pipecat-ai[pinch]"` | ## Image Generation