client.history.get_all(...)
-
-
-
Returns a list of your generated audio.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.history.get_all()
```
-
-
-
page_size:
typing.Optional[int]
— The maximum number of history items to return. Cannot exceed 1000; defaults to 100.
-
start_after_history_item_id:
typing.Optional[str]
— The ID after which to start fetching. Use this parameter to paginate across a large collection of history items. If not provided, history items are fetched starting from the most recently created one, ordered descending by creation date.
-
voice_id:
typing.Optional[str]
— Voice ID to filter by. You can use GET https://api.elevenlabs.io/v1/voices to retrieve a list of voices and their IDs.
-
search:
typing.Optional[str]
— Search term used for filtering.
-
source:
typing.Optional[HistoryGetAllRequestSource]
— Source of the generated history item.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
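For large histories, page_size and start_after_history_item_id combine into a simple cursor loop. A minimal sketch, assuming the response exposes history, has_more, and last_history_item_id as in the GET /v1/history response schema:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

last_id = None
while True:
    # Fetch one page, resuming after the last item we have already seen.
    page = client.history.get_all(
        page_size=100,
        start_after_history_item_id=last_id,
    )
    for item in page.history:
        print(item.history_item_id)
    if not page.has_more:
        break
    last_id = page.last_history_item_id
```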
-
-
client.history.get(...)
-
-
-
Retrieves a history item.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.history.get(
    history_item_id="HISTORY_ITEM_ID",
)
```
-
-
-
history_item_id:
str
— History item ID to be used. You can use GET https://api.elevenlabs.io/v1/history to retrieve a list of history items and their IDs.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.history.delete(...)
-
-
-
Deletes a history item by its ID.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.history.delete(
    history_item_id="HISTORY_ITEM_ID",
)
```
-
-
-
history_item_id:
str
— History item ID to be used. You can use GET https://api.elevenlabs.io/v1/history to retrieve a list of history items and their IDs.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.history.get_audio(...)
-
-
-
Returns the audio of a history item.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.history.get_audio(
    history_item_id="HISTORY_ITEM_ID",
)
```
-
-
-
history_item_id:
str
— History item ID to be used. You can use GET https://api.elevenlabs.io/v1/history to retrieve a list of history items and their IDs.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
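The call returns the audio body as raw bytes (streamed in chunks by the Python SDK), so a common pattern is writing it straight to a file. A small sketch, assuming the default MP3 output and an iterator of byte chunks:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

audio = client.history.get_audio(history_item_id="HISTORY_ITEM_ID")
with open("history_item.mp3", "wb") as f:
    for chunk in audio:  # the SDK streams the response body in chunks
        f.write(chunk)
```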
-
-
client.history.download(...)
-
-
-
Download one or more history items. If one history item ID is provided, we will return a single audio file. If more than one history item ID is provided, the history items will be packed into a .zip file.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.history.download(
    history_item_ids=["HISTORY_ITEM_ID"],
)
```
-
-
-
history_item_ids:
typing.Sequence[str]
— A list of history items to download. You can get the IDs of history items and other metadata using the GET https://api.elevenlabs.io/v1/history endpoint.
-
output_format:
typing.Optional[str]
— Output format to transcode the audio file to; can be 'wav' or 'default'.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
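Since one ID yields a single audio file and several IDs yield a .zip archive, a caller can pick the file extension from the number of IDs it sent. A sketch, assuming the SDK yields the response body as byte chunks (IDs and file names are illustrative):

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

ids = ["HISTORY_ITEM_ID_1", "HISTORY_ITEM_ID_2"]
# One ID returns a single audio file; several IDs return a .zip archive.
filename = "history.zip" if len(ids) > 1 else "history.mp3"
with open(filename, "wb") as f:
    for chunk in client.history.download(history_item_ids=ids):
        f.write(chunk)
```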
-
-
client.text_to_sound_effects.convert(...)
-
-
-
Turn text into sound effects for your videos, voice-overs or video games using the most advanced sound effects model in the world.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_sound_effects.convert(
    text="Spacious braam suitable for high-impact movie trailer moments",
)
```
-
-
-
text:
str
— The text that will get converted into a sound effect.
-
output_format:
typing.Optional[TextToSoundEffectsConvertRequestOutputFormat]
— Output format of the generated audio. Formatted as codec_sample_rate_bitrate. So an mp3 with a 22.05kHz sample rate at 32kbps is represented as mp3_22050_32. MP3 with 192kbps bitrate requires you to be subscribed to Creator tier or above. PCM with 44.1kHz sample rate requires you to be subscribed to Pro tier or above. Note that the μ-law format (sometimes written mu-law, often approximated as u-law) is commonly used for Twilio audio inputs.
-
duration_seconds:
typing.Optional[float]
— The duration of the generated sound in seconds. Must be at least 0.5 and at most 22. If set to None, we will guess the optimal duration from the prompt. Defaults to None.
-
prompt_influence:
typing.Optional[float]
— A higher prompt influence makes your generation follow the prompt more closely while also making generations less variable. Must be a value between 0 and 1. Defaults to 0.3.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
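A sketch combining the optional knobs above and saving the result; the values and the output file name are illustrative, and the response is assumed to be an iterator of byte chunks:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

audio = client.text_to_sound_effects.convert(
    text="Spacious braam suitable for high-impact movie trailer moments",
    duration_seconds=5.0,   # between 0.5 and 22; omit to let the model pick
    prompt_influence=0.6,   # between 0 and 1; higher follows the prompt more closely
)
with open("braam.mp3", "wb") as f:
    for chunk in audio:
        f.write(chunk)
```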
-
-
client.samples.delete(...)
-
-
-
Removes a sample by its ID.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.samples.delete(
    voice_id="VOICE_ID",
    sample_id="SAMPLE_ID",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
sample_id:
str
— Sample ID to be used. You can use GET https://api.elevenlabs.io/v1/voices/{voice_id} to list all the available samples for a voice.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.samples.get_audio(...)
-
-
-
Returns the audio corresponding to a sample attached to a voice.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.samples.get_audio(
    voice_id="VOICE_ID",
    sample_id="SAMPLE_ID",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
sample_id:
str
— Sample ID to be used. You can use GET https://api.elevenlabs.io/v1/voices/{voice_id} to list all the available samples for a voice.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
-
-
client.text_to_speech.convert(...)
-
-
-
Converts text into speech using a voice of your choice and returns audio.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
text:
str
— The text that will get converted into speech.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support text to speech; you can check this using the can_do_text_to_speech property.
-
language_code:
typing.Optional[str]
— Language code (ISO 639-1) used to enforce a language for the model. Currently only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, an error will be returned if a language code is provided.
-
voice_settings:
typing.Optional[VoiceSettings]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request.
-
pronunciation_dictionary_locators:
typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]
— A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 4294967295.
-
previous_text:
typing.Optional[str]
— The text that came before the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
next_text:
typing.Optional[str]
— The text that comes after the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
previous_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that were generated before this generation. Can be used to improve the speech's continuity when splitting up a large task into multiple requests. The results will be best when the same model is used across the generations. If both previous_text and previous_request_ids are sent, previous_text will be ignored. A maximum of 3 request_ids can be sent.
-
next_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that come after this generation. next_request_ids is especially useful for maintaining the speech's continuity when regenerating a sample that has had some audio quality issues. For example, if you have generated 3 speech clips and want to improve clip 2, passing the request ID of clip 3 as a next_request_id (and that of clip 1 as a previous_request_id) will help maintain natural flow in the combined speech. The results will be best when the same model is used across the generations. If both next_text and next_request_ids are sent, next_text will be ignored. A maximum of 3 request_ids can be sent.
-
use_pvc_as_ivc:
typing.Optional[bool]
— If true, we won't use the PVC version of the voice for the generation but the IVC version instead. This is a temporary workaround for higher latency in PVC versions.
-
apply_text_normalization:
typing.Optional[BodyTextToSpeechV1TextToSpeechVoiceIdPostApplyTextNormalization]
— This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Cannot be turned on for 'eleven_turbo_v2_5' or 'eleven_flash_v2_5' models.
-
apply_language_text_normalization:
typing.Optional[bool]
— This parameter controls language text normalization. This helps with proper pronunciation of text in some supported languages. WARNING: This parameter can heavily increase the latency of the request. Currently only supported for Japanese.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
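The previous_text/next_text parameters are the simplest way to stitch a long passage across several requests while keeping delivery continuous. A minimal sketch of text-based stitching; the paragraphs and file name are illustrative, and the audio is assumed to come back as byte chunks:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

paragraphs = [
    "The first move is what sets everything in motion.",
    "The second move decides whether that motion becomes momentum.",
]

with open("narration.mp3", "wb") as f:
    for i, text in enumerate(paragraphs):
        audio = client.text_to_speech.convert(
            voice_id="JBFqnCBsd6RMkjVDRZzb",
            model_id="eleven_multilingual_v2",
            output_format="mp3_44100_128",
            text=text,
            # Tell the model what surrounds this chunk so prosody stays continuous.
            previous_text=paragraphs[i - 1] if i > 0 else None,
            next_text=paragraphs[i + 1] if i < len(paragraphs) - 1 else None,
        )
        for chunk in audio:
            f.write(chunk)
```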
-
-
client.text_to_speech.convert_with_timestamps(...)
-
-
-
Generate speech from text with precise character-level timing information for audio-text synchronization.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_speech.convert_with_timestamps(
    voice_id="21m00Tcm4TlvDq8ikWAM",
    text="This is a test for the API of ElevenLabs.",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
text:
str
— The text that will get converted into speech.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support text to speech; you can check this using the can_do_text_to_speech property.
-
language_code:
typing.Optional[str]
— Language code (ISO 639-1) used to enforce a language for the model. Currently only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, an error will be returned if a language code is provided.
-
voice_settings:
typing.Optional[VoiceSettings]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request.
-
pronunciation_dictionary_locators:
typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]
— A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 4294967295.
-
previous_text:
typing.Optional[str]
— The text that came before the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
next_text:
typing.Optional[str]
— The text that comes after the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
previous_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that were generated before this generation. Can be used to improve the speech's continuity when splitting up a large task into multiple requests. The results will be best when the same model is used across the generations. If both previous_text and previous_request_ids are sent, previous_text will be ignored. A maximum of 3 request_ids can be sent.
-
next_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that come after this generation. next_request_ids is especially useful for maintaining the speech's continuity when regenerating a sample that has had some audio quality issues. For example, if you have generated 3 speech clips and want to improve clip 2, passing the request ID of clip 3 as a next_request_id (and that of clip 1 as a previous_request_id) will help maintain natural flow in the combined speech. The results will be best when the same model is used across the generations. If both next_text and next_request_ids are sent, next_text will be ignored. A maximum of 3 request_ids can be sent.
-
use_pvc_as_ivc:
typing.Optional[bool]
— If true, we won't use the PVC version of the voice for the generation but the IVC version instead. This is a temporary workaround for higher latency in PVC versions.
-
apply_text_normalization:
typing.Optional[BodyTextToSpeechWithTimestampsV1TextToSpeechVoiceIdWithTimestampsPostApplyTextNormalization]
— This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Cannot be turned on for 'eleven_turbo_v2_5' or 'eleven_flash_v2_5' models.
-
apply_language_text_normalization:
typing.Optional[bool]
— This parameter controls language text normalization. This helps with proper pronunciation of text in some supported languages. WARNING: This parameter can heavily increase the latency of the request. Currently only supported for Japanese.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
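The response pairs the audio with character-level timing. A sketch that decodes both, assuming the response model exposes audio_base_64 and an alignment object with characters and character_start_times_seconds lists:

```python
import base64

from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

result = client.text_to_speech.convert_with_timestamps(
    voice_id="21m00Tcm4TlvDq8ikWAM",
    text="This is a test for the API of ElevenLabs.",
)

# Decode the base64 audio payload to a playable file.
with open("speech.mp3", "wb") as f:
    f.write(base64.b64decode(result.audio_base_64))

# Print the moment each character is spoken.
for char, start in zip(
    result.alignment.characters,
    result.alignment.character_start_times_seconds,
):
    print(f"{start:6.3f}s {char!r}")
```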
-
-
client.text_to_speech.convert_as_stream(...)
-
-
-
Converts text into speech using a voice of your choice and returns audio as an audio stream.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_speech.convert_as_stream(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
text:
str
— The text that will get converted into speech.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support text to speech; you can check this using the can_do_text_to_speech property.
-
language_code:
typing.Optional[str]
— Language code (ISO 639-1) used to enforce a language for the model. Currently only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, an error will be returned if a language code is provided.
-
voice_settings:
typing.Optional[VoiceSettings]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request.
-
pronunciation_dictionary_locators:
typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]
— A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 4294967295.
-
previous_text:
typing.Optional[str]
— The text that came before the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
next_text:
typing.Optional[str]
— The text that comes after the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
previous_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that were generated before this generation. Can be used to improve the speech's continuity when splitting up a large task into multiple requests. The results will be best when the same model is used across the generations. If both previous_text and previous_request_ids are sent, previous_text will be ignored. A maximum of 3 request_ids can be sent.
-
next_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that come after this generation. next_request_ids is especially useful for maintaining the speech's continuity when regenerating a sample that has had some audio quality issues. For example, if you have generated 3 speech clips and want to improve clip 2, passing the request ID of clip 3 as a next_request_id (and that of clip 1 as a previous_request_id) will help maintain natural flow in the combined speech. The results will be best when the same model is used across the generations. If both next_text and next_request_ids are sent, next_text will be ignored. A maximum of 3 request_ids can be sent.
-
use_pvc_as_ivc:
typing.Optional[bool]
— If true, we won't use the PVC version of the voice for the generation but the IVC version instead. This is a temporary workaround for higher latency in PVC versions.
-
apply_text_normalization:
typing.Optional[BodyTextToSpeechStreamingV1TextToSpeechVoiceIdStreamPostApplyTextNormalization]
— This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Cannot be turned on for 'eleven_turbo_v2_5' or 'eleven_flash_v2_5' models.
-
apply_language_text_normalization:
typing.Optional[bool]
— This parameter controls language text normalization. This helps with proper pronunciation of text in some supported languages. WARNING: This parameter can heavily increase the latency of the request. Currently only supported for Japanese.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
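The stream variant yields audio chunks while generation is still running, so writing (or playback) can begin before synthesis finishes. A sketch that writes the stream to disk as it arrives:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

audio_stream = client.text_to_speech.convert_as_stream(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)

with open("speech.mp3", "wb") as f:
    for chunk in audio_stream:  # chunks arrive as they are generated
        f.write(chunk)
```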
-
-
client.text_to_speech.stream_with_timestamps(...)
-
-
-
Converts text into speech using a voice of your choice and returns a stream of JSON objects, each containing audio as a base64-encoded string together with information on when each character was spoken.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
response = client.text_to_speech.stream_with_timestamps(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    output_format="mp3_44100_128",
    text="The first move is what sets everything in motion.",
    model_id="eleven_multilingual_v2",
)
for chunk in response:
    print(chunk)  # each chunk pairs base64 audio with character timing info
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
text:
str
— The text that will get converted into speech.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support text to speech; you can check this using the can_do_text_to_speech property.
-
language_code:
typing.Optional[str]
— Language code (ISO 639-1) used to enforce a language for the model. Currently only Turbo v2.5 and Flash v2.5 support language enforcement. For other models, an error will be returned if a language code is provided.
-
voice_settings:
typing.Optional[VoiceSettings]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request.
-
pronunciation_dictionary_locators:
typing.Optional[typing.Sequence[PronunciationDictionaryVersionLocator]]
— A list of pronunciation dictionary locators (id, version_id) to be applied to the text. They will be applied in order. You may have up to 3 locators per request.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be an integer between 0 and 4294967295.
-
previous_text:
typing.Optional[str]
— The text that came before the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
next_text:
typing.Optional[str]
— The text that comes after the text of the current request. Can be used to improve the speech's continuity when concatenating together multiple generations or to influence the speech's continuity in the current generation.
-
previous_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that were generated before this generation. Can be used to improve the speech's continuity when splitting up a large task into multiple requests. The results will be best when the same model is used across the generations. If both previous_text and previous_request_ids are sent, previous_text will be ignored. A maximum of 3 request_ids can be sent.
-
next_request_ids:
typing.Optional[typing.Sequence[str]]
— A list of request_ids of the samples that come after this generation. next_request_ids is especially useful for maintaining the speech's continuity when regenerating a sample that has had some audio quality issues. For example, if you have generated 3 speech clips and want to improve clip 2, passing the request ID of clip 3 as a next_request_id (and that of clip 1 as a previous_request_id) will help maintain natural flow in the combined speech. The results will be best when the same model is used across the generations. If both next_text and next_request_ids are sent, next_text will be ignored. A maximum of 3 request_ids can be sent.
-
use_pvc_as_ivc:
typing.Optional[bool]
— If true, we won't use the PVC version of the voice for the generation but the IVC version instead. This is a temporary workaround for higher latency in PVC versions.
-
apply_text_normalization:
typing.Optional[BodyTextToSpeechStreamingWithTimestampsV1TextToSpeechVoiceIdStreamWithTimestampsPostApplyTextNormalization]
— This parameter controls text normalization with three modes: 'auto', 'on', and 'off'. When set to 'auto', the system will automatically decide whether to apply text normalization (e.g., spelling out numbers). With 'on', text normalization will always be applied, while with 'off', it will be skipped. Cannot be turned on for 'eleven_turbo_v2_5' or 'eleven_flash_v2_5' models.
-
apply_language_text_normalization:
typing.Optional[bool]
— This parameter controls language text normalization. This helps with proper pronunciation of text in some supported languages. WARNING: This parameter can heavily increase the latency of the request. Currently only supported for Japanese.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.speech_to_speech.convert(...)
-
-
-
Transform audio from one voice to another. Maintain full control over emotion, timing and delivery.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.speech_to_speech.convert(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=open("input.mp3", "rb"),  # required core.File input; the path is illustrative
    output_format="mp3_44100_128",
    model_id="eleven_multilingual_sts_v2",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
audio:
core.File
— See core.File for more documentation.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support speech to speech; you can check this using the can_do_voice_conversion property.
-
voice_settings:
typing.Optional[str]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request. Needs to be sent as a JSON-encoded string.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be integer between 0 and 4294967295.
-
remove_background_noise:
typing.Optional[bool]
— If set, background noise will be removed from your audio input using our audio isolation model. Only applies to Voice Changer.
-
file_format:
typing.Optional[SpeechToSpeechConvertRequestFileFormat]
— The format of input audio. Options are 'pcm_s16le_16' or 'other'. For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than when passing an encoded waveform.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
-
-
client.speech_to_speech.convert_as_stream(...)
-
-
-
Stream audio from one voice to another. Maintain full control over emotion, timing and delivery.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.speech_to_speech.convert_as_stream(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
    audio=open("input.mp3", "rb"),  # required core.File input; the path is illustrative
    output_format="mp3_44100_128",
    model_id="eleven_multilingual_sts_v2",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
audio:
core.File
— See core.File for more documentation.
-
enable_logging:
typing.Optional[bool]
— When enable_logging is set to false, zero retention mode will be used for the request. This means history features are unavailable for this request, including request stitching. Zero retention mode may only be used by enterprise customers.
-
optimize_streaming_latency:
typing.Optional[int]
— You can turn on latency optimizations at some cost of quality. The best possible final latency varies by model. Possible values: 0 (default mode, no latency optimizations), 1 (normal latency optimizations, about 50% of the possible latency improvement of option 3), 2 (strong latency optimizations, about 75% of the possible latency improvement of option 3), 3 (max latency optimizations), 4 (max latency optimizations with the text normalizer turned off for even more latency savings; best latency, but can mispronounce e.g. numbers and dates). Defaults to None.
-
output_format:
typing.Optional[OutputFormat]
— The output format of the generated audio.
-
model_id:
typing.Optional[str]
— Identifier of the model that will be used. You can query available models using GET /v1/models. The model needs to support speech to speech; you can check this using the can_do_voice_conversion property.
-
voice_settings:
typing.Optional[str]
— Voice settings overriding stored settings for the given voice. They are applied only on the given request. Needs to be sent as a JSON-encoded string.
-
seed:
typing.Optional[int]
— If specified, our system will make a best effort to sample deterministically, such that repeated requests with the same seed and parameters should return the same result. Determinism is not guaranteed. Must be integer between 0 and 4294967295.
-
remove_background_noise:
typing.Optional[bool]
— If set, background noise will be removed from your audio input using our audio isolation model. Only applies to Voice Changer.
-
file_format:
typing.Optional[SpeechToSpeechConvertAsStreamRequestFileFormat]
— The format of input audio. Options are 'pcm_s16le_16' or 'other'. For `pcm_s16le_16`, the input audio must be 16-bit PCM at a 16kHz sample rate, single channel (mono), and little-endian byte order. Latency will be lower than when passing an encoded waveform.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
-
-
client.voice_generation.generate_parameters()
-
-
-
Get possible parameters for the /v1/voice-generation/generate-voice endpoint.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voice_generation.generate_parameters()
```
-
-
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voice_generation.generate(...)
-
-
-
Generate a random voice based on parameters. This method returns a generated_voice_id in the response header, and a sample of the voice in the body. If you like the generated voice, call /v1/voice-generation/create-voice with the generated_voice_id to create the voice.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voice_generation.generate(
    gender="female",
    accent="american",
    age="middle_aged",
    accent_strength=2.0,
    text="It sure does, Jackie… My mama always said: “In Carolina, the air's so thick you can wear it!”",
)
```
-
-
-
gender:
Gender
— Category code corresponding to the gender of the generated voice. Possible values: female, male.
-
accent:
str
— Category code corresponding to the accent of the generated voice. Possible values: british, american, african, australian, indian.
-
age:
Age
— Category code corresponding to the age of the generated voice. Possible values: young, middle_aged, old.
-
accent_strength:
float
— The strength of the accent of the generated voice. Has to be between 0.3 and 2.0.
-
text:
str
— Text to generate. Text length has to be between 100 and 1000.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration. You can pass in configuration such as `chunk_size` and more to customize the request and response.
-
-
client.voice_generation.create_a_previously_generated_voice(...)
-
-
-
Create a previously generated voice. This endpoint should be called after you have fetched a generated_voice_id using /v1/voice-generation/generate-voice.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voice_generation.create_a_previously_generated_voice(
    voice_name="Alex",
    voice_description="Middle-aged American woman",
    generated_voice_id="rbVJFu6SGRD1dbWpKnWl",
)
```
-
-
-
voice_name:
str
— Name to use for the created voice.
-
voice_description:
str
— Description to use for the created voice.
-
generated_voice_id:
str
— The generated_voice_id to create. Call POST /v1/text-to-voice/create-previews and fetch the generated_voice_id from the response header if you don't have one yet.
-
played_not_selected_voice_ids:
typing.Optional[typing.Sequence[str]]
— List of voice ids that the user has played but not selected. Used for RLHF.
-
labels:
typing.Optional[typing.Dict[str, typing.Optional[str]]]
— Optional, metadata to add to the created voice. Defaults to None.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.text_to_voice.create_previews(...)
-
-
-
Generate a custom voice based on a voice description. This method returns a list of voice previews. Each preview has a generated_voice_id and a sample of the voice as base64-encoded MP3 audio. If you like a voice preview and want to create the voice, call /v1/text-to-voice/create-voice-from-preview with the generated_voice_id to create the voice.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_voice.create_previews(
    voice_description="A sassy squeaky mouse",
)
```
-
-
-
voice_description:
str
— Description to use for the created voice.
-
output_format:
typing.Optional[TextToVoiceCreatePreviewsRequestOutputFormat]
— Output format of the generated audio. Formatted as codec_sample_rate_bitrate. So an mp3 with a 22.05kHz sample rate at 32kbps is represented as mp3_22050_32. MP3 with 192kbps bitrate requires you to be subscribed to Creator tier or above. PCM with 44.1kHz sample rate requires you to be subscribed to Pro tier or above. Note that the μ-law format (sometimes written mu-law, often approximated as u-law) is commonly used for Twilio audio inputs.
-
text:
typing.Optional[str]
— Text to generate. Text length has to be between 100 and 1000.
-
auto_generate_text:
typing.Optional[bool]
— Whether to automatically generate a text suitable for the voice description.
-
loudness:
typing.Optional[float]
— Controls the volume level of the generated voice. -1 is quietest, 1 is loudest, 0 corresponds to roughly -24 LUFS.
-
quality:
typing.Optional[float]
— Higher quality results in better voice output but less variety.
-
seed:
typing.Optional[int]
— Random number that controls the voice generation. Same seed with same inputs produces same voice.
-
guidance_scale:
typing.Optional[float]
— Controls how closely the AI follows the prompt. Lower numbers give the AI more freedom to be creative, while higher numbers force it to stick more to the prompt. High numbers can cause the voice to sound artificial or robotic. We recommend using longer, more detailed prompts at a lower guidance scale.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
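Since each preview carries both a base64 MP3 sample and a generated_voice_id, a typical flow is to audition the previews and then promote the one you like. A sketch, assuming the response exposes a previews list whose items have audio_base_64 and generated_voice_id fields:

```python
import base64

from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

previews = client.text_to_voice.create_previews(
    voice_description="A sassy squeaky mouse",
)

# Write each preview sample to disk for auditioning.
for i, preview in enumerate(previews.previews):
    with open(f"preview_{i}.mp3", "wb") as f:
        f.write(base64.b64decode(preview.audio_base_64))

# Promote the chosen preview to a permanent voice.
client.text_to_voice.create_voice_from_preview(
    voice_name="Sassy squeaky mouse",
    voice_description="A sassy squeaky mouse",
    generated_voice_id=previews.previews[0].generated_voice_id,
)
```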
-
-
client.text_to_voice.create_voice_from_preview(...)
-
-
-
Create a voice from a previously generated voice preview. This endpoint should be called after you have fetched a generated_voice_id using POST /v1/text-to-voice/create-previews.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.text_to_voice.create_voice_from_preview(
    voice_name="Sassy squeaky mouse",
    voice_description="A sassy squeaky mouse",
    generated_voice_id="37HceQefKmEi3bGovXjL",
)
```
-
-
-
voice_name:
str
— Name to use for the created voice.
-
voice_description:
str
— Description to use for the created voice.
-
generated_voice_id:
str
— The generated_voice_id to create. Call POST /v1/text-to-voice/create-previews and fetch the generated_voice_id from the response header if you don't have one yet.
-
labels:
typing.Optional[typing.Dict[str, typing.Optional[str]]]
— Optional, metadata to add to the created voice. Defaults to None.
-
played_not_selected_voice_ids:
typing.Optional[typing.Sequence[str]]
— List of voice ids that the user has played but not selected. Used for RLHF.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.user.get_subscription()
-
-
-
Gets extended information about the user's subscription.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.user.get_subscription()
```
-
-
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.user.get()
-
-
-
Gets information about the user.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.user.get()
```
-
-
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.get_all(...)
-
-
-
Returns a list of all available voices for a user.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.get_all()
```
-
-
-
show_legacy:
typing.Optional[bool]
— If set to true, legacy premade voices will be included in responses from /v1/voices.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.search(...)
-
-
-
Gets a list of all available voices for a user with search, filtering and pagination.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.search(
    include_total_count=True,
)
```
-
-
-
next_page_token:
typing.Optional[str]
— The next page token to use for pagination. Returned from the previous request.
-
page_size:
typing.Optional[int]
— The maximum number of voices to return. Cannot exceed 100; defaults to 10. Page 0 may include more voices because default voices are included.
-
search:
typing.Optional[str]
— Search term to filter voices by. Searches in name, description, labels, category.
-
sort:
typing.Optional[str]
— Which field to sort by; one of 'created_at_unix' or 'name'. 'created_at_unix' may not be available for older voices.
-
sort_direction:
typing.Optional[str]
— Which direction to sort the voices in. 'asc' or 'desc'.
-
voice_type:
typing.Optional[str]
— Type of the voice to filter by. One of 'personal', 'community', 'default', 'workspace'.
-
category:
typing.Optional[str]
— Category of the voice to filter by. One of 'premade', 'cloned', 'generated', 'professional'.
-
fine_tuning_state:
typing.Optional[str]
— State of the voice's fine-tuning to filter by. Applicable only to professional voice clones. One of 'draft', 'not_verified', 'not_started', 'queued', 'fine_tuning', 'fine_tuned', 'failed', 'delayed'.
-
collection_id:
typing.Optional[str]
— Collection ID to filter voices by.
-
include_total_count:
typing.Optional[bool]
— Whether to include the total count of voices found in the response. Incurs a performance cost.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
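next_page_token makes this endpoint cursor-paginated: feed the token from each response into the next request until has_more is false. A sketch, assuming the response exposes voices, has_more, and next_page_token:

```python
from elevenlabs import ElevenLabs

client = ElevenLabs(api_key="YOUR_API_KEY")

token = None
while True:
    page = client.voices.search(page_size=100, next_page_token=token)
    for voice in page.voices:
        print(voice.name)
    if not page.has_more:
        break
    token = page.next_page_token
```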
-
-
client.voices.get_default_settings()
-
-
-
Gets the default settings for voices. "similarity_boost" corresponds to "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to the "Stability" slider in the web app.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.get_default_settings()
```
-
-
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.get_settings(...)
-
-
-
Returns the settings for a specific voice. "similarity_boost" corresponds to "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to the "Stability" slider in the web app.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.get_settings(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.get(...)
-
-
-
Returns metadata about a specific voice.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.get(
    voice_id="JBFqnCBsd6RMkjVDRZzb",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
with_settings:
typing.Optional[bool]
— This parameter is now deprecated. It is ignored and will be removed in a future version.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.delete(...)
-
-
-
Deletes a voice by its ID.
-
-
-
```python
from elevenlabs import ElevenLabs

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.delete(
    voice_id="VOICE_ID",
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-
client.voices.edit_settings(...)
-
-
-
Edit your settings for a specific voice. "similarity_boost" corresponds to "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to the "Stability" slider in the web app.
-
-
-
```python
from elevenlabs import ElevenLabs, VoiceSettings

client = ElevenLabs(
    api_key="YOUR_API_KEY",
)
client.voices.edit_settings(
    voice_id="VOICE_ID",
    request=VoiceSettings(
        stability=0.1,
        similarity_boost=0.3,
        style=0.2,
    ),
)
```
-
-
-
voice_id:
str
— Voice ID to be used. You can use GET https://api.elevenlabs.io/v1/voices to list all the available voices.
-
request:
VoiceSettings
-
request_options:
typing.Optional[RequestOptions]
— Request-specific configuration.
-
-