From 17a6b454d6e7d59569bda96bf64b963253280694 Mon Sep 17 00:00:00 2001 From: Henry Coles Date: Mon, 15 Dec 2025 20:33:07 +0000 Subject: [PATCH] update readme --- README.md | 58 ++++++++++++++----- .../org/pitest/voices/kokoro/KokoroTest.java | 10 ++++ .../pitest/voices/download/UsModelsTest.java | 2 +- 3 files changed, 56 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index a651121..4dc0209 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,9 @@ Fast in-process text to speech for Java 17 and above. No external apis. No system dependencies. - -* [sample 1](https://github.com/user-attachments/assets/3bb91fe5-682a-498b-ab38-3f4e0d1885f6) -* [sample 2](https://github.com/user-attachments/assets/3ff5dd48-df3f-4b47-9b4e-e88f97bf6d4d) +* [piper sample 1](https://github.com/user-attachments/assets/3bb91fe5-682a-498b-ab38-3f4e0d1885f6) +* [piper sample 2](https://github.com/user-attachments/assets/3ff5dd48-df3f-4b47-9b4e-e88f97bf6d4d) +* [kokoro sample](https://github.com/user-attachments/assets/b55f8ed8-08a1-4de1-b6c6-493d7e449431) # What is this? @@ -14,41 +14,48 @@ It can produce reasonable quality audio using low-specced hardware. 
It provides several components -* Code to run the voice models from the [piper](https://github.com/rhasspy/piper) project -* A piper-compatible pure Java phonemizer for English partially ported from [phonemize](https://github.com/hans00/phonemize) +* Code to run the voice models from the [piper](https://github.com/rhasspy/piper) and [kokoro](https://huggingface.co/spaces/hexgrad/Kokoro-TTS) projects +* A compatible pure Java phonemizer for English partially ported from [phonemize](https://github.com/hans00/phonemize) * Compatible phoneme dictionaries for uk and us English * A multi-lingual phonemizer using the [onnx model](https://huggingface.co/OpenVoiceOS/g2p-mbyt5-12l-ipa-childes-espeak-onnx) from OpenVoiceOs -* A small number of piper models available as dependencies on maven central +* A small number of models available as dependencies on maven central * Code to download other models not uploaded to central The models are run using the onnxruntime library, so can utilise both CPU and GPU. +## Which Model Should I Use? + +The piper models are fast and very lightweight. The Kokoro models arguably produce better quality speech, but run approximately +4x slower, although they can be accelerated with a GPU. There seems to be little benefit in using a GPU with the piper models. + +Ultimately, which voice sounds better is a matter of personal taste. + ## Releases See [Releases](https://github.com/hcoles/voices/releases) -## English-Only Usage With Rules Based Phonemizer +## English-Only Usage With Piper & Rules Based Phonemizer -Using Voices requires three code dependencies and one or more models. +Using Voices with piper requires three code dependencies and one or more models. 
```xml org.pitest.voices chorus - 0.0.8 + 0.0.9 org.pitest.voices alba - 0.0.8 + 0.0.9 org.pitest.voices en_uk - 0.0.8 + 0.0.9 @@ -81,7 +88,7 @@ A wider range of models can be retrieved at runtime by adding the model download org.pitest.voices model-downloader - 0.0.8 + 0.0.9 ``` @@ -109,7 +116,7 @@ Once the dependency has been added org.pitest.voices openvoice-phonemizer - 0.0.8 + 0.0.9 ``` @@ -120,6 +127,29 @@ ChorusConfig config = chorusConfig(Dictionaries.empty()) .withModel(new OpenVoiceSupplier()); ``` +## Using Kokoro Models + +The kokoro-runtime dependency provides the kokoro model and 11 voices. Usage and dictionary/phonemizer selection are then the same as for piper models. + +```xml + + org.pitest.voices + kokoro-runtime + 0.0.9 + +``` + +```java +ChorusConfig config = chorusConfig(EnUkDictionary.en_uk()); +try (Chorus chorus = new Chorus(config)) { + Voice v1 = chorus.voice(KokoroModels.afSarah()) + .withSpeed(1.1f); + + Audio audio = v1.say("Kokoro also works!"); + audio.save(some path); +} +``` + ## Running on GPU Models can be run on GPU instead of CPU by using the `onnxruntime_gpu` dependency instead of `onnxruntime`. It is @@ -135,6 +165,8 @@ ChorusConfig config = gpuChorusConfig(EnUkDictionary.en_uk()); This runs the model on gpu 0 with no other options set. More complex setups can be configured using the `withCudaOptions` method on ChorusConfig. +There seems to be little benefit in using a GPU with the piper models, but inference can be much faster for kokoro models. 
+ ## Pauses Voices will add pauses if it encounters the following markdown symbols diff --git a/kokoro/src/test/java/org/pitest/voices/kokoro/KokoroTest.java b/kokoro/src/test/java/org/pitest/voices/kokoro/KokoroTest.java index f7a1d24..d57d78a 100644 --- a/kokoro/src/test/java/org/pitest/voices/kokoro/KokoroTest.java +++ b/kokoro/src/test/java/org/pitest/voices/kokoro/KokoroTest.java @@ -28,6 +28,16 @@ void wordsWithPauseSymbols() { } } + @Test + void camus() { + try (Chorus chorus = new Chorus(config)) { + Voice v1 = chorus.voice(KokoroModels.afSarah()) + .withSpeed(1.1f); + var audio = v1.say("Mother died today. Or maybe, yesterday; I can't be sure."); + Play.play(audio); + } + } + @Test void gibson() { try (Chorus chorus = new Chorus(config)) { diff --git a/model_downloader/src/test/java/org/pitest/voices/download/UsModelsTest.java b/model_downloader/src/test/java/org/pitest/voices/download/UsModelsTest.java index 44cf38e..8e252e3 100644 --- a/model_downloader/src/test/java/org/pitest/voices/download/UsModelsTest.java +++ b/model_downloader/src/test/java/org/pitest/voices/download/UsModelsTest.java @@ -15,7 +15,7 @@ class UsModelsTest { Path cache = ChorusConfig.chorusConfig(Dictionaries.empty()).base(); - + @Test void bryceMedium() throws IOException { var model = UsModels.bryceMedium();