Add Java and Kotlin API examples for the Kokoro multi-lingual (Chinese +
English) TTS model, and expose the `lexicon` and `dictDir` options through the
Java, Kotlin, and JNI configuration layers.

diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml
index 4ede6514..ed5901e7 100644
--- a/.github/workflows/run-java-test.yaml
+++ b/.github/workflows/run-java-test.yaml
@@ -234,11 +234,13 @@ jobs:
         run: |
           cd ./java-api-examples
 
+          ./run-non-streaming-tts-kokoro-zh-en.sh
           ./run-non-streaming-tts-kokoro-en.sh
           ./run-non-streaming-tts-matcha-zh.sh
           ./run-non-streaming-tts-matcha-en.sh
           ls -lh
 
+          rm -rf kokoro-multi-*
           rm -rf kokoro-en-*
           rm -rf matcha-icefall-*
 
diff --git a/java-api-examples/NonStreamingTtsKokoroZhEn.java b/java-api-examples/NonStreamingTtsKokoroZhEn.java
new file mode 100644
index 00000000..b0282fce
--- /dev/null
+++ b/java-api-examples/NonStreamingTtsKokoroZhEn.java
@@ -0,0 +1,71 @@
+// Copyright 2025 Xiaomi Corporation
+
+// This file shows how to use a Kokoro multi-lingual model
+// to convert Chinese and English text to speech
+import com.k2fsa.sherpa.onnx.*;
+
+/**
+ * Synthesizes a mixed Chinese/English sentence with a Kokoro multi-lingual model,
+ * saves the audio to a wave file, and prints timing statistics.
+ */
+public class NonStreamingTtsKokoroZhEn {
+  public static void main(String[] args) {
+    // please visit
+    // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+    // to download model files
+    String model = "./kokoro-multi-lang-v1_0/model.onnx";
+    String voices = "./kokoro-multi-lang-v1_0/voices.bin";
+    String tokens = "./kokoro-multi-lang-v1_0/tokens.txt";
+    String dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data";
+    String dictDir = "./kokoro-multi-lang-v1_0/dict";
+    // several lexicon files are passed as one comma-separated string
+    String lexicon =
+        "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt";
+    String text =
+        "中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki."
+            + " 你觉得中英文说的如何呢?";
+
+    OfflineTtsKokoroModelConfig kokoroModelConfig =
+        OfflineTtsKokoroModelConfig.builder()
+            .setModel(model)
+            .setVoices(voices)
+            .setTokens(tokens)
+            .setDataDir(dataDir)
+            .setDictDir(dictDir)
+            .setLexicon(lexicon)
+            .build();
+
+    OfflineTtsModelConfig modelConfig =
+        OfflineTtsModelConfig.builder()
+            .setKokoro(kokoroModelConfig)
+            .setNumThreads(2)
+            .setDebug(true)
+            .build();
+
+    OfflineTtsConfig config = OfflineTtsConfig.builder().setModel(modelConfig).build();
+    OfflineTts tts = new OfflineTts(config);
+
+    int sid = 0; // this model has 53 speakers. You can use sid in the range 0-52
+    float speed = 1.0f;
+    long start = System.currentTimeMillis();
+    GeneratedAudio audio = tts.generate(text, sid, speed);
+    long stop = System.currentTimeMillis();
+
+    float timeElapsedSeconds = (stop - start) / 1000.0f;
+
+    // audio duration in seconds = number of samples / sample rate
+    float audioDuration = audio.getSamples().length / (float) audio.getSampleRate();
+    // RTF below 1 means generation took less time than the audio lasts
+    float realTimeFactor = timeElapsedSeconds / audioDuration;
+
+    String waveFilename = "tts-kokoro-zh-en.wav";
+    audio.save(waveFilename);
+    System.out.printf("-- elapsed : %.3f seconds\n", timeElapsedSeconds);
+    System.out.printf("-- audio duration: %.3f seconds\n", audioDuration);
+    System.out.printf("-- real-time factor (RTF): %.3f\n", realTimeFactor);
+    System.out.printf("-- text: %s\n", text);
+    System.out.printf("-- Saved to %s\n", waveFilename);
+
+    tts.release();
+  }
+}
diff --git a/java-api-examples/run-non-streaming-tts-kokoro-zh-en.sh b/java-api-examples/run-non-streaming-tts-kokoro-zh-en.sh
new file mode 100755
index 00000000..a9da93a5
--- /dev/null
+++ b/java-api-examples/run-non-streaming-tts-kokoro-zh-en.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+set -ex
+
+if [[ ! -f ../build/lib/libsherpa-onnx-jni.dylib && ! -f ../build/lib/libsherpa-onnx-jni.so ]]; then
+  mkdir -p ../build
+  pushd ../build
+  cmake \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    -DSHERPA_ONNX_ENABLE_JNI=ON \
+    ..
+
+  make -j4
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ../sherpa-onnx/java-api/build/sherpa-onnx.jar ]; then
+  pushd ../sherpa-onnx/java-api
+  make
+  popd
+fi
+
+# please visit
+# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+# to download more models
+if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+  tar xf kokoro-multi-lang-v1_0.tar.bz2
+  rm kokoro-multi-lang-v1_0.tar.bz2
+fi
+
+java \
+  -Djava.library.path="$PWD/../build/lib" \
+  -cp ../sherpa-onnx/java-api/build/sherpa-onnx.jar \
+  NonStreamingTtsKokoroZhEn.java
diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh
index 02339f95..c815b6ad 100755
--- a/kotlin-api-examples/run.sh
+++ b/kotlin-api-examples/run.sh
@@ -115,6 +115,12 @@ function testTts() {
     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
   fi
 
+  if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then
+    curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
+    tar xf kokoro-multi-lang-v1_0.tar.bz2
+    rm kokoro-multi-lang-v1_0.tar.bz2
+  fi
+
   if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then
     curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
     tar xf kokoro-en-v0_19.tar.bz2
diff --git a/kotlin-api-examples/test_tts.kt b/kotlin-api-examples/test_tts.kt
index d9637873..c387dcf7 100644
--- a/kotlin-api-examples/test_tts.kt
+++ b/kotlin-api-examples/test_tts.kt
@@ -3,10 +3,34 @@ package com.k2fsa.sherpa.onnx
 fun main() {
   testVits()
   testMatcha()
-  testKokoro()
+  testKokoroEn()
+  testKokoroZhEn()
 }
 
-fun testKokoro() {
+fun testKokoroZhEn() {
+  // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+  var config = OfflineTtsConfig(
+    model=OfflineTtsModelConfig(
+      kokoro=OfflineTtsKokoroModelConfig(
+        model="./kokoro-multi-lang-v1_0/model.onnx",
+        voices="./kokoro-multi-lang-v1_0/voices.bin",
+        tokens="./kokoro-multi-lang-v1_0/tokens.txt",
+        dataDir="./kokoro-multi-lang-v1_0/espeak-ng-data",
+        dictDir="./kokoro-multi-lang-v1_0/dict",
+        lexicon="./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt",
+      ),
+      numThreads=2,
+      debug=true,
+    ),
+  )
+  val tts = OfflineTts(config=config)
+  val audio = tts.generateWithCallback(text="中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?", callback=::callback)
+  audio.save(filename="test-kokoro-zh-en.wav")
+  tts.release()
+  println("Saved to test-kokoro-zh-en.wav")
+}
+
+fun testKokoroEn() {
   // see https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
   var config = OfflineTtsConfig(
     model=OfflineTtsModelConfig(
diff --git a/scripts/kokoro/v1.0/add_meta_data.py b/scripts/kokoro/v1.0/add_meta_data.py
index 114dfe7b..eab4c4ff 100755
--- a/scripts/kokoro/v1.0/add_meta_data.py
+++ b/scripts/kokoro/v1.0/add_meta_data.py
@@ -27,7 +27,7 @@ def main():
 
     meta_data = {
         "model_type": "kokoro",
-        "language": "English",
+        "language": "multi-lang, e.g., English, Chinese",
         "has_espeak": 1,
         "sample_rate": 24000,
         "version": 2,
diff --git a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
index 4088acfd..67fbf5ea 100644
--- a/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
+++ b/sherpa-onnx/java-api/src/com/k2fsa/sherpa/onnx/OfflineTtsKokoroModelConfig.java
@@ -5,14 +5,18 @@ public class OfflineTtsKokoroModelConfig {
     private final String model;
     private final String voices;
     private final String tokens;
+    private final String lexicon;
     private final String dataDir;
+    private final String dictDir;
     private final float lengthScale;
 
     private OfflineTtsKokoroModelConfig(Builder builder) {
         this.model = builder.model;
         this.voices = builder.voices;
         this.tokens = builder.tokens;
+        this.lexicon = builder.lexicon;
         this.dataDir = builder.dataDir;
+        this.dictDir = builder.dictDir;
         this.lengthScale = builder.lengthScale;
     }
 
@@ -45,7 +49,9 @@ public class OfflineTtsKokoroModelConfig {
         private String model = "";
         private String voices = "";
         private String tokens = "";
+        private String lexicon = "";
         private String dataDir = "";
+        private String dictDir = "";
         private float lengthScale = 1.0f;
 
         public OfflineTtsKokoroModelConfig build() {
@@ -67,11 +73,21 @@ public class OfflineTtsKokoroModelConfig {
             return this;
         }
 
+        public Builder setLexicon(String lexicon) {
+            this.lexicon = lexicon;
+            return this;
+        }
+
         public Builder setDataDir(String dataDir) {
             this.dataDir = dataDir;
             return this;
         }
 
+        public Builder setDictDir(String dictDir) {
+            this.dictDir = dictDir;
+            return this;
+        }
+
         public Builder setLengthScale(float lengthScale) {
             this.lengthScale = lengthScale;
             return this;
diff --git a/sherpa-onnx/jni/offline-tts.cc b/sherpa-onnx/jni/offline-tts.cc
index e80b90a5..8d8f9029 100644
--- a/sherpa-onnx/jni/offline-tts.cc
+++ b/sherpa-onnx/jni/offline-tts.cc
@@ -137,12 +137,24 @@ static OfflineTtsConfig GetOfflineTtsConfig(JNIEnv *env, jobject config) {
   ans.model.kokoro.tokens = p;
   env->ReleaseStringUTFChars(s, p);
 
+  fid = env->GetFieldID(kokoro_cls, "lexicon", "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(kokoro, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model.kokoro.lexicon = p;
+  env->ReleaseStringUTFChars(s, p);
+
   fid = env->GetFieldID(kokoro_cls, "dataDir", "Ljava/lang/String;");
   s = (jstring)env->GetObjectField(kokoro, fid);
   p = env->GetStringUTFChars(s, nullptr);
   ans.model.kokoro.data_dir = p;
   env->ReleaseStringUTFChars(s, p);
 
+  fid = env->GetFieldID(kokoro_cls, "dictDir", "Ljava/lang/String;");
+  s = (jstring)env->GetObjectField(kokoro, fid);
+  p = env->GetStringUTFChars(s, nullptr);
+  ans.model.kokoro.dict_dir = p;
+  env->ReleaseStringUTFChars(s, p);
+
   fid = env->GetFieldID(kokoro_cls, "lengthScale", "F");
   ans.model.kokoro.length_scale = env->GetFloatField(kokoro, fid);
 
diff --git a/sherpa-onnx/kotlin-api/Tts.kt b/sherpa-onnx/kotlin-api/Tts.kt
index ce85a04d..1dc1ebab 100644
--- a/sherpa-onnx/kotlin-api/Tts.kt
+++ b/sherpa-onnx/kotlin-api/Tts.kt
@@ -30,6 +30,8 @@ data class OfflineTtsKokoroModelConfig(
     var voices: String = "",
     var tokens: String = "",
     var dataDir: String = "",
+    var lexicon: String = "",
+    var dictDir: String = "",
     var lengthScale: Float = 1.0f,
 )
 
@@ -254,6 +256,8 @@ fun getOfflineTtsConfig(
             voices = "$modelDir/$voices",
             tokens = "$modelDir/tokens.txt",
             dataDir = dataDir,
+            lexicon = if (lexicon.isEmpty() || "," in lexicon) lexicon else "$modelDir/$lexicon",
+            dictDir = dictDir,
         )
     } else {
         OfflineTtsKokoroModelConfig()