diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh
index e2d8487b..3d755f88 100755
--- a/.github/scripts/test-nodejs-addon-npm.sh
+++ b/.github/scripts/test-nodejs-addon-npm.sh
@@ -85,6 +85,13 @@ fi
 
 echo "----------tts----------"
 
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_en.js
+ls -lh *.wav
+
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
 rm matcha-icefall-en_US-ljspeech.tar.bz2
diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
index 7baf3ce8..55d4adb2 100644
--- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
+++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc
@@ -53,6 +53,34 @@ static SherpaOnnxOfflineTtsMatchaModelConfig GetOfflineTtsMatchaModelConfig(
   return c;
 }
 
+// Builds the Kokoro TTS model config from the optional "kokoro" entry of
+// `obj`.
+//
+// Returns a zero-filled config when `obj` has no "kokoro" key or when that
+// value is not an object. Otherwise model, voices, tokens, dataDir and
+// lengthScale are handed to the SHERPA_ONNX_ASSIGN_ATTR_* macros (defined
+// elsewhere; presumably they copy each attribute of `o` into `c`). The string
+// fields are later freed with delete[] by the cleanup code in this file.
+static SherpaOnnxOfflineTtsKokoroModelConfig GetOfflineTtsKokoroModelConfig(
+    Napi::Object obj) {
+  SherpaOnnxOfflineTtsKokoroModelConfig c;
+  memset(&c, 0, sizeof(c));
+
+  if (!obj.Has("kokoro") || !obj.Get("kokoro").IsObject()) {
+    // No usable "kokoro" section: hand back the all-zero config.
+    return c;
+  }
+
+  Napi::Object o = obj.Get("kokoro").As<Napi::Object>();
+  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(voices, voices);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
+  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
+  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);
+
+  return c;
+}
+
 static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
     Napi::Object obj) {
   SherpaOnnxOfflineTtsModelConfig c;
@@ -66,6 +94,7 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
 
   c.vits = GetOfflineTtsVitsModelConfig(o);
   c.matcha = GetOfflineTtsMatchaModelConfig(o);
+  c.kokoro = GetOfflineTtsKokoroModelConfig(o);
 
   SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
 
@@ -180,6 +209,22 @@ static Napi::External CreateOfflineTtsWrapper(
     delete[] c.model.matcha.dict_dir;
   }
 
+  if (c.model.kokoro.model) {
+    delete[] c.model.kokoro.model;
+  }
+
+  if (c.model.kokoro.voices) {
+    delete[] c.model.kokoro.voices;
+  }
+
+  if (c.model.kokoro.tokens) {
+    delete[] c.model.kokoro.tokens;
+  }
+
+  if (c.model.kokoro.data_dir) {
+    delete[] c.model.kokoro.data_dir;
+  }
+
   if (c.model.provider) {
     delete[] c.model.provider;
   }
diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md
index 2de8a214..ccb87a7a 100644
--- a/nodejs-addon-examples/README.md
+++ b/nodejs-addon-examples/README.md
@@ -133,6 +133,7 @@ The following tables list the examples in this folder.
 
 |File| Description|
 |---|---|
+|[./test_tts_non_streaming_kokoro_en.js](./test_tts_non_streaming_kokoro_en.js)| Text-to-speech with a [Kokoro English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html)|
 |[./test_tts_non_streaming_matcha_icefall_en.js](./test_tts_non_streaming_matcha_icefall_en.js)| Text-to-speech with a [MatchaTTS English Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)|
 |[./test_tts_non_streaming_matcha_icefall_zhjs](./test_tts_non_streaming_matcha_icefall_zh.js)| Text-to-speech with a [MatchaTTS Chinese Model](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)|
 |[./test_tts_non_streaming_vits_piper_en.js](./test_tts_non_streaming_vits_piper_en.js)| Text-to-speech with a [piper](https://github.com/rhasspy/piper) English model|
@@ -347,6 +348,16 @@ npm install naudiodon2
 node ./test_vad_asr_non_streaming_sense_voice_microphone.js
 ```
 
+### Text-to-speech with Kokoro TTS models (English TTS)
+
+```bash
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
+tar xf kokoro-en-v0_19.tar.bz2
+rm kokoro-en-v0_19.tar.bz2
+
+node ./test_tts_non_streaming_kokoro_en.js
+```
+
 ### Text-to-speech with MatchaTTS models (English TTS)
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
diff --git a/nodejs-addon-examples/test_tts_non_streaming_kokoro_en.js b/nodejs-addon-examples/test_tts_non_streaming_kokoro_en.js
new file mode 100644
index 00000000..84b03982
--- /dev/null
+++ b/nodejs-addon-examples/test_tts_non_streaming_kokoro_en.js
@@ -0,0 +1,56 @@
+// Copyright (c) 2025 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+
+/**
+ * Creates an offline TTS object configured for the Kokoro English model.
+ *
+ * The paths are relative to the current working directory and assume that
+ * kokoro-en-v0_19.tar.bz2 has been downloaded and extracted there. Please
+ * refer to
+ * https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html
+ * to download model files.
+ *
+ * @returns {sherpa_onnx.OfflineTts} The TTS object built from the config.
+ */
+function createOfflineTts() {
+  const config = {
+    model: {
+      kokoro: {
+        model: './kokoro-en-v0_19/model.onnx',
+        voices: './kokoro-en-v0_19/voices.bin',
+        tokens: './kokoro-en-v0_19/tokens.txt',
+        dataDir: './kokoro-en-v0_19/espeak-ng-data',
+      },
+      debug: true,
+      numThreads: 1,
+      provider: 'cpu',
+    },
+    maxNumSentences: 1,
+  };
+  return new sherpa_onnx.OfflineTts(config);
+}
+
+const tts = createOfflineTts();
+
+const text =
+    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';
+
+// Time the synthesis so that the real-time factor can be reported.
+const start = Date.now();
+const audio = tts.generate({text: text, sid: 6, speed: 1.0});
+const stop = Date.now();
+const elapsed_seconds = (stop - start) / 1000;
+const duration = audio.samples.length / audio.sampleRate;
+// RTF < 1 means the audio was generated faster than it takes to play.
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds');
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3));
+
+const filename = 'test-kokoro-en-6.wav';
+sherpa_onnx.writeWave(
+    filename, {samples: audio.samples, sampleRate: audio.sampleRate});
+
+console.log(`Saved to ${filename}`);