diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index 8d17eae4..536310af 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -10,12 +10,21 @@ ls -lh ls -lh node_modules # offline tts +# +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 +tar xf kokoro-multi-lang-v1_0.tar.bz2 +rm kokoro-multi-lang-v1_0.tar.bz2 + +node ./test-offline-tts-kokoro-zh-en.js +ls -lh *.wav +rm -rf kokoro-multi-lang-v1_0 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 tar xf kokoro-en-v0_19.tar.bz2 rm kokoro-en-v0_19.tar.bz2 node ./test-offline-tts-kokoro-en.js +rm -rf kokoro-en-v0_19 ls -lh diff --git a/nodejs-examples/test-offline-tts-kokoro-zh-en.js b/nodejs-examples/test-offline-tts-kokoro-zh-en.js new file mode 100644 index 00000000..244219f2 --- /dev/null +++ b/nodejs-examples/test-offline-tts-kokoro-zh-en.js @@ -0,0 +1,40 @@ +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) + +const sherpa_onnx = require('sherpa-onnx'); + +function createOfflineTts() { + let offlineTtsKokoroModelConfig = { + model: './kokoro-multi-lang-v1_0/model.onnx', + voices: './kokoro-multi-lang-v1_0/voices.bin', + tokens: './kokoro-multi-lang-v1_0/tokens.txt', + dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data', + dictDir: './kokoro-multi-lang-v1_0/dict', + lexicon: + './kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt', + lengthScale: 1.0, + }; + let offlineTtsModelConfig = { + offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig, + numThreads: 1, + debug: 1, + provider: 'cpu', + }; + + let offlineTtsConfig = { + offlineTtsModelConfig: offlineTtsModelConfig, + maxNumSentences: 1, + }; + + return sherpa_onnx.createOfflineTts(offlineTtsConfig); +} + +const tts = createOfflineTts(); +const speakerId = 49; +const speed = 1.0; +const text = + '中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?' + +const audio = tts.generate({text: text, sid: speakerId, speed: speed}); +tts.save('./test-kokoro-zh-en-49.wav', audio); +console.log('Saved to test-kokoro-zh-en-49.wav successfully.'); +tts.free(); diff --git a/wasm/tts/sherpa-onnx-tts.js b/wasm/tts/sherpa-onnx-tts.js index 833ee936..e08c9e8d 100644 --- a/wasm/tts/sherpa-onnx-tts.js +++ b/wasm/tts/sherpa-onnx-tts.js @@ -141,12 +141,15 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { const voicesLen = Module.lengthBytesUTF8(config.voices) + 1; const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1; const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1; + const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1; + const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1; - const n = modelLen + voicesLen + tokensLen + dataDirLen; + const n = + modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen; const buffer = Module._malloc(n); - const len = 5 * 4; + const len = 7 * 4; const ptr = Module._malloc(len); let offset = 0; @@ -162,6 +165,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen); offset += dataDirLen; + Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen); + offset += dictDirLen; + + Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen); + offset += lexiconLen; + offset = 0; Module.setValue(ptr, buffer + offset, 'i8*'); offset += modelLen; @@ -177,6 +186,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) { Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float'); + Module.setValue(ptr + 20, buffer + offset, 'i8*'); + offset += dictDirLen; + + Module.setValue(ptr + 24, buffer + offset, 'i8*'); + offset += lexiconLen; + return { buffer: buffer, ptr: ptr, len: len, } @@ -216,6 +231,8 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) { tokens: '', lengthScale: 1.0, dataDir: '', + dictDir: '', + lexicon: '', }; } @@ -382,6 +399,8 @@ function createOfflineTts(Module, myConfig) { tokens: '', dataDir: '', lengthScale: 1.0, + dictDir: '', + lexicon: '', }; const offlineTtsModelConfig = { diff --git a/wasm/tts/sherpa-onnx-wasm-main-tts.cc b/wasm/tts/sherpa-onnx-wasm-main-tts.cc index f2cd42c5..07bf4d42 100644 --- a/wasm/tts/sherpa-onnx-wasm-main-tts.cc +++ b/wasm/tts/sherpa-onnx-wasm-main-tts.cc @@ -15,7 +15,7 @@ extern "C" { static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, ""); -static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 5 * 4, ""); +static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, ""); static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) == sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) + @@ -56,6 +56,8 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) { fprintf(stdout, "tokens: %s\n", kokoro->tokens); fprintf(stdout, "data_dir: %s\n", kokoro->data_dir); fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale); + fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir); + fprintf(stdout, "lexicon: %s\n", kokoro->lexicon); fprintf(stdout, "----------tts model config----------\n"); fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);