Add JavaScript API (WebAssembly) for Kokoro TTS 1.0 (#1809)
This commit is contained in:
9
.github/scripts/test-nodejs-npm.sh
vendored
9
.github/scripts/test-nodejs-npm.sh
vendored
@@ -10,12 +10,21 @@ ls -lh
|
|||||||
ls -lh node_modules
|
ls -lh node_modules
|
||||||
|
|
||||||
# offline tts
|
# offline tts
|
||||||
|
#
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
|
||||||
|
tar xf kokoro-multi-lang-v1_0.tar.bz2
|
||||||
|
rm kokoro-multi-lang-v1_0.tar.bz2
|
||||||
|
|
||||||
|
node ./test-offline-tts-kokoro-zh-en.js
|
||||||
|
ls -lh *.wav
|
||||||
|
rm -rf kokoro-multi-lang-v1_0
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2
|
||||||
tar xf kokoro-en-v0_19.tar.bz2
|
tar xf kokoro-en-v0_19.tar.bz2
|
||||||
rm kokoro-en-v0_19.tar.bz2
|
rm kokoro-en-v0_19.tar.bz2
|
||||||
|
|
||||||
node ./test-offline-tts-kokoro-en.js
|
node ./test-offline-tts-kokoro-en.js
|
||||||
|
rm -rf kokoro-en-v0_19
|
||||||
|
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
|||||||
40
nodejs-examples/test-offline-tts-kokoro-zh-en.js
Normal file
40
nodejs-examples/test-offline-tts-kokoro-zh-en.js
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
function createOfflineTts() {
|
||||||
|
let offlineTtsKokoroModelConfig = {
|
||||||
|
model: './kokoro-multi-lang-v1_0/model.onnx',
|
||||||
|
voices: './kokoro-multi-lang-v1_0/voices.bin',
|
||||||
|
tokens: './kokoro-multi-lang-v1_0/tokens.txt',
|
||||||
|
dataDir: './kokoro-multi-lang-v1_0/espeak-ng-data',
|
||||||
|
dictDir: './kokoro-multi-lang-v1_0/dict',
|
||||||
|
lexicon:
|
||||||
|
'./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt',
|
||||||
|
lengthScale: 1.0,
|
||||||
|
};
|
||||||
|
let offlineTtsModelConfig = {
|
||||||
|
offlineTtsKokoroModelConfig: offlineTtsKokoroModelConfig,
|
||||||
|
numThreads: 1,
|
||||||
|
debug: 1,
|
||||||
|
provider: 'cpu',
|
||||||
|
};
|
||||||
|
|
||||||
|
let offlineTtsConfig = {
|
||||||
|
offlineTtsModelConfig: offlineTtsModelConfig,
|
||||||
|
maxNumSentences: 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
|
||||||
|
}
|
||||||
|
|
||||||
|
const tts = createOfflineTts();
|
||||||
|
const speakerId = 49;
|
||||||
|
const speed = 1.0;
|
||||||
|
const text =
|
||||||
|
'中英文语音合成测试。This is generated by next generation Kaldi using Kokoro without Misaki. 你觉得中英文说的如何呢?'
|
||||||
|
|
||||||
|
const audio = tts.generate({text: text, sid: speakerId, speed: speed});
|
||||||
|
tts.save('./test-kokoro-zh-en-49.wav', audio);
|
||||||
|
console.log('Saved to test-kokoro-zh-en-49.wav successfully.');
|
||||||
|
tts.free();
|
||||||
@@ -141,12 +141,15 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
|
|||||||
const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
|
const voicesLen = Module.lengthBytesUTF8(config.voices) + 1;
|
||||||
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
|
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
|
||||||
const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
|
const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
|
||||||
|
const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
|
||||||
|
const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
|
||||||
|
|
||||||
const n = modelLen + voicesLen + tokensLen + dataDirLen;
|
const n =
|
||||||
|
modelLen + voicesLen + tokensLen + dataDirLen + dictDirLen + lexiconLen;
|
||||||
|
|
||||||
const buffer = Module._malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
|
|
||||||
const len = 5 * 4;
|
const len = 7 * 4;
|
||||||
const ptr = Module._malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
let offset = 0;
|
let offset = 0;
|
||||||
@@ -162,6 +165,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
|
|||||||
Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
|
Module.stringToUTF8(config.dataDir || '', buffer + offset, dataDirLen);
|
||||||
offset += dataDirLen;
|
offset += dataDirLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
|
||||||
|
offset += dictDirLen;
|
||||||
|
|
||||||
|
Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
|
||||||
|
offset += lexiconLen;
|
||||||
|
|
||||||
offset = 0;
|
offset = 0;
|
||||||
Module.setValue(ptr, buffer + offset, 'i8*');
|
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||||
offset += modelLen;
|
offset += modelLen;
|
||||||
@@ -177,6 +186,12 @@ function initSherpaOnnxOfflineTtsKokoroModelConfig(config, Module) {
|
|||||||
|
|
||||||
Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float');
|
Module.setValue(ptr + 16, config.lengthScale || 1.0, 'float');
|
||||||
|
|
||||||
|
Module.setValue(ptr + 20, buffer + offset, 'i8*');
|
||||||
|
offset += dictDirLen;
|
||||||
|
|
||||||
|
Module.setValue(ptr + 24, buffer + offset, 'i8*');
|
||||||
|
offset += lexiconLen;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len,
|
buffer: buffer, ptr: ptr, len: len,
|
||||||
}
|
}
|
||||||
@@ -216,6 +231,8 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
|
|||||||
tokens: '',
|
tokens: '',
|
||||||
lengthScale: 1.0,
|
lengthScale: 1.0,
|
||||||
dataDir: '',
|
dataDir: '',
|
||||||
|
dictDir: '',
|
||||||
|
lexicon: '',
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -382,6 +399,8 @@ function createOfflineTts(Module, myConfig) {
|
|||||||
tokens: '',
|
tokens: '',
|
||||||
dataDir: '',
|
dataDir: '',
|
||||||
lengthScale: 1.0,
|
lengthScale: 1.0,
|
||||||
|
dictDir: '',
|
||||||
|
lexicon: '',
|
||||||
};
|
};
|
||||||
|
|
||||||
const offlineTtsModelConfig = {
|
const offlineTtsModelConfig = {
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ extern "C" {
|
|||||||
|
|
||||||
static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) == 8 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 5 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) == 7 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
|
static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
|
||||||
sizeof(SherpaOnnxOfflineTtsVitsModelConfig) +
|
sizeof(SherpaOnnxOfflineTtsVitsModelConfig) +
|
||||||
sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) +
|
sizeof(SherpaOnnxOfflineTtsMatchaModelConfig) +
|
||||||
@@ -56,6 +56,8 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
|
|||||||
fprintf(stdout, "tokens: %s\n", kokoro->tokens);
|
fprintf(stdout, "tokens: %s\n", kokoro->tokens);
|
||||||
fprintf(stdout, "data_dir: %s\n", kokoro->data_dir);
|
fprintf(stdout, "data_dir: %s\n", kokoro->data_dir);
|
||||||
fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale);
|
fprintf(stdout, "length scale: %.3f\n", kokoro->length_scale);
|
||||||
|
fprintf(stdout, "dict_dir: %s\n", kokoro->dict_dir);
|
||||||
|
fprintf(stdout, "lexicon: %s\n", kokoro->lexicon);
|
||||||
|
|
||||||
fprintf(stdout, "----------tts model config----------\n");
|
fprintf(stdout, "----------tts model config----------\n");
|
||||||
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
||||||
|
|||||||
Reference in New Issue
Block a user