Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)
This commit is contained in:
@@ -42,9 +42,45 @@ node ./test-offline-speaker-diarization.js
|
||||
|
||||
In the following, we demonstrate how to run text-to-speech.
|
||||
|
||||
## ./test-offline-tts-en.js
|
||||
## ./test-offline-tts-matcha-zh.js
|
||||
|
||||
[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
|
||||
[./test-offline-tts-matcha-zh.js](./test-offline-tts-matcha-zh.js) shows how to use
|
||||
[matcha-icefall-zh-baker](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)
|
||||
for text-to-speech.
|
||||
|
||||
You can use the following commands to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
|
||||
tar xvf matcha-icefall-zh-baker.tar.bz2
|
||||
rm matcha-icefall-zh-baker.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||
|
||||
node ./test-offline-tts-matcha-zh.js
|
||||
```
|
||||
|
||||
## ./test-offline-tts-matcha-en.js
|
||||
|
||||
[./test-offline-tts-matcha-en.js](./test-offline-tts-matcha-en.js) shows how to use
|
||||
[matcha-icefall-en_US-ljspeech](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)
|
||||
for text-to-speech.
|
||||
|
||||
You can use the following commands to run it:
|
||||
|
||||
```bash
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||
|
||||
node ./test-offline-tts-matcha-en.js
|
||||
```
|
||||
|
||||
## ./test-offline-tts-vits-en.js
|
||||
|
||||
[./test-offline-tts-vits-en.js](./test-offline-tts-vits-en.js) shows how to use
|
||||
[vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2)
|
||||
for text-to-speech.
|
||||
|
||||
@@ -53,12 +89,12 @@ You can use the following command to run it:
|
||||
```bash
|
||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
|
||||
tar xvf vits-piper-en_US-amy-low.tar.bz2
|
||||
node ./test-offline-tts-en.js
|
||||
node ./test-offline-tts-vits-en.js
|
||||
```
|
||||
|
||||
## ./test-offline-tts-zh.js
|
||||
## ./test-offline-tts-vits-zh.js
|
||||
|
||||
[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
|
||||
[./test-offline-tts-vits-zh.js](./test-offline-tts-vits-zh.js) shows how to use
|
||||
a VITS pretrained model
|
||||
[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
|
||||
for text-to-speech.
|
||||
@@ -68,7 +104,7 @@ You can use the following command to run it:
|
||||
```bash
|
||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||
tar xvf vits-icefall-zh-aishell3.tar.bz2
|
||||
node ./test-offline-tts-zh.js
|
||||
node ./test-offline-tts-vits-zh.js
|
||||
```
|
||||
|
||||
# Speech-to-text
|
||||
|
||||
40
nodejs-examples/test-offline-tts-matcha-en.js
Normal file
40
nodejs-examples/test-offline-tts-matcha-en.js
Normal file
@@ -0,0 +1,40 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx');
|
||||
|
||||
// Creates the offline text-to-speech engine used by this example.
//
// The configuration is three nested objects: the Matcha model settings sit
// inside the generic model settings, which sit inside the top-level TTS
// config handed to sherpa_onnx.createOfflineTts(). Every path is relative
// ('./...'), so the model directory and the vocoder file must be reachable
// from the directory the script is started in.
function createOfflineTts() {
  const config = {
    offlineTtsModelConfig: {
      offlineTtsMatchaModelConfig: {
        acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
        vocoder: './hifigan_v2.onnx',
        lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
        tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
        dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',

        noiseScale: 0.667,
        lengthScale: 1.0,
      },
      numThreads: 1,
      debug: 1,  // NOTE(review): presumably turns on verbose logging - confirm
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(config);
}
|
||||
|
||||
// Driver: synthesize one English passage with the engine built by
// createOfflineTts() and write the result to ./test-matcha-en.wav.
const tts = createOfflineTts();

try {
  const speakerId = 0;
  const speed = 1.0;
  const text =
      'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

  const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  tts.save('./test-matcha-en.wav', audio);
  console.log('Saved to test-matcha-en.wav successfully.');
} finally {
  // The engine is released explicitly via free(); doing it in `finally`
  // ensures that still happens when generate() or save() throws.
  tts.free();
}
|
||||
41
nodejs-examples/test-offline-tts-matcha-zh.js
Normal file
41
nodejs-examples/test-offline-tts-matcha-zh.js
Normal file
@@ -0,0 +1,41 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
const sherpa_onnx = require('sherpa-onnx');
|
||||
|
||||
// Creates the offline text-to-speech engine used by this example.
//
// The configuration is three nested objects: the Matcha model settings sit
// inside the generic model settings, which sit inside the top-level TTS
// config handed to sherpa_onnx.createOfflineTts(). Every path is relative
// ('./...'), so the model directory and the vocoder file must be reachable
// from the directory the script is started in.
function createOfflineTts() {
  const config = {
    offlineTtsModelConfig: {
      offlineTtsMatchaModelConfig: {
        acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
        vocoder: './hifigan_v2.onnx',
        lexicon: './matcha-icefall-zh-baker/lexicon.txt',
        tokens: './matcha-icefall-zh-baker/tokens.txt',
        dictDir: './matcha-icefall-zh-baker/dict',
        noiseScale: 0.667,
        lengthScale: 1.0,
      },
      numThreads: 1,
      debug: 1,  // NOTE(review): presumably turns on verbose logging - confirm
      provider: 'cpu',
    },
    maxNumSentences: 1,
    // Comma-separated list of rule FST files (phone, date, number).
    ruleFsts:
        './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
  };

  return sherpa_onnx.createOfflineTts(config);
}
|
||||
|
||||
// Driver: synthesize one Chinese passage (it includes a date, phone numbers
// and an amount of money) with the engine built by createOfflineTts() and
// write the result to ./test-matcha-zh.wav.
const tts = createOfflineTts();

try {
  const speakerId = 0;
  const speed = 1.0;
  const text =
      '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。';

  const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  tts.save('./test-matcha-zh.wav', audio);
  console.log('Saved to test-matcha-zh.wav successfully.');
} finally {
  // The engine is released explicitly via free(); doing it in `finally`
  // ensures that still happens when generate() or save() throws.
  tts.free();
}
|
||||
@@ -37,7 +37,7 @@ const audio = tts.generate({
|
||||
speed: speed
|
||||
});
|
||||
|
||||
tts.save('./test-en.wav', audio);
|
||||
console.log('Saved to test-en.wav successfully.');
|
||||
tts.save('./test-vits-en.wav', audio);
|
||||
console.log('Saved to test-vits-en.wav successfully.');
|
||||
|
||||
tts.free();
|
||||
@@ -34,6 +34,6 @@ const speakerId = 66;
|
||||
const speed = 1.0;
|
||||
const audio = tts.generate(
|
||||
{text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
|
||||
tts.save('./test-zh.wav', audio);
|
||||
console.log('Saved to test-zh.wav successfully.');
|
||||
tts.save('./test-vits-zh.wav', audio);
|
||||
console.log('Saved to test-vits-zh.wav successfully.');
|
||||
tts.free();
|
||||
Reference in New Issue
Block a user