Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)

This commit is contained in:
Fangjun Kuang
2025-01-05 15:08:19 +08:00
committed by GitHub
parent 1ef9e5ee3a
commit 3eced3e7ee
26 changed files with 677 additions and 88 deletions

View File

@@ -42,9 +42,45 @@ node ./test-offline-speaker-diarization.js
In the following, we demonstrate how to run text-to-speech.
## ./test-offline-tts-en.js
## ./test-offline-tts-matcha-zh.js
[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
[./test-offline-tts-matcha-zh.js](./test-offline-tts-matcha-zh.js) shows how to use
[matcha-icefall-zh-baker](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)
for text-to-speech.
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
node ./test-offline-tts-matcha-zh.js
```
## ./test-offline-tts-matcha-en.js
[./test-offline-tts-matcha-en.js](./test-offline-tts-matcha-en.js) shows how to use
[matcha-icefall-en_US-ljspeech](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)
for text-to-speech.
You can use the following command to run it:
```bash
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
node ./test-offline-tts-matcha-en.js
```
## ./test-offline-tts-vits-en.js
[./test-offline-tts-vits-en.js](./test-offline-tts-vits-en.js) shows how to use
[vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2)
for text-to-speech.
@@ -53,12 +89,12 @@ You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xvf vits-piper-en_US-amy-low.tar.bz2
node ./test-offline-tts-en.js
node ./test-offline-tts-vits-en.js
```
## ./test-offline-tts-zh.js
## ./test-offline-tts-vits-zh.js
[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
[./test-offline-tts-vits-zh.js](./test-offline-tts-vits-zh.js) shows how to use
a VITS pretrained model
[aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
for text-to-speech.
@@ -68,7 +104,7 @@ You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
tar xvf vits-icefall-zh-aishell3.tar.bz2
node ./test-offline-tts-zh.js
node ./test-offline-tts-vits-zh.js
```
# Speech-to-text

View File

@@ -0,0 +1,40 @@
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The configuration points at the matcha-icefall-en_US-ljspeech model files
 * and the hifigan_v2 vocoder via './'-relative paths, so those files are
 * expected to sit next to where the script is launched.
 *
 * @returns the value returned by sherpa_onnx.createOfflineTts() for this
 *     configuration.
 */
function createOfflineTts() {
  // The whole configuration is written as one nested literal:
  //   offlineTtsConfig -> offlineTtsModelConfig -> offlineTtsMatchaModelConfig
  const config = {
    offlineTtsModelConfig: {
      offlineTtsMatchaModelConfig: {
        acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
        vocoder: './hifigan_v2.onnx',
        lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
        tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
        dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
        noiseScale: 0.667,
        lengthScale: 1.0,
      },
      numThreads: 1,
      debug: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
  };

  return sherpa_onnx.createOfflineTts(config);
}
// Example driver: build the engine, synthesize one English passage for
// speaker id 0 with speed 1.0, save it to ./test-matcha-en.wav, and finally
// call tts.free() on the engine.
const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const text =
    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';

try {
  const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  tts.save('./test-matcha-en.wav', audio);
  console.log('Saved to test-matcha-en.wav successfully.');
} finally {
  // Always hand the engine back, even when generate() or save() throws;
  // without the finally, an exception would skip this call.
  tts.free();
}

View File

@@ -0,0 +1,41 @@
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
/**
 * Create the offline text-to-speech engine used by this example.
 *
 * The configuration points at the matcha-icefall-zh-baker model files, the
 * hifigan_v2 vocoder, and three rule FST files (phone, date, number) via
 * './'-relative paths, so those files are expected to sit next to where the
 * script is launched.
 *
 * @returns the value returned by sherpa_onnx.createOfflineTts() for this
 *     configuration.
 */
function createOfflineTts() {
  // The whole configuration is written as one nested literal:
  //   offlineTtsConfig -> offlineTtsModelConfig -> offlineTtsMatchaModelConfig
  const config = {
    offlineTtsModelConfig: {
      offlineTtsMatchaModelConfig: {
        acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
        vocoder: './hifigan_v2.onnx',
        lexicon: './matcha-icefall-zh-baker/lexicon.txt',
        tokens: './matcha-icefall-zh-baker/tokens.txt',
        dictDir: './matcha-icefall-zh-baker/dict',
        noiseScale: 0.667,
        lengthScale: 1.0,
      },
      numThreads: 1,
      debug: 1,
      provider: 'cpu',
    },
    maxNumSentences: 1,
    // Comma-separated list of rule FST files, passed as a single string.
    ruleFsts:
        './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
  };

  return sherpa_onnx.createOfflineTts(config);
}
// Example driver: build the engine, synthesize one Chinese passage (which
// also contains a date, phone-like numbers and an amount) for speaker id 0
// with speed 1.0, save it to ./test-matcha-zh.wav, and finally call
// tts.free() on the engine.
const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const text =
    '当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号拨打110或者18920240511。123456块钱。';

try {
  const audio = tts.generate({text: text, sid: speakerId, speed: speed});
  tts.save('./test-matcha-zh.wav', audio);
  console.log('Saved to test-matcha-zh.wav successfully.');
} finally {
  // Always hand the engine back, even when generate() or save() throws;
  // without the finally, an exception would skip this call.
  tts.free();
}

View File

@@ -37,7 +37,7 @@ const audio = tts.generate({
speed: speed
});
tts.save('./test-en.wav', audio);
console.log('Saved to test-en.wav successfully.');
tts.save('./test-vits-en.wav', audio);
console.log('Saved to test-vits-en.wav successfully.');
tts.free();

View File

@@ -34,6 +34,6 @@ const speakerId = 66;
const speed = 1.0;
const audio = tts.generate(
{text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
tts.save('./test-zh.wav', audio);
console.log('Saved to test-zh.wav successfully.');
tts.save('./test-vits-zh.wav', audio);
console.log('Saved to test-vits-zh.wav successfully.');
tts.free();