Add C# and JavaScript (wasm) API for MatchaTTS models (#1682)

2025-01-05 15:08:19 +08:00
parent 1ef9e5ee3a
commit 3eced3e7ee
26 changed files with 677 additions and 88 deletions
--- a/nodejs-examples/README.md
+++ b/nodejs-examples/README.md
@@ -42,9 +42,45 @@ node ./test-offline-speaker-diarization.js

 In the following, we demonstrate how to run text-to-speech.

-## ./test-offline-tts-en.js
+## ./test-offline-tts-matcha-zh.js

-[./test-offline-tts-en.js](./test-offline-tts-en.js) shows how to use
+[./test-offline-tts-matcha-zh.js](./test-offline-tts-matcha-zh.js) shows how to use
+[matcha-icefall-zh-baker](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker)
+for text-to-speech.
+
+You can use the following commands to download the model and vocoder and run it:
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
+tar xvf matcha-icefall-zh-baker.tar.bz2
+rm matcha-icefall-zh-baker.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-zh.js
+```
+
+## ./test-offline-tts-matcha-en.js
+
+[./test-offline-tts-matcha-en.js](./test-offline-tts-matcha-en.js) shows how to use
+[matcha-icefall-en_US-ljspeech](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker)
+for text-to-speech.
+
+You can use the following commands to download the model and vocoder and run it:
+
+```bash
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
+tar xvf matcha-icefall-en_US-ljspeech.tar.bz2
+rm matcha-icefall-en_US-ljspeech.tar.bz2
+
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
+
+node ./test-offline-tts-matcha-en.js
+```
+
+## ./test-offline-tts-vits-en.js
+
+[./test-offline-tts-vits-en.js](./test-offline-tts-vits-en.js) shows how to use
 [vits-piper-en_US-amy-low.tar.bz2](https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2)
 for text-to-speech.

@@ -53,12 +89,12 @@ You can use the following command to run it:
 ```bash
 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
 tar xvf vits-piper-en_US-amy-low.tar.bz2
-node ./test-offline-tts-en.js
+node ./test-offline-tts-vits-en.js
 ```

-## ./test-offline-tts-zh.js
+## ./test-offline-tts-vits-zh.js

-[./test-offline-tts-zh.js](./test-offline-tts-zh.js) shows how to use
+[./test-offline-tts-vits-zh.js](./test-offline-tts-vits-zh.js) shows how to use
 a VITS pretrained model
 [aishell3](https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3)
 for text-to-speech.
@@ -68,7 +104,7 @@ You can use the following command to run it:
 ```bash
 wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
 tar xvf vits-icefall-zh-aishell3.tar.bz2
-node ./test-offline-tts-zh.js
+node ./test-offline-tts-vits-zh.js
 ```

 # Speech-to-text
--- a/nodejs-examples/test-offline-tts-matcha-en.js
+++ b/nodejs-examples/test-offline-tts-matcha-en.js
@@ -0,0 +1,40 @@
+// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createOfflineTts() {
+  // Assemble the Matcha model config, wrap it in the model and top-level TTS
+  // configs, and hand the result to sherpa_onnx.createOfflineTts().
+  const matcha = {
+    acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx',
+    vocoder: './hifigan_v2.onnx',
+    lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt',
+    tokens: './matcha-icefall-en_US-ljspeech/tokens.txt',
+    dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data',
+    noiseScale: 0.667,
+    lengthScale: 1.0,
+  };
+
+  const model = {
+    offlineTtsMatchaModelConfig: matcha,
+    numThreads: 1,
+    debug: 1,
+    provider: 'cpu',
+  };
+
+  return sherpa_onnx.createOfflineTts({
+    offlineTtsModelConfig: model,
+    maxNumSentences: 1,
+  });
+}
+
+const tts = createOfflineTts();
+const speakerId = 0;
+const speed = 1.0;
+const text =
+    'Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.';
+// Generate audio for `text`, write it to a WAV file, then call tts.free().
+const audio = tts.generate({text, sid: speakerId, speed});
+tts.save('./test-matcha-en.wav', audio);
+console.log('Saved to test-matcha-en.wav successfully.');
+tts.free();
--- a/nodejs-examples/test-offline-tts-matcha-zh.js
+++ b/nodejs-examples/test-offline-tts-matcha-zh.js
@@ -0,0 +1,41 @@
+// Copyright (c)  2025  Xiaomi Corporation (authors: Fangjun Kuang)
+
+const sherpa_onnx = require('sherpa-onnx');
+
+function createOfflineTts() {
+  // Assemble the Matcha model config, wrap it in the model and top-level TTS
+  // configs (including the rule FSTs), and pass it to sherpa_onnx.createOfflineTts().
+  const matcha = {
+    acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx',
+    vocoder: './hifigan_v2.onnx',
+    lexicon: './matcha-icefall-zh-baker/lexicon.txt',
+    tokens: './matcha-icefall-zh-baker/tokens.txt',
+    dictDir: './matcha-icefall-zh-baker/dict',
+    noiseScale: 0.667,
+    lengthScale: 1.0,
+  };
+  const model = {
+    offlineTtsMatchaModelConfig: matcha,
+    numThreads: 1,
+    debug: 1,
+    provider: 'cpu',
+  };
+
+  return sherpa_onnx.createOfflineTts({
+    offlineTtsModelConfig: model,
+    maxNumSentences: 1,
+    ruleFsts:
+        './matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst',
+  });
+}
+
+const tts = createOfflineTts();
+const speakerId = 0;
+const speed = 1.0;
+const text =
+    '当夜幕降临，星光点点，伴随着微风拂面，我在静谧中感受着时光的流转，思念如涟漪荡漾，梦境如画卷展开，我与自然融为一体，沉静在这片宁静的美丽之中，感受着生命的奇迹与温柔. 某某银行的副行长和一些行政领导表示，他们去过长江和长白山; 经济不断增长。2024年12月31号，拨打110或者18920240511。123456块钱。';
+// Generate audio for `text`, write it to a WAV file, then call tts.free().
+const audio = tts.generate({text, sid: speakerId, speed});
+tts.save('./test-matcha-zh.wav', audio);
+console.log('Saved to test-matcha-zh.wav successfully.');
+tts.free();
--- a/nodejs-examples/test-offline-tts-vits-en.js
+++ b/nodejs-examples/test-offline-tts-vits-en.js
@@ -37,7 +37,7 @@ const audio = tts.generate({
  speed: speed
 });

-tts.save('./test-en.wav', audio);
-console.log('Saved to test-en.wav successfully.');
+tts.save('./test-vits-en.wav', audio);
+console.log('Saved to test-vits-en.wav successfully.');

 tts.free();
--- a/nodejs-examples/test-offline-tts-vits-zh.js
+++ b/nodejs-examples/test-offline-tts-vits-zh.js
@@ -34,6 +34,6 @@ const speakerId = 66;
 const speed = 1.0;
 const audio = tts.generate(
    {text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
-tts.save('./test-zh.wav', audio);
-console.log('Saved to test-zh.wav successfully.');
+tts.save('./test-vits-zh.wav', audio);
+console.log('Saved to test-vits-zh.wav successfully.');
 tts.free();