From 0aacf02dd8ac6d9625c3b94dfb3d8e837e61b0e3 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 17 Mar 2025 17:05:15 +0800 Subject: [PATCH] Add C++ runtime for vocos (#2014) --- .github/scripts/test-nodejs-addon-npm.sh | 8 +- .github/scripts/test-nodejs-npm.sh | 9 +- .github/scripts/test-offline-tts.sh | 22 +- .github/scripts/test-python.sh | 12 +- .github/workflows/c-api.yaml | 8 +- .github/workflows/cxx-api.yaml | 8 +- .github/workflows/run-java-test.yaml | 2 +- .../com/k2fsa/sherpa/onnx/MainActivity.kt | 4 +- .../k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt | 4 +- c-api-examples/matcha-tts-en-c-api.c | 4 +- c-api-examples/matcha-tts-zh-c-api.c | 4 +- cxx-api-examples/matcha-tts-en-cxx-api.cc | 4 +- cxx-api-examples/matcha-tts-zh-cxx-api.cc | 4 +- dart-api-examples/tts/run-matcha-en.sh | 6 +- dart-api-examples/tts/run-matcha-zh.sh | 8 +- dotnet-examples/offline-tts-play/Program.cs | 8 +- .../offline-tts-play/run-matcha-en.sh | 6 +- .../offline-tts-play/run-matcha-zh.sh | 6 +- dotnet-examples/offline-tts/Program.cs | 8 +- dotnet-examples/offline-tts/run-matcha-en.sh | 6 +- dotnet-examples/offline-tts/run-matcha-zh.sh | 6 +- .../non-streaming-tts/run-matcha-en.sh | 6 +- .../non-streaming-tts/run-matcha-zh.sh | 6 +- .../ets/workers/NonStreamingTtsWorker.ets | 4 +- .../SherpaOnnxTts/ViewModel.swift | 2 +- .../NonStreamingTtsMatchaEn.java | 2 +- .../NonStreamingTtsMatchaZh.java | 2 +- .../run-non-streaming-tts-matcha-en.sh | 4 +- .../run-non-streaming-tts-matcha-zh.sh | 4 +- kotlin-api-examples/run.sh | 4 +- kotlin-api-examples/test_tts.kt | 2 +- .../NonStreamingTextToSpeechDlg.cpp | 16 +- nodejs-addon-examples/README.md | 4 +- ...est_tts_non_streaming_matcha_icefall_en.js | 2 +- ...est_tts_non_streaming_matcha_icefall_zh.js | 2 +- nodejs-examples/README.md | 4 +- nodejs-examples/test-offline-tts-matcha-en.js | 2 +- nodejs-examples/test-offline-tts-matcha-zh.js | 2 +- .../tts/matcha-en-playback.pas | 2 +- pascal-api-examples/tts/matcha-en.pas | 2 +- 
.../tts/matcha-zh-playback.pas | 2 +- pascal-api-examples/tts/matcha-zh.pas | 2 +- .../tts/run-matcha-en-playback.sh | 4 +- pascal-api-examples/tts/run-matcha-en.sh | 4 +- .../tts/run-matcha-zh-playback.sh | 4 +- pascal-api-examples/tts/run-matcha-zh.sh | 4 +- python-api-examples/offline-tts-play.py | 8 +- python-api-examples/offline-tts.py | 8 +- scripts/apk/generate-tts-apk-script.py | 4 +- scripts/matcha-tts/fa-en/run.sh | 8 +- sherpa-onnx/csrc/CMakeLists.txt | 2 + sherpa-onnx/csrc/hifigan-vocoder.cc | 16 +- sherpa-onnx/csrc/hifigan-vocoder.h | 8 +- sherpa-onnx/csrc/offline-tts-matcha-impl.h | 27 +-- sherpa-onnx/csrc/vocoder.cc | 120 +++++++++++ sherpa-onnx/csrc/vocoder.h | 35 ++++ sherpa-onnx/csrc/vocos-vocoder.cc | 194 ++++++++++++++++++ sherpa-onnx/csrc/vocos-vocoder.h | 39 ++++ swift-api-examples/run-tts-matcha-en.sh | 4 +- swift-api-examples/run-tts-matcha-zh.sh | 4 +- swift-api-examples/tts-matcha-en.swift | 2 +- swift-api-examples/tts-matcha-zh.swift | 2 +- 62 files changed, 558 insertions(+), 162 deletions(-) create mode 100644 sherpa-onnx/csrc/vocoder.cc create mode 100644 sherpa-onnx/csrc/vocoder.h create mode 100644 sherpa-onnx/csrc/vocos-vocoder.cc create mode 100644 sherpa-onnx/csrc/vocos-vocoder.h diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index bcb74ee8..7fde8726 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -121,19 +121,19 @@ rm -rf kokoro-en-v0_19 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2 tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test_tts_non_streaming_matcha_icefall_en.js -rm hifigan_v2.onnx +rm 
vocos-22khz-univ.onnx rm -rf matcha-icefall-en_US-ljspeech curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2 tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test_tts_non_streaming_matcha_icefall_zh.js -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx rm -rf matcha-icefall-zh-baker ls -lh *.wav diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index 89a9df21..0dd17982 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -42,12 +42,13 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test-offline-tts-matcha-zh.js rm -rf matcha-icefall-zh-baker -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx + echo "---" @@ -55,12 +56,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test-offline-tts-matcha-en.js rm -rf matcha-icefall-en_US-ljspeech -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx echo "---" diff --git a/.github/scripts/test-offline-tts.sh b/.github/scripts/test-offline-tts.sh index f063ca0d..36cbd1ba 100755 --- a/.github/scripts/test-offline-tts.sh +++ 
b/.github/scripts/test-offline-tts.sh @@ -50,11 +50,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-tts-fa_en-musa.tar.bz2 rm matcha-tts-fa_en-musa.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx + $EXE \ --matcha-acoustic-model=./matcha-tts-fa_en-musa/model.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-tts-fa_en-musa/tokens.txt \ --matcha-data-dir=./matcha-tts-fa_en-musa/espeak-ng-data \ --output-filename=./tts/test-matcha-fa-en-musa.wav \ @@ -62,7 +63,7 @@ $EXE \ "How are you doing today? این یک نمونه ی تست فارسی است. This is a test." rm -rf matcha-tts-fa_en-musa -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx ls -lh tts/*.wav log "------------------------------------------------------------" @@ -72,11 +73,12 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx + $EXE \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --num-threads=2 \ @@ -84,7 +86,7 @@ $EXE \ --debug=1 \ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." 
-rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx rm -rf matcha-icefall-en_US-ljspeech ls -lh tts/*.wav @@ -95,11 +97,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx $EXE \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ @@ -110,7 +112,7 @@ $EXE \ $EXE \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ @@ -119,7 +121,7 @@ $EXE \ --output-filename=./tts/matcha-baker-zh-2.wav \ "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx rm -rf matcha-icefall-zh-baker log "------------------------------------------------------------" diff --git a/.github/scripts/test-python.sh b/.github/scripts/test-python.sh index 1b8eaa51..8a132ade 100755 --- a/.github/scripts/test-python.sh +++ b/.github/scripts/test-python.sh @@ -320,18 +320,18 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts.py \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --output-filename=./tts/test-matcha-ljspeech-en.wav \ --num-threads=2 \ "Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar." -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx rm -rf matcha-icefall-en_US-ljspeech log "matcha-baker-zh test" @@ -340,11 +340,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts.py \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ @@ -353,7 +353,7 @@ python3 ./python-api-examples/offline-tts.py \ "某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。" rm -rf matcha-icefall-zh-baker -rm hifigan_v2.onnx +rm vocos-22khz-univ.onnx log "vits-ljs test" diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml index 02fb392d..1d0accb3 100644 --- 
a/.github/workflows/c-api.yaml +++ b/.github/workflows/c-api.yaml @@ -228,7 +228,7 @@ jobs: tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH @@ -237,7 +237,7 @@ jobs: rm ./matcha-tts-zh-c-api rm -rf matcha-icefall-* - rm hifigan_v2.onnx + rm vocos-22khz-univ.onnx - name: Test Matcha TTS (en) shell: bash @@ -252,7 +252,7 @@ jobs: tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH @@ -261,7 +261,7 @@ jobs: rm ./matcha-tts-en-c-api rm -rf matcha-icefall-* - rm hifigan_v2.onnx + rm vocos-22khz-univ.onnx - uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/cxx-api.yaml b/.github/workflows/cxx-api.yaml index 00ea936c..519b5113 100644 --- a/.github/workflows/cxx-api.yaml +++ b/.github/workflows/cxx-api.yaml @@ -237,7 +237,7 @@ jobs: tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH @@ -245,7 +245,7 @@ jobs: ./matcha-tts-zh-cxx-api rm -rf matcha-icefall-* - rm 
hifigan_v2.onnx + rm vocos-22khz-univ.onnx rm matcha-tts-zh-cxx-api - name: Test Matcha TTS (en) @@ -262,7 +262,7 @@ jobs: tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH @@ -271,7 +271,7 @@ jobs: rm matcha-tts-en-cxx-api rm -rf matcha-icefall-* - rm hifigan_v2.onnx + rm vocos-22khz-univ.onnx - uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/run-java-test.yaml b/.github/workflows/run-java-test.yaml index dae47b1a..4b8dda24 100644 --- a/.github/workflows/run-java-test.yaml +++ b/.github/workflows/run-java-test.yaml @@ -265,7 +265,7 @@ jobs: rm -rf kokoro-en-* rm -rf matcha-icefall-* - rm hifigan_v2.onnx + rm vocos-22khz-univ.onnx ./run-non-streaming-tts-piper-en.sh rm -rf vits-piper-* diff --git a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt index 99e49e78..b6254c03 100644 --- a/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt +++ b/android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx/MainActivity.kt @@ -262,7 +262,7 @@ class MainActivity : AppCompatActivity() { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker // modelDir = "matcha-icefall-zh-baker" // acousticModelName = "model-steps-3.onnx" - // vocoder = "hifigan_v2.onnx" + // vocoder = "vocos-22khz-univ.onnx" // lexicon = "lexicon.txt" // dictDir = "matcha-icefall-zh-baker/dict" @@ -271,7 +271,7 @@ class MainActivity : AppCompatActivity() { // 
https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker // modelDir = "matcha-icefall-en_US-ljspeech" // acousticModelName = "model-steps-3.onnx" - // vocoder = "hifigan_v2.onnx" + // vocoder = "vocos-22khz-univ.onnx" // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" // Example 9 diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt index 2ae628c2..6c576167 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt @@ -131,7 +131,7 @@ object TtsEngine { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker // modelDir = "matcha-icefall-zh-baker" // acousticModelName = "model-steps-3.onnx" - // vocoder = "hifigan_v2.onnx" + // vocoder = "vocos-22khz-univ.onnx" // lexicon = "lexicon.txt" // dictDir = "matcha-icefall-zh-baker/dict" // lang = "zho" @@ -141,7 +141,7 @@ object TtsEngine { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker // modelDir = "matcha-icefall-en_US-ljspeech" // acousticModelName = "model-steps-3.onnx" - // vocoder = "hifigan_v2.onnx" + // vocoder = "vocos-22khz-univ.onnx" // dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data" // lang = "eng" diff --git a/c-api-examples/matcha-tts-en-c-api.c b/c-api-examples/matcha-tts-en-c-api.c index 99b0a974..2a4a8150 100644 --- a/c-api-examples/matcha-tts-en-c-api.c +++ b/c-api-examples/matcha-tts-en-c-api.c @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm 
matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx ./matcha-tts-en-c-api @@ -40,7 +40,7 @@ int32_t main(int32_t argc, char *argv[]) { config.model.matcha.acoustic_model = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; - config.model.matcha.vocoder = "./hifigan_v2.onnx"; + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; diff --git a/c-api-examples/matcha-tts-zh-c-api.c b/c-api-examples/matcha-tts-zh-c-api.c index 9fb9f459..68cba285 100644 --- a/c-api-examples/matcha-tts-zh-c-api.c +++ b/c-api-examples/matcha-tts-zh-c-api.c @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx ./matcha-tts-zh-c-api @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { memset(&config, 0, sizeof(config)); config.model.matcha.acoustic_model = "./matcha-icefall-zh-baker/model-steps-3.onnx"; - config.model.matcha.vocoder = "./hifigan_v2.onnx"; + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; diff --git a/cxx-api-examples/matcha-tts-en-cxx-api.cc b/cxx-api-examples/matcha-tts-en-cxx-api.cc index cf9b3c69..c813e32d 100644 --- a/cxx-api-examples/matcha-tts-en-cxx-api.cc +++ b/cxx-api-examples/matcha-tts-en-cxx-api.cc @@ -13,7 +13,7 @@ wget 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx ./matcha-tts-en-cxx-api @@ -39,7 +39,7 @@ int32_t main(int32_t argc, char *argv[]) { config.model.matcha.acoustic_model = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; - config.model.matcha.vocoder = "./hifigan_v2.onnx"; + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; config.model.matcha.tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; diff --git a/cxx-api-examples/matcha-tts-zh-cxx-api.cc b/cxx-api-examples/matcha-tts-zh-cxx-api.cc index b0fcbab2..89e4d031 100644 --- a/cxx-api-examples/matcha-tts-zh-cxx-api.cc +++ b/cxx-api-examples/matcha-tts-zh-cxx-api.cc @@ -13,7 +13,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx ./matcha-tts-zh-cxx-api @@ -37,7 +37,7 @@ int32_t main(int32_t argc, char *argv[]) { OfflineTtsConfig config; config.model.matcha.acoustic_model = "./matcha-icefall-zh-baker/model-steps-3.onnx"; - config.model.matcha.vocoder = "./hifigan_v2.onnx"; + config.model.matcha.vocoder = "./vocos-22khz-univ.onnx"; config.model.matcha.lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; config.model.matcha.tokens = "./matcha-icefall-zh-baker/tokens.txt"; config.model.matcha.dict_dir = "./matcha-icefall-zh-baker/dict"; diff --git a/dart-api-examples/tts/run-matcha-en.sh b/dart-api-examples/tts/run-matcha-en.sh index f727ee5c..57560168 100755 --- a/dart-api-examples/tts/run-matcha-en.sh +++ 
b/dart-api-examples/tts/run-matcha-en.sh @@ -14,14 +14,14 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dart run \ ./bin/matcha-en.dart \ --acoustic-model ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./vocos-22khz-univ.onnx \ --tokens ./matcha-icefall-en_US-ljspeech/tokens.txt \ --data-dir ./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --sid 0 \ diff --git a/dart-api-examples/tts/run-matcha-zh.sh b/dart-api-examples/tts/run-matcha-zh.sh index be95a827..213bc213 100755 --- a/dart-api-examples/tts/run-matcha-zh.sh +++ b/dart-api-examples/tts/run-matcha-zh.sh @@ -13,14 +13,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! 
-f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dart run \ ./bin/matcha-zh.dart \ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./vocos-22khz-univ.onnx \ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ --tokens ./matcha-icefall-zh-baker/tokens.txt \ --dict-dir ./matcha-icefall-zh-baker/dict \ @@ -33,7 +33,7 @@ dart run \ dart run \ ./bin/matcha-zh.dart \ --acoustic-model ./matcha-icefall-zh-baker/model-steps-3.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./vocos-22khz-univ.onnx \ --lexicon ./matcha-icefall-zh-baker/lexicon.txt \ --tokens ./matcha-icefall-zh-baker/tokens.txt \ --dict-dir ./matcha-icefall-zh-baker/dict \ diff --git a/dotnet-examples/offline-tts-play/Program.cs b/dotnet-examples/offline-tts-play/Program.cs index 543a50cd..dc14f792 100644 --- a/dotnet-examples/offline-tts-play/Program.cs +++ b/dotnet-examples/offline-tts-play/Program.cs @@ -92,11 +92,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx dotnet run \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ --dict-dir=./matcha-icefall-zh-baker/dict \ @@ -111,11 +111,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget 
https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx dotnet run \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --debug=1 \ diff --git a/dotnet-examples/offline-tts-play/run-matcha-en.sh b/dotnet-examples/offline-tts-play/run-matcha-en.sh index 0f7caa21..4a40f5bd 100755 --- a/dotnet-examples/offline-tts-play/run-matcha-en.sh +++ b/dotnet-examples/offline-tts-play/run-matcha-en.sh @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dotnet run \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --debug=1 \ diff --git a/dotnet-examples/offline-tts-play/run-matcha-zh.sh b/dotnet-examples/offline-tts-play/run-matcha-zh.sh index e3b34268..ab02ea78 100755 --- a/dotnet-examples/offline-tts-play/run-matcha-zh.sh +++ b/dotnet-examples/offline-tts-play/run-matcha-zh.sh @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! 
-f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dotnet run \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ --dict-dir=./matcha-icefall-zh-baker/dict \ diff --git a/dotnet-examples/offline-tts/Program.cs b/dotnet-examples/offline-tts/Program.cs index 21f90c52..222d068b 100644 --- a/dotnet-examples/offline-tts/Program.cs +++ b/dotnet-examples/offline-tts/Program.cs @@ -85,11 +85,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx dotnet run \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ --dict-dir=./matcha-icefall-zh-baker/dict \ @@ -104,11 +104,11 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx dotnet run \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ 
--data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --debug=1 \ diff --git a/dotnet-examples/offline-tts/run-matcha-en.sh b/dotnet-examples/offline-tts/run-matcha-en.sh index 0f7caa21..4a40f5bd 100755 --- a/dotnet-examples/offline-tts/run-matcha-en.sh +++ b/dotnet-examples/offline-tts/run-matcha-en.sh @@ -12,13 +12,13 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dotnet run \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --debug=1 \ diff --git a/dotnet-examples/offline-tts/run-matcha-zh.sh b/dotnet-examples/offline-tts/run-matcha-zh.sh index e3b34268..ab02ea78 100755 --- a/dotnet-examples/offline-tts/run-matcha-zh.sh +++ b/dotnet-examples/offline-tts/run-matcha-zh.sh @@ -10,14 +10,14 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! 
-f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi dotnet run \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --tokens=./matcha-icefall-zh-baker/tokens.txt \ --dict-dir=./matcha-icefall-zh-baker/dict \ diff --git a/go-api-examples/non-streaming-tts/run-matcha-en.sh b/go-api-examples/non-streaming-tts/run-matcha-en.sh index f0932da5..3285640c 100755 --- a/go-api-examples/non-streaming-tts/run-matcha-en.sh +++ b/go-api-examples/non-streaming-tts/run-matcha-en.sh @@ -12,8 +12,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi go mod tidy @@ -21,7 +21,7 @@ go build ./non-streaming-tts \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --debug=1 \ diff --git a/go-api-examples/non-streaming-tts/run-matcha-zh.sh b/go-api-examples/non-streaming-tts/run-matcha-zh.sh index ef4165d0..733c9235 100755 --- a/go-api-examples/non-streaming-tts/run-matcha-zh.sh +++ b/go-api-examples/non-streaming-tts/run-matcha-zh.sh @@ -11,8 +11,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! 
-f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi go mod tidy @@ -20,7 +20,7 @@ go build ./non-streaming-tts \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ --matcha-dict-dir=./matcha-icefall-zh-baker/dict \ diff --git a/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets b/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets index 99751bab..57f1da6b 100644 --- a/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets +++ b/harmony-os/SherpaOnnxTts/entry/src/main/ets/workers/NonStreamingTtsWorker.ets @@ -159,7 +159,7 @@ function initTts(context: Context): OfflineTts { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker // modelDir = 'matcha-icefall-zh-baker'; // acousticModelName = 'model-steps-3.onnx'; - // vocoder = 'hifigan_v2.onnx'; + // vocoder = 'vocos-22khz-univ.onnx'; // lexicon = 'lexicon.txt'; // dictDir = 'dict'; // ruleFsts = `date.fst,phone.fst,number.fst`; @@ -169,7 +169,7 @@ function initTts(context: Context): OfflineTts { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker // modelDir = 'matcha-icefall-en_US-ljspeech'; // acousticModelName = 'model-steps-3.onnx'; - // vocoder = 'hifigan_v2.onnx'; + // vocoder = 'vocos-22khz-univ.onnx'; // dataDir = 'espeak-ng-data'; // Example 10 diff --git a/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift 
b/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift index b2000c3b..07800dd8 100644 --- a/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift +++ b/ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift @@ -131,7 +131,7 @@ func getTtsFor_matcha_icefall_zh_baker() -> SherpaOnnxOfflineTtsWrapper { // please see https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker let acousticModel = getResource("model-steps-3", "onnx") - let vocoder = getResource("hifigan_v2", "onnx") + let vocoder = getResource("vocos-22khz-univ", "onnx") let tokens = getResource("tokens", "txt") let lexicon = getResource("lexicon", "txt") diff --git a/java-api-examples/NonStreamingTtsMatchaEn.java b/java-api-examples/NonStreamingTtsMatchaEn.java index bda41f06..55d6a0d6 100644 --- a/java-api-examples/NonStreamingTtsMatchaEn.java +++ b/java-api-examples/NonStreamingTtsMatchaEn.java @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaEn { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker // to download model files String acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx"; - String vocoder = "./hifigan_v2.onnx"; + String vocoder = "./vocos-22khz-univ.onnx"; String tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt"; String dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data"; String text = diff --git a/java-api-examples/NonStreamingTtsMatchaZh.java b/java-api-examples/NonStreamingTtsMatchaZh.java index dec24dbb..c2125147 100644 --- a/java-api-examples/NonStreamingTtsMatchaZh.java +++ b/java-api-examples/NonStreamingTtsMatchaZh.java @@ -10,7 +10,7 @@ public class NonStreamingTtsMatchaZh { // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker // to download model files String acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx"; - String 
vocoder = "./hifigan_v2.onnx"; + String vocoder = "./vocos-22khz-univ.onnx"; String tokens = "./matcha-icefall-zh-baker/tokens.txt"; String lexicon = "./matcha-icefall-zh-baker/lexicon.txt"; String dictDir = "./matcha-icefall-zh-baker/dict"; diff --git a/java-api-examples/run-non-streaming-tts-matcha-en.sh b/java-api-examples/run-non-streaming-tts-matcha-en.sh index ba03beaf..b34b9668 100755 --- a/java-api-examples/run-non-streaming-tts-matcha-en.sh +++ b/java-api-examples/run-non-streaming-tts-matcha-en.sh @@ -35,8 +35,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi java \ diff --git a/java-api-examples/run-non-streaming-tts-matcha-zh.sh b/java-api-examples/run-non-streaming-tts-matcha-zh.sh index a339e298..24229e18 100755 --- a/java-api-examples/run-non-streaming-tts-matcha-zh.sh +++ b/java-api-examples/run-non-streaming-tts-matcha-zh.sh @@ -34,8 +34,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi java \ diff --git a/kotlin-api-examples/run.sh b/kotlin-api-examples/run.sh index f0614ae5..7f197714 100755 --- a/kotlin-api-examples/run.sh +++ b/kotlin-api-examples/run.sh @@ -111,8 +111,8 @@ function testTts() { rm matcha-icefall-zh-baker.tar.bz2 fi - if [ ! 
-f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx + if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then diff --git a/kotlin-api-examples/test_tts.kt b/kotlin-api-examples/test_tts.kt index c387dcf7..6aa2ae36 100644 --- a/kotlin-api-examples/test_tts.kt +++ b/kotlin-api-examples/test_tts.kt @@ -58,7 +58,7 @@ fun testMatcha() { model=OfflineTtsModelConfig( matcha=OfflineTtsMatchaModelConfig( acousticModel="./matcha-icefall-zh-baker/model-steps-3.onnx", - vocoder="./hifigan_v2.onnx", + vocoder="./vocos-22khz-univ.onnx", tokens="./matcha-icefall-zh-baker/tokens.txt", lexicon="./matcha-icefall-zh-baker/lexicon.txt", dictDir="./matcha-icefall-zh-baker/dict", diff --git a/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp b/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp index 98d98b50..a53171c9 100644 --- a/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp +++ b/mfc-examples/NonStreamingTextToSpeech/NonStreamingTextToSpeechDlg.cpp @@ -513,9 +513,9 @@ void CNonStreamingTextToSpeechDlg::Init() { "(c) Switch to the directory matcha-icefall-zh-baker\r\n" "(d) Rename model-steps-3.onnx to model.onnx\r\n" "(e) Download a vocoder model from \r\n" - " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx\r\n" - "(f) Rename hifigan_v2.onnx to hifigan.onnx\r\n" - "(g) Remember to put hifigan.onnx in the directory matcha-icefall-zh-baker\r\n" + " https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx\r\n" + "(f) Rename vocos-22khz-univ.onnx to vocos.onnx\r\n" + "(g) Remember to put vocos.onnx in the directory matcha-icefall-zh-baker\r\n" "(h) Copy the current exe to the directory matcha-icefall-zh-baker\r\n" "(i) Done! 
You can now run the exe in the directory matcha-icefall-zh-baker\r\n"; @@ -540,10 +540,16 @@ void CNonStreamingTextToSpeechDlg::Init() { config.model.kokoro.dict_dir = "./dict"; config.model.kokoro.lexicon = "./lexicon-us-en.txt,./lexicon-zh.txt"; } - } else if (Exists("./hifigan.onnx")) { + } else if (Exists("./hifigan.onnx") || Exists("./vocos.onnx")) { // it is a matcha tts model config.model.matcha.acoustic_model = "./model.onnx"; - config.model.matcha.vocoder = "./hifigan.onnx"; + + if (Exists("./hifigan.onnx")) { + config.model.matcha.vocoder = "./hifigan.onnx"; + } else if (Exists("./vocos.onnx")) { + config.model.matcha.vocoder = "./vocos.onnx"; + } + config.model.matcha.tokens = "./tokens.txt"; if (Exists("./espeak-ng-data/phontab")) { diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index 0c15ec13..1fecaa04 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -400,7 +400,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test_tts_non_streaming_matcha_icefall_en.js ``` @@ -411,7 +411,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test_tts_non_streaming_matcha_icefall_zh.js ``` diff --git a/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_en.js b/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_en.js index 4fb4e60a..91914f72 100644 --- 
a/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_en.js +++ b/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_en.js @@ -9,7 +9,7 @@ function createOfflineTts() { model: { matcha: { acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', - vocoder: './hifigan_v2.onnx', + vocoder: './vocos-22khz-univ.onnx', lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', diff --git a/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_zh.js b/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_zh.js index 0b870745..ff244592 100644 --- a/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_zh.js +++ b/nodejs-addon-examples/test_tts_non_streaming_matcha_icefall_zh.js @@ -9,7 +9,7 @@ function createOfflineTts() { model: { matcha: { acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', - vocoder: './hifigan_v2.onnx', + vocoder: './vocos-22khz-univ.onnx', lexicon: './matcha-icefall-zh-baker/lexicon.txt', tokens: './matcha-icefall-zh-baker/tokens.txt', dictDir: './matcha-icefall-zh-baker/dict', diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md index 3e8b860b..9b89dad3 100644 --- a/nodejs-examples/README.md +++ b/nodejs-examples/README.md @@ -83,7 +83,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test-offline-tts-matcha-zh.js ``` @@ -101,7 +101,7 @@ wget https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-i tar xf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -wget 
https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx node ./test-offline-tts-matcha-en.js ``` diff --git a/nodejs-examples/test-offline-tts-matcha-en.js b/nodejs-examples/test-offline-tts-matcha-en.js index c2f98204..f5d1e6de 100644 --- a/nodejs-examples/test-offline-tts-matcha-en.js +++ b/nodejs-examples/test-offline-tts-matcha-en.js @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); function createOfflineTts() { let offlineTtsMatchaModelConfig = { acousticModel: './matcha-icefall-en_US-ljspeech/model-steps-3.onnx', - vocoder: './hifigan_v2.onnx', + vocoder: './vocos-22khz-univ.onnx', lexicon: './matcha-icefall-en_US-ljspeech/lexicon.txt', tokens: './matcha-icefall-en_US-ljspeech/tokens.txt', dataDir: './matcha-icefall-en_US-ljspeech/espeak-ng-data', diff --git a/nodejs-examples/test-offline-tts-matcha-zh.js b/nodejs-examples/test-offline-tts-matcha-zh.js index 21c6a087..b7d16cca 100644 --- a/nodejs-examples/test-offline-tts-matcha-zh.js +++ b/nodejs-examples/test-offline-tts-matcha-zh.js @@ -5,7 +5,7 @@ const sherpa_onnx = require('sherpa-onnx'); function createOfflineTts() { let offlineTtsMatchaModelConfig = { acousticModel: './matcha-icefall-zh-baker/model-steps-3.onnx', - vocoder: './hifigan_v2.onnx', + vocoder: './vocos-22khz-univ.onnx', lexicon: './matcha-icefall-zh-baker/lexicon.txt', tokens: './matcha-icefall-zh-baker/tokens.txt', dictDir: './matcha-icefall-zh-baker/dict', diff --git a/pascal-api-examples/tts/matcha-en-playback.pas b/pascal-api-examples/tts/matcha-en-playback.pas index 7a6e8c75..0cf07be4 100644 --- a/pascal-api-examples/tts/matcha-en-playback.pas +++ b/pascal-api-examples/tts/matcha-en-playback.pas @@ -115,7 +115,7 @@ var Config: TSherpaOnnxOfflineTtsConfig; begin Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; - Config.Model.Matcha.Vocoder := 
'./hifigan_v2.onnx'; + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; Config.Model.NumThreads := 1; diff --git a/pascal-api-examples/tts/matcha-en.pas b/pascal-api-examples/tts/matcha-en.pas index f818d53e..fa8f7cca 100644 --- a/pascal-api-examples/tts/matcha-en.pas +++ b/pascal-api-examples/tts/matcha-en.pas @@ -21,7 +21,7 @@ var Config: TSherpaOnnxOfflineTtsConfig; begin Config.Model.Matcha.AcousticModel := './matcha-icefall-en_US-ljspeech/model-steps-3.onnx'; - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; Config.Model.Matcha.Tokens := './matcha-icefall-en_US-ljspeech/tokens.txt'; Config.Model.Matcha.DataDir := './matcha-icefall-en_US-ljspeech/espeak-ng-data'; Config.Model.NumThreads := 1; diff --git a/pascal-api-examples/tts/matcha-zh-playback.pas b/pascal-api-examples/tts/matcha-zh-playback.pas index 05f94ba9..27d41ad9 100644 --- a/pascal-api-examples/tts/matcha-zh-playback.pas +++ b/pascal-api-examples/tts/matcha-zh-playback.pas @@ -115,7 +115,7 @@ var Config: TSherpaOnnxOfflineTtsConfig; begin Config.Model.Matcha.AcousticModel := './matcha-icefall-zh-baker/model-steps-3.onnx'; - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; diff --git a/pascal-api-examples/tts/matcha-zh.pas b/pascal-api-examples/tts/matcha-zh.pas index c9410095..f122a706 100644 --- a/pascal-api-examples/tts/matcha-zh.pas +++ b/pascal-api-examples/tts/matcha-zh.pas @@ -21,7 +21,7 @@ var Config: TSherpaOnnxOfflineTtsConfig; begin Config.Model.Matcha.AcousticModel := 
'./matcha-icefall-zh-baker/model-steps-3.onnx'; - Config.Model.Matcha.Vocoder := './hifigan_v2.onnx'; + Config.Model.Matcha.Vocoder := './vocos-22khz-univ.onnx'; Config.Model.Matcha.Lexicon := './matcha-icefall-zh-baker/lexicon.txt'; Config.Model.Matcha.Tokens := './matcha-icefall-zh-baker/tokens.txt'; Config.Model.Matcha.DictDir := './matcha-icefall-zh-baker/dict'; diff --git a/pascal-api-examples/tts/run-matcha-en-playback.sh b/pascal-api-examples/tts/run-matcha-en-playback.sh index ffa677e9..d50ddc51 100755 --- a/pascal-api-examples/tts/run-matcha-en-playback.sh +++ b/pascal-api-examples/tts/run-matcha-en-playback.sh @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi fpc \ diff --git a/pascal-api-examples/tts/run-matcha-en.sh b/pascal-api-examples/tts/run-matcha-en.sh index 084e672b..85dbba3a 100755 --- a/pascal-api-examples/tts/run-matcha-en.sh +++ b/pascal-api-examples/tts/run-matcha-en.sh @@ -33,8 +33,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi fpc \ diff --git a/pascal-api-examples/tts/run-matcha-zh-playback.sh b/pascal-api-examples/tts/run-matcha-zh-playback.sh index e12ad22a..8cf3675f 100755 --- a/pascal-api-examples/tts/run-matcha-zh-playback.sh +++ b/pascal-api-examples/tts/run-matcha-zh-playback.sh @@ -32,8 +32,8 @@ if [ ! 
-f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi fpc \ diff --git a/pascal-api-examples/tts/run-matcha-zh.sh b/pascal-api-examples/tts/run-matcha-zh.sh index a7d83d37..33000534 100755 --- a/pascal-api-examples/tts/run-matcha-zh.sh +++ b/pascal-api-examples/tts/run-matcha-zh.sh @@ -32,8 +32,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi fpc \ diff --git a/python-api-examples/offline-tts-play.py b/python-api-examples/offline-tts-play.py index 8ec419ac..3457f86f 100755 --- a/python-api-examples/offline-tts-play.py +++ b/python-api-examples/offline-tts-play.py @@ -59,11 +59,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts-play.py \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ 
--tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ @@ -77,11 +77,11 @@ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts-play.py \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --output-filename=./test-matcha-ljspeech-en.wav \ diff --git a/python-api-examples/offline-tts.py b/python-api-examples/offline-tts.py index c4e63b4f..10650ab4 100755 --- a/python-api-examples/offline-tts.py +++ b/python-api-examples/offline-tts.py @@ -60,11 +60,11 @@ curl -O -SL https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-zh-baker.tar.bz2 rm matcha-icefall-zh-baker.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts.py \ --matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \ --matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \ --tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \ @@ -78,11 +78,11 @@ curl -SL -O 
https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/m tar xvf matcha-icefall-en_US-ljspeech.tar.bz2 rm matcha-icefall-en_US-ljspeech.tar.bz2 -curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx python3 ./python-api-examples/offline-tts.py \ --matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \ - --matcha-vocoder=./hifigan_v2.onnx \ + --matcha-vocoder=./vocos-22khz-univ.onnx \ --matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \ --matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \ --output-filename=./test-matcha-ljspeech-en.wav \ diff --git a/scripts/apk/generate-tts-apk-script.py b/scripts/apk/generate-tts-apk-script.py index 338c06a1..af5469a4 100755 --- a/scripts/apk/generate-tts-apk-script.py +++ b/scripts/apk/generate-tts-apk-script.py @@ -395,7 +395,7 @@ def get_matcha_models() -> List[TtsModel]: s = [f"{m.model_dir}/{r}" for r in rule_fsts] m.rule_fsts = ",".join(s) m.dict_dir = m.model_dir + "/dict" - m.vocoder = "hifigan_v2.onnx" + m.vocoder = "vocos-22khz-univ.onnx" english_persian_models = [ TtsModel( @@ -416,7 +416,7 @@ def get_matcha_models() -> List[TtsModel]: ] for m in english_persian_models: m.data_dir = f"{m.model_dir}/espeak-ng-data" - m.vocoder = "hifigan_v2.onnx" + m.vocoder = "vocos-22khz-univ.onnx" return chinese_models + english_persian_models diff --git a/scripts/matcha-tts/fa-en/run.sh b/scripts/matcha-tts/fa-en/run.sh index b445f2b6..073c5a65 100755 --- a/scripts/matcha-tts/fa-en/run.sh +++ b/scripts/matcha-tts/fa-en/run.sh @@ -20,8 +20,8 @@ if [ ! -f male/tokens.txt ]; then curl -SL --output male/tokens.txt https://huggingface.co/mah92/Musa-FA_EN-Matcha-TTS-Model/resolve/main/tokens_sherpa_with_fa.txt fi -if [ ! 
-f hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi if [ ! -f .add-meta-data.done ]; then @@ -31,14 +31,14 @@ fi python3 ./test.py \ --am ./female/model.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./vocos-22khz-univ.onnx \ --tokens ./female/tokens.txt \ --text "This is a test. این یک نمونه ی تست فارسی است." \ --out-wav "./female-en-fa.wav" python3 ./test.py \ --am ./male/model.onnx \ - --vocoder ./hifigan_v2.onnx \ + --vocoder ./vocos-22khz-univ.onnx \ --tokens ./male/tokens.txt \ --text "This is a test. این یک نمونه ی تست فارسی است." \ --out-wav "./male-en-fa.wav" diff --git a/sherpa-onnx/csrc/CMakeLists.txt b/sherpa-onnx/csrc/CMakeLists.txt index fbe20792..20133a35 100644 --- a/sherpa-onnx/csrc/CMakeLists.txt +++ b/sherpa-onnx/csrc/CMakeLists.txt @@ -183,6 +183,8 @@ if(SHERPA_ONNX_ENABLE_TTS) offline-tts-vits-model.cc offline-tts.cc piper-phonemize-lexicon.cc + vocoder.cc + vocos-vocoder.cc ) endif() diff --git a/sherpa-onnx/csrc/hifigan-vocoder.cc b/sherpa-onnx/csrc/hifigan-vocoder.cc index d6edae71..6703449f 100644 --- a/sherpa-onnx/csrc/hifigan-vocoder.cc +++ b/sherpa-onnx/csrc/hifigan-vocoder.cc @@ -45,11 +45,21 @@ class HifiganVocoder::Impl { Init(buf.data(), buf.size()); } - Ort::Value Run(Ort::Value mel) const { + std::vector Run(Ort::Value mel) const { auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, output_names_ptr_.data(), output_names_ptr_.size()); - return std::move(out[0]); + std::vector audio_shape = + out[0].GetTensorTypeAndShapeInfo().GetShape(); + + int64_t total = 1; + // The output shape may be (1, 1, total) or (1, total) or (total,) + for (auto i : audio_shape) { + total *= i; + } + + const float *p = out[0].GetTensorData(); + return {p, p + total}; } private: @@ -88,7 +98,7 @@ 
HifiganVocoder::HifiganVocoder(Manager *mgr, int32_t num_threads, HifiganVocoder::~HifiganVocoder() = default; -Ort::Value HifiganVocoder::Run(Ort::Value mel) const { +std::vector HifiganVocoder::Run(Ort::Value mel) const { return impl_->Run(std::move(mel)); } diff --git a/sherpa-onnx/csrc/hifigan-vocoder.h b/sherpa-onnx/csrc/hifigan-vocoder.h index 3d10a242..f491c3f5 100644 --- a/sherpa-onnx/csrc/hifigan-vocoder.h +++ b/sherpa-onnx/csrc/hifigan-vocoder.h @@ -7,14 +7,16 @@ #include #include +#include #include "onnxruntime_cxx_api.h" // NOLINT +#include "sherpa-onnx/csrc/vocoder.h" namespace sherpa_onnx { -class HifiganVocoder { +class HifiganVocoder : public Vocoder { public: - ~HifiganVocoder(); + ~HifiganVocoder() override; HifiganVocoder(int32_t num_threads, const std::string &provider, const std::string &model); @@ -26,7 +28,7 @@ class HifiganVocoder { /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). * @return Return a float32 tensor of shape (batch_size, num_samples). 
*/ - Ort::Value Run(Ort::Value mel) const; + std::vector Run(Ort::Value mel) const override; private: class Impl; diff --git a/sherpa-onnx/csrc/offline-tts-matcha-impl.h b/sherpa-onnx/csrc/offline-tts-matcha-impl.h index 2299df5a..fbceeb0e 100644 --- a/sherpa-onnx/csrc/offline-tts-matcha-impl.h +++ b/sherpa-onnx/csrc/offline-tts-matcha-impl.h @@ -13,7 +13,6 @@ #include "fst/extensions/far/far.h" #include "kaldifst/csrc/kaldi-fst-io.h" #include "kaldifst/csrc/text-normalizer.h" -#include "sherpa-onnx/csrc/hifigan-vocoder.h" #include "sherpa-onnx/csrc/jieba-lexicon.h" #include "sherpa-onnx/csrc/lexicon.h" #include "sherpa-onnx/csrc/macros.h" @@ -25,6 +24,7 @@ #include "sherpa-onnx/csrc/onnx-utils.h" #include "sherpa-onnx/csrc/piper-phonemize-lexicon.h" #include "sherpa-onnx/csrc/text-utils.h" +#include "sherpa-onnx/csrc/vocoder.h" namespace sherpa_onnx { @@ -33,9 +33,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { explicit OfflineTtsMatchaImpl(const OfflineTtsConfig &config) : config_(config), model_(std::make_unique(config.model)), - vocoder_(std::make_unique( - config.model.num_threads, config.model.provider, - config.model.matcha.vocoder)) { + vocoder_(Vocoder::Create(config.model)) { InitFrontend(); if (!config.rule_fsts.empty()) { @@ -92,9 +90,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { OfflineTtsMatchaImpl(Manager *mgr, const OfflineTtsConfig &config) : config_(config), model_(std::make_unique(mgr, config.model)), - vocoder_(std::make_unique( - mgr, config.model.num_threads, config.model.provider, - config.model.matcha.vocoder)) { + vocoder_(Vocoder::Create(mgr, config.model)) { InitFrontend(mgr); if (!config.rule_fsts.empty()) { @@ -382,22 +378,11 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { memory_info, x.data(), x.size(), x_shape.data(), x_shape.size()); Ort::Value mel = model_->Run(std::move(x_tensor), sid, speed); - Ort::Value audio = vocoder_->Run(std::move(mel)); - - std::vector audio_shape = - 
audio.GetTensorTypeAndShapeInfo().GetShape(); - - int64_t total = 1; - // The output shape may be (1, 1, total) or (1, total) or (total,) - for (auto i : audio_shape) { - total *= i; - } - - const float *p = audio.GetTensorData(); GeneratedAudio ans; + + ans.samples = vocoder_->Run(std::move(mel)); ans.sample_rate = model_->GetMetaData().sample_rate; - ans.samples = std::vector(p, p + total); float silence_scale = config_.silence_scale; if (silence_scale != 1) { @@ -410,7 +395,7 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl { private: OfflineTtsConfig config_; std::unique_ptr model_; - std::unique_ptr vocoder_; + std::unique_ptr vocoder_; std::vector> tn_list_; std::unique_ptr frontend_; }; diff --git a/sherpa-onnx/csrc/vocoder.cc b/sherpa-onnx/csrc/vocoder.cc new file mode 100644 index 00000000..e8290673 --- /dev/null +++ b/sherpa-onnx/csrc/vocoder.cc @@ -0,0 +1,120 @@ +// sherpa-onnx/csrc/vocoder.cc +// +// Copyright (c) 2025 Xiaomi Corporation + +#include "sherpa-onnx/csrc/vocoder.h" + +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + +#include "sherpa-onnx/csrc/file-utils.h" +#include "sherpa-onnx/csrc/hifigan-vocoder.h" +#include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/csrc/onnx-utils.h" +#include "sherpa-onnx/csrc/vocos-vocoder.h" + +namespace sherpa_onnx { + +namespace { + +enum class ModelType : std::uint8_t { + kHifigan, + kVocoos, + kUnknown, +}; + +} // namespace + +static ModelType GetModelType(char *model_data, size_t model_data_length, + bool debug) { + Ort::Env env(ORT_LOGGING_LEVEL_ERROR); + Ort::SessionOptions sess_opts; + sess_opts.SetIntraOpNumThreads(1); + sess_opts.SetInterOpNumThreads(1); + + auto sess = std::make_unique(env, model_data, model_data_length, + sess_opts); + + Ort::ModelMetadata meta_data = sess->GetModelMetadata(); + if (debug) { + std::ostringstream os; + PrintModelMetadata(os, 
meta_data); +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s", os.str().c_str()); +#else + SHERPA_ONNX_LOGE("%s", os.str().c_str()); +#endif + } + + Ort::AllocatorWithDefaultOptions allocator; + auto model_type = + LookupCustomModelMetaData(meta_data, "model_type", allocator); + if (model_type.empty()) { + SHERPA_ONNX_LOGE( + "No model_type in the metadata!\n" + "Please make sure you are using the vocoder from " + "https://github.com/k2-fsa/sherpa-onnx/releases/tag/vocoder-models"); + return ModelType::kUnknown; + } + + if (model_type == "hifigan") { + return ModelType::kHifigan; + } else if (model_type == "vocos") { + return ModelType::kVocoos; + } else { + SHERPA_ONNX_LOGE("Unsupported model_type: %s", model_type.c_str()); + return ModelType::kUnknown; + } +} + +std::unique_ptr Vocoder::Create(const OfflineTtsModelConfig &config) { + auto buffer = ReadFile(config.matcha.vocoder); + auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug); + + switch (model_type) { + case ModelType::kHifigan: + return std::make_unique( + config.num_threads, config.provider, config.matcha.vocoder); + case ModelType::kVocoos: + return std::make_unique(config); + case ModelType::kUnknown: + SHERPA_ONNX_LOGE("Unknown model type in vocoder!"); + return nullptr; + } +} + +template +std::unique_ptr Vocoder::Create(Manager *mgr, + const OfflineTtsModelConfig &config) { + auto buffer = ReadFile(mgr, config.matcha.vocoder); + auto model_type = GetModelType(buffer.data(), buffer.size(), config.debug); + + switch (model_type) { + case ModelType::kHifigan: + return std::make_unique( + config.num_threads, config.provider, config.matcha.vocoder); + case ModelType::kVocoos: + return std::make_unique(config); + case ModelType::kUnknown: + SHERPA_ONNX_LOGE("Unknown model type in vocoder!"); + return nullptr; + } +} + +#if __ANDROID_API__ >= 9 +template std::unique_ptr Vocoder::Create( + AAssetManager *mgr, const OfflineTtsModelConfig &config); +#endif + +#if __OHOS__ +template 
std::unique_ptr Vocoder::Create( + NativeResourceManager *mgr, const OfflineTtsModelConfig &config); +#endif + +} // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/vocoder.h b/sherpa-onnx/csrc/vocoder.h new file mode 100644 index 00000000..b281af3b --- /dev/null +++ b/sherpa-onnx/csrc/vocoder.h @@ -0,0 +1,35 @@ +// sherpa-onnx/csrc/vocoder.h +// +// Copyright (c) 2025 Xiaomi Corporation + +#ifndef SHERPA_ONNX_CSRC_VOCODER_H_ +#define SHERPA_ONNX_CSRC_VOCODER_H_ + +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "sherpa-onnx/csrc/offline-tts-model-config.h" + +namespace sherpa_onnx { + +class Vocoder { + public: + virtual ~Vocoder() = default; + + static std::unique_ptr Create(const OfflineTtsModelConfig &config); + + template + static std::unique_ptr Create(Manager *mgr, + const OfflineTtsModelConfig &config); + + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). + * @return Return a float32 vector containing audio samples.
+ */ + virtual std::vector Run(Ort::Value mel) const = 0; +}; + +} // namespace sherpa_onnx + +#endif // SHERPA_ONNX_CSRC_VOCODER_H_ diff --git a/sherpa-onnx/csrc/vocos-vocoder.cc b/sherpa-onnx/csrc/vocos-vocoder.cc new file mode 100644 index 00000000..6173c9d1 --- /dev/null +++ b/sherpa-onnx/csrc/vocos-vocoder.cc @@ -0,0 +1,194 @@ +// sherpa-onnx/csrc/vocos-vocoder.cc +// +// Copyright (c) 2025 Xiaomi Corporation + +#include "sherpa-onnx/csrc/vocos-vocoder.h" + +#include +#include +#include + +#if __ANDROID_API__ >= 9 +#include "android/asset_manager.h" +#include "android/asset_manager_jni.h" +#endif + +#if __OHOS__ +#include "rawfile/raw_file_manager.h" +#endif + +#include "kaldi-native-fbank/csrc/istft.h" +#include "sherpa-onnx/csrc/file-utils.h" +#include "sherpa-onnx/csrc/macros.h" +#include "sherpa-onnx/csrc/onnx-utils.h" +#include "sherpa-onnx/csrc/session.h" + +namespace sherpa_onnx { + +struct VocosModelMetaData { + int32_t n_fft; + int32_t hop_length; + int32_t win_length; + int32_t center; + int32_t normalized; + std::string window_type; + std::string pad_mode; +}; + +class VocosVocoder::Impl { + public: + explicit Impl(const OfflineTtsModelConfig &config) + : config_(config), + env_(ORT_LOGGING_LEVEL_ERROR), + sess_opts_(GetSessionOptions(config.num_threads, config.provider)), + allocator_{} { + auto buf = ReadFile(config.matcha.vocoder); + Init(buf.data(), buf.size()); + } + + template + explicit Impl(Manager *mgr, const OfflineTtsModelConfig &config) + : config_(config), + env_(ORT_LOGGING_LEVEL_ERROR), + sess_opts_(GetSessionOptions(config.num_threads, config.provider)), + allocator_{} { + auto buf = ReadFile(mgr, config.matcha.vocoder); + Init(buf.data(), buf.size()); + } + + std::vector Run(Ort::Value mel) const { + auto out = sess_->Run({}, input_names_ptr_.data(), &mel, 1, + output_names_ptr_.data(), output_names_ptr_.size()); + + std::vector shape = out[0].GetTensorTypeAndShapeInfo().GetShape(); + + if (shape[0] != 1) { + 
SHERPA_ONNX_LOGE("Support only batch size 1, given: %d", + static_cast(shape[0])); + SHERPA_ONNX_EXIT(-1); + } + + knf::StftResult stft_result; + stft_result.num_frames = shape[2]; + stft_result.real.resize(shape[1] * shape[2]); + stft_result.imag.resize(shape[1] * shape[2]); + + // stft_result.real: (num_frames, n_fft/2+1), flattened in row major + + // mag.shape: (batch_size, n_fft/2+1, num_frames) + const float *p_mag = out[0].GetTensorData(); + const float *p_x = out[1].GetTensorData(); + const float *p_y = out[2].GetTensorData(); + + for (int32_t frame_index = 0; frame_index < static_cast(shape[2]); + ++frame_index) { + for (int32_t bin = 0; bin < static_cast(shape[1]); ++bin) { + stft_result.real[frame_index * shape[1] + bin] = + p_mag[bin * shape[2] + frame_index] * + p_x[bin * shape[2] + frame_index]; + stft_result.imag[frame_index * shape[1] + bin] = + p_mag[bin * shape[2] + frame_index] * + p_y[bin * shape[2] + frame_index]; + } + } + + knf::StftConfig stft_config; + stft_config.n_fft = meta_.n_fft; + stft_config.hop_length = meta_.hop_length; + stft_config.win_length = meta_.win_length; + stft_config.normalized = meta_.normalized; + stft_config.center = meta_.center; + stft_config.window_type = meta_.window_type; + stft_config.pad_mode = meta_.pad_mode; + + knf::IStft istft(stft_config); + return istft.Compute(stft_result); + } + + private: + void Init(void *model_data, size_t model_data_length) { + sess_ = std::make_unique(env_, model_data, model_data_length, + sess_opts_); + + GetInputNames(sess_.get(), &input_names_, &input_names_ptr_); + + GetOutputNames(sess_.get(), &output_names_, &output_names_ptr_); + + // get meta data + Ort::ModelMetadata meta_data = sess_->GetModelMetadata(); + if (config_.debug) { + std::ostringstream os; + os << "---Vocos model---\n"; + PrintModelMetadata(os, meta_data); + + os << "----------input names----------\n"; + int32_t i = 0; + for (const auto &s : input_names_) { + os << i << " " << s << "\n"; + ++i; + } + os << 
"----------output names----------\n"; + i = 0; + for (const auto &s : output_names_) { + os << i << " " << s << "\n"; + ++i; + } + +#if __OHOS__ + SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str()); +#else + SHERPA_ONNX_LOGE("%s\n", os.str().c_str()); +#endif + } + + Ort::AllocatorWithDefaultOptions allocator; // used in the macro below + SHERPA_ONNX_READ_META_DATA(meta_.n_fft, "n_fft"); + SHERPA_ONNX_READ_META_DATA(meta_.hop_length, "hop_length"); + SHERPA_ONNX_READ_META_DATA(meta_.win_length, "win_length"); + SHERPA_ONNX_READ_META_DATA(meta_.center, "center"); + SHERPA_ONNX_READ_META_DATA(meta_.normalized, "normalized"); + SHERPA_ONNX_READ_META_DATA_STR(meta_.window_type, "window_type"); + SHERPA_ONNX_READ_META_DATA_STR(meta_.pad_mode, "pad_mode"); + } + + private: + OfflineTtsModelConfig config_; + VocosModelMetaData meta_; + + Ort::Env env_; + Ort::SessionOptions sess_opts_; + Ort::AllocatorWithDefaultOptions allocator_; + + std::unique_ptr sess_; + + std::vector input_names_; + std::vector input_names_ptr_; + + std::vector output_names_; + std::vector output_names_ptr_; +}; + +VocosVocoder::VocosVocoder(const OfflineTtsModelConfig &config) + : impl_(std::make_unique(config)) {} + +template +VocosVocoder::VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config) + : impl_(std::make_unique(mgr, config)) {} + +VocosVocoder::~VocosVocoder() = default; + +std::vector VocosVocoder::Run(Ort::Value mel) const { + return impl_->Run(std::move(mel)); +} + +#if __ANDROID_API__ >= 9 +template VocosVocoder::VocosVocoder(AAssetManager *mgr, + const OfflineTtsModelConfig &config); +#endif + +#if __OHOS__ +template VocosVocoder::VocosVocoder(NativeResourceManager *mgr, + const OfflineTtsModelConfig &config); +#endif + +} // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/vocos-vocoder.h b/sherpa-onnx/csrc/vocos-vocoder.h new file mode 100644 index 00000000..1dcc0040 --- /dev/null +++ b/sherpa-onnx/csrc/vocos-vocoder.h @@ -0,0 +1,39 @@ +// 
sherpa-onnx/csrc/vocos-vocoder.h +// +// Copyright (c) 2025 Xiaomi Corporation + +#ifndef SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ +#define SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ + +#include +#include +#include + +#include "onnxruntime_cxx_api.h" // NOLINT +#include "sherpa-onnx/csrc/offline-tts-model-config.h" +#include "sherpa-onnx/csrc/vocoder.h" + +namespace sherpa_onnx { + +class VocosVocoder : public Vocoder { + public: + ~VocosVocoder() override; + + explicit VocosVocoder(const OfflineTtsModelConfig &config); + + template + VocosVocoder(Manager *mgr, const OfflineTtsModelConfig &config); + + /** @param mel A float32 tensor of shape (batch_size, feat_dim, num_frames). + * @return Return a float32 vector containing audio samples. + */ + std::vector Run(Ort::Value mel) const override; + + private: + class Impl; + std::unique_ptr impl_; +}; + +} // namespace sherpa_onnx + +#endif // SHERPA_ONNX_CSRC_VOCOS_VOCODER_H_ diff --git a/swift-api-examples/run-tts-matcha-en.sh b/swift-api-examples/run-tts-matcha-en.sh index f472b090..ec6c3fca 100755 --- a/swift-api-examples/run-tts-matcha-en.sh +++ b/swift-api-examples/run-tts-matcha-en.sh @@ -17,8 +17,8 @@ if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then rm matcha-icefall-en_US-ljspeech.tar.bz2 fi -if [ ! -f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi if [ ! -e ./tts-matcha-en ]; then diff --git a/swift-api-examples/run-tts-matcha-zh.sh b/swift-api-examples/run-tts-matcha-zh.sh index 5d4f75c1..b8a8134f 100755 --- a/swift-api-examples/run-tts-matcha-zh.sh +++ b/swift-api-examples/run-tts-matcha-zh.sh @@ -16,8 +16,8 @@ if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then rm matcha-icefall-zh-baker.tar.bz2 fi -if [ !
-f ./hifigan_v2.onnx ]; then - curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx +if [ ! -f ./vocos-22khz-univ.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/vocos-22khz-univ.onnx fi if [ ! -e ./tts-matcha-zh ]; then diff --git a/swift-api-examples/tts-matcha-en.swift b/swift-api-examples/tts-matcha-en.swift index ec55f72d..1b976d30 100644 --- a/swift-api-examples/tts-matcha-en.swift +++ b/swift-api-examples/tts-matcha-en.swift @@ -6,7 +6,7 @@ class MyClass { func run() { let acousticModel = "./matcha-icefall-en_US-ljspeech/model-steps-3.onnx" - let vocoder = "./hifigan_v2.onnx" + let vocoder = "./vocos-22khz-univ.onnx" let tokens = "./matcha-icefall-en_US-ljspeech/tokens.txt" let dataDir = "./matcha-icefall-en_US-ljspeech/espeak-ng-data" let matcha = sherpaOnnxOfflineTtsMatchaModelConfig( diff --git a/swift-api-examples/tts-matcha-zh.swift b/swift-api-examples/tts-matcha-zh.swift index 0b782f58..e3435b14 100644 --- a/swift-api-examples/tts-matcha-zh.swift +++ b/swift-api-examples/tts-matcha-zh.swift @@ -6,7 +6,7 @@ class MyClass { func run() { let acousticModel = "./matcha-icefall-zh-baker/model-steps-3.onnx" - let vocoder = "./hifigan_v2.onnx" + let vocoder = "./vocos-22khz-univ.onnx" let lexicon = "./matcha-icefall-zh-baker/lexicon.txt" let tokens = "./matcha-icefall-zh-baker/tokens.txt" let dictDir = "./matcha-icefall-zh-baker/dict"