Add Go API for MatchaTTS models (#1685)

This commit is contained in:
Fangjun Kuang
2025-01-06 08:03:03 +08:00
committed by GitHub
parent 6f085babcc
commit 46330b25cc
9 changed files with 148 additions and 2 deletions

View File

@@ -17,11 +17,22 @@ func main() {
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
flag.StringVar(&config.Model.Matcha.DictDir, "vits-dict-dir", "", "Path to dict for jieba")
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba")
flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -ex
# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
rm matcha-icefall-en_US-ljspeech.tar.bz2
fi
if [ ! -f ./hifigan_v2.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi
go mod tidy
go build
./non-streaming-tts \
--matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
--matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
--debug=1 \
--output-filename=./test-matcha-en.wav \
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."

View File

@@ -0,0 +1,31 @@
#!/usr/bin/env bash
set -ex
# please visit
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
# to download more models
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
tar xvf matcha-icefall-zh-baker.tar.bz2
rm matcha-icefall-zh-baker.tar.bz2
fi
if [ ! -f ./hifigan_v2.onnx ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
fi
go mod tidy
go build
./non-streaming-tts \
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
--matcha-vocoder=./hifigan_v2.onnx \
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
--debug=1 \
--tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
--output-filename=./test-matcha-zh.wav \
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号拨打110或者18920240511。123456块钱。"

View File

@@ -4,7 +4,7 @@ set -ex
if [ ! -d vits-piper-en_US-lessac-medium ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
tar xvf vits-piper-en_US-lessac-medium.tar.bz2
tar xf vits-piper-en_US-lessac-medium.tar.bz2
rm vits-piper-en_US-lessac-medium.tar.bz2
fi