Add Go API for MatchaTTS models (#1685)
This commit is contained in:
27
.github/workflows/test-go-package.yaml
vendored
27
.github/workflows/test-go-package.yaml
vendored
@@ -209,6 +209,15 @@ jobs:
|
|||||||
go build
|
go build
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test matcha zh"
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
|
||||||
|
echo "Test matcha en"
|
||||||
|
./run-matcha-en.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
ls -lh *.wav
|
||||||
|
|
||||||
echo "Test vits-ljs"
|
echo "Test vits-ljs"
|
||||||
./run-vits-ljs.sh
|
./run-vits-ljs.sh
|
||||||
rm -rf vits-ljs
|
rm -rf vits-ljs
|
||||||
@@ -246,6 +255,15 @@ jobs:
|
|||||||
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
|
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/x86_64-pc-windows-gnu/*.dll .
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test matcha zh"
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
|
||||||
|
echo "Test matcha en"
|
||||||
|
./run-matcha-en.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
ls -lh *.wav
|
||||||
|
|
||||||
echo "Test vits-ljs"
|
echo "Test vits-ljs"
|
||||||
./run-vits-ljs.sh
|
./run-vits-ljs.sh
|
||||||
rm -rf vits-ljs
|
rm -rf vits-ljs
|
||||||
@@ -291,6 +309,15 @@ jobs:
|
|||||||
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
|
cp -v /C/Users/runneradmin/go/pkg/mod/github.com/k2-fsa/sherpa-onnx-go-windows*/lib/i686-pc-windows-gnu/*.dll .
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test matcha zh"
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
|
||||||
|
echo "Test matcha en"
|
||||||
|
./run-matcha-en.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
ls -lh *.wav
|
||||||
|
|
||||||
echo "Test vits-ljs"
|
echo "Test vits-ljs"
|
||||||
./run-vits-ljs.sh
|
./run-vits-ljs.sh
|
||||||
rm -rf vits-ljs
|
rm -rf vits-ljs
|
||||||
|
|||||||
9
.github/workflows/test-go.yaml
vendored
9
.github/workflows/test-go.yaml
vendored
@@ -226,6 +226,15 @@ jobs:
|
|||||||
go build
|
go build
|
||||||
ls -lh
|
ls -lh
|
||||||
|
|
||||||
|
echo "Test matcha zh"
|
||||||
|
./run-matcha-zh.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
|
||||||
|
echo "Test matcha en"
|
||||||
|
./run-matcha-en.sh
|
||||||
|
rm -rf matcha-icefall-*
|
||||||
|
ls -lh *.wav
|
||||||
|
|
||||||
echo "Test vits-ljs"
|
echo "Test vits-ljs"
|
||||||
./run-vits-ljs.sh
|
./run-vits-ljs.sh
|
||||||
rm -rf vits-ljs
|
rm -rf vits-ljs
|
||||||
|
|||||||
@@ -17,11 +17,22 @@ func main() {
|
|||||||
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
|
flag.StringVar(&config.Model.Vits.Lexicon, "vits-lexicon", "", "Path to lexicon.txt")
|
||||||
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
|
flag.StringVar(&config.Model.Vits.Tokens, "vits-tokens", "", "Path to tokens.txt")
|
||||||
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
|
flag.StringVar(&config.Model.Vits.DataDir, "vits-data-dir", "", "Path to espeak-ng-data")
|
||||||
|
// --vits-dict-dir must populate the VITS config; the Matcha dict dir has its own --matcha-dict-dir flag.
flag.StringVar(&config.Model.Vits.DictDir, "vits-dict-dir", "", "Path to dict for jieba")
|
||||||
|
|
||||||
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
|
flag.Float32Var(&config.Model.Vits.NoiseScale, "vits-noise-scale", 0.667, "noise_scale for VITS")
|
||||||
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
|
flag.Float32Var(&config.Model.Vits.NoiseScaleW, "vits-noise-scale-w", 0.8, "noise_scale_w for VITS")
|
||||||
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
|
flag.Float32Var(&config.Model.Vits.LengthScale, "vits-length-scale", 1.0, "length_scale for VITS. small -> faster in speech speed; large -> slower")
|
||||||
|
|
||||||
|
flag.StringVar(&config.Model.Matcha.AcousticModel, "matcha-acoustic-model", "", "Path to the matcha acoustic model")
|
||||||
|
flag.StringVar(&config.Model.Matcha.Vocoder, "matcha-vocoder", "", "Path to the matcha vocoder model")
|
||||||
|
flag.StringVar(&config.Model.Matcha.Lexicon, "matcha-lexicon", "", "Path to lexicon.txt")
|
||||||
|
flag.StringVar(&config.Model.Matcha.Tokens, "matcha-tokens", "", "Path to tokens.txt")
|
||||||
|
flag.StringVar(&config.Model.Matcha.DataDir, "matcha-data-dir", "", "Path to espeak-ng-data")
|
||||||
|
flag.StringVar(&config.Model.Matcha.DictDir, "matcha-dict-dir", "", "Path to dict for jieba")
|
||||||
|
|
||||||
|
flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha")
|
||||||
|
flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower")
|
||||||
|
|
||||||
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
|
flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing")
|
||||||
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
|
flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message")
|
||||||
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
|
flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use")
|
||||||
|
|||||||
31
go-api-examples/non-streaming-tts/run-matcha-en.sh
Executable file
31
go-api-examples/non-streaming-tts/run-matcha-en.sh
Executable file
@@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
# please visit
|
||||||
|
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
|
# matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
|
# to download more models
|
||||||
|
if [ ! -f ./matcha-icefall-en_US-ljspeech/model-steps-3.onnx ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-en_US-ljspeech.tar.bz2
|
||||||
|
tar xf matcha-icefall-en_US-ljspeech.tar.bz2
|
||||||
|
rm matcha-icefall-en_US-ljspeech.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./hifigan_v2.onnx ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||||
|
fi
|
||||||
|
|
||||||
|
go mod tidy
|
||||||
|
go build
|
||||||
|
|
||||||
|
./non-streaming-tts \
|
||||||
|
--matcha-acoustic-model=./matcha-icefall-en_US-ljspeech/model-steps-3.onnx \
|
||||||
|
--matcha-vocoder=./hifigan_v2.onnx \
|
||||||
|
--matcha-tokens=./matcha-icefall-en_US-ljspeech/tokens.txt \
|
||||||
|
--matcha-data-dir=./matcha-icefall-en_US-ljspeech/espeak-ng-data \
|
||||||
|
--debug=1 \
|
||||||
|
--output-filename=./test-matcha-en.wav \
|
||||||
|
"Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone."
|
||||||
|
|
||||||
|
|
||||||
31
go-api-examples/non-streaming-tts/run-matcha-zh.sh
Executable file
31
go-api-examples/non-streaming-tts/run-matcha-zh.sh
Executable file
@@ -0,0 +1,31 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
set -ex
|
||||||
|
|
||||||
|
# please visit
|
||||||
|
# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||||
|
# to download more models
|
||||||
|
if [ ! -f ./matcha-icefall-zh-baker/model-steps-3.onnx ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/matcha-icefall-zh-baker.tar.bz2
|
||||||
|
tar xvf matcha-icefall-zh-baker.tar.bz2
|
||||||
|
rm matcha-icefall-zh-baker.tar.bz2
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ ! -f ./hifigan_v2.onnx ]; then
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/hifigan_v2.onnx
|
||||||
|
fi
|
||||||
|
|
||||||
|
go mod tidy
|
||||||
|
go build
|
||||||
|
|
||||||
|
./non-streaming-tts \
|
||||||
|
--matcha-acoustic-model=./matcha-icefall-zh-baker/model-steps-3.onnx \
|
||||||
|
--matcha-vocoder=./hifigan_v2.onnx \
|
||||||
|
--matcha-lexicon=./matcha-icefall-zh-baker/lexicon.txt \
|
||||||
|
--matcha-tokens=./matcha-icefall-zh-baker/tokens.txt \
|
||||||
|
--matcha-dict-dir=./matcha-icefall-zh-baker/dict \
|
||||||
|
--debug=1 \
|
||||||
|
--tts-rule-fsts=./matcha-icefall-zh-baker/phone.fst,./matcha-icefall-zh-baker/date.fst,./matcha-icefall-zh-baker/number.fst \
|
||||||
|
--output-filename=./test-matcha-zh.wav \
|
||||||
|
"某某银行的副行长和一些行政领导表示,他们去过长江和长白山; 经济不断增长。2024年12月31号,拨打110或者18920240511。123456块钱。"
|
||||||
|
|
||||||
@@ -4,7 +4,7 @@ set -ex
|
|||||||
|
|
||||||
if [ ! -d vits-piper-en_US-lessac-medium ]; then
|
if [ ! -d vits-piper-en_US-lessac-medium ]; then
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-lessac-medium.tar.bz2
|
||||||
tar xvf vits-piper-en_US-lessac-medium.tar.bz2
|
tar xf vits-piper-en_US-lessac-medium.tar.bz2
|
||||||
rm vits-piper-en_US-lessac-medium.tar.bz2
|
rm vits-piper-en_US-lessac-medium.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|||||||
1
scripts/go/_internal/non-streaming-tts/run-matcha-en.sh
Symbolic link
1
scripts/go/_internal/non-streaming-tts/run-matcha-en.sh
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-tts/run-matcha-en.sh
|
||||||
1
scripts/go/_internal/non-streaming-tts/run-matcha-zh.sh
Symbolic link
1
scripts/go/_internal/non-streaming-tts/run-matcha-zh.sh
Symbolic link
@@ -0,0 +1 @@
|
|||||||
|
../../../../go-api-examples/non-streaming-tts/run-matcha-zh.sh
|
||||||
@@ -671,8 +671,20 @@ type OfflineTtsVitsModelConfig struct {
|
|||||||
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
|
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type OfflineTtsMatchaModelConfig struct {
|
||||||
|
AcousticModel string // Path to the acoustic model for MatchaTTS
|
||||||
|
Vocoder string // Path to the vocoder model for MatchaTTS
|
||||||
|
Lexicon string // Path to lexicon.txt
|
||||||
|
Tokens string // Path to tokens.txt
|
||||||
|
DataDir string // Path to espeak-ng-data directory
|
||||||
|
NoiseScale float32 // noise scale for Matcha models. Please use 0.667 in general
|
||||||
|
LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
|
||||||
|
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
|
||||||
|
}
|
||||||
|
|
||||||
type OfflineTtsModelConfig struct {
|
type OfflineTtsModelConfig struct {
|
||||||
Vits OfflineTtsVitsModelConfig
|
Vits OfflineTtsVitsModelConfig
|
||||||
|
Matcha OfflineTtsMatchaModelConfig
|
||||||
|
|
||||||
// Number of threads to use for neural network computation
|
// Number of threads to use for neural network computation
|
||||||
NumThreads int
|
NumThreads int
|
||||||
@@ -722,6 +734,7 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
|
|||||||
|
|
||||||
c.max_num_sentences = C.int(config.MaxNumSentences)
|
c.max_num_sentences = C.int(config.MaxNumSentences)
|
||||||
|
|
||||||
|
// vits
|
||||||
c.model.vits.model = C.CString(config.Model.Vits.Model)
|
c.model.vits.model = C.CString(config.Model.Vits.Model)
|
||||||
defer C.free(unsafe.Pointer(c.model.vits.model))
|
defer C.free(unsafe.Pointer(c.model.vits.model))
|
||||||
|
|
||||||
@@ -741,6 +754,28 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
|
|||||||
c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir)
|
c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir)
|
||||||
defer C.free(unsafe.Pointer(c.model.vits.dict_dir))
|
defer C.free(unsafe.Pointer(c.model.vits.dict_dir))
|
||||||
|
|
||||||
|
// matcha
|
||||||
|
c.model.matcha.acoustic_model = C.CString(config.Model.Matcha.AcousticModel)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.acoustic_model))
|
||||||
|
|
||||||
|
c.model.matcha.vocoder = C.CString(config.Model.Matcha.Vocoder)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.vocoder))
|
||||||
|
|
||||||
|
c.model.matcha.lexicon = C.CString(config.Model.Matcha.Lexicon)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.lexicon))
|
||||||
|
|
||||||
|
c.model.matcha.tokens = C.CString(config.Model.Matcha.Tokens)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.tokens))
|
||||||
|
|
||||||
|
c.model.matcha.data_dir = C.CString(config.Model.Matcha.DataDir)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.data_dir))
|
||||||
|
|
||||||
|
c.model.matcha.noise_scale = C.float(config.Model.Matcha.NoiseScale)
|
||||||
|
c.model.matcha.length_scale = C.float(config.Model.Matcha.LengthScale)
|
||||||
|
|
||||||
|
c.model.matcha.dict_dir = C.CString(config.Model.Matcha.DictDir)
|
||||||
|
defer C.free(unsafe.Pointer(c.model.matcha.dict_dir))
|
||||||
|
|
||||||
c.model.num_threads = C.int(config.Model.NumThreads)
|
c.model.num_threads = C.int(config.Model.NumThreads)
|
||||||
c.model.debug = C.int(config.Model.Debug)
|
c.model.debug = C.int(config.Model.Debug)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user