diff --git a/.github/workflows/test-go-package.yaml b/.github/workflows/test-go-package.yaml index 95c868b8..f13d7eb7 100644 --- a/.github/workflows/test-go-package.yaml +++ b/.github/workflows/test-go-package.yaml @@ -209,6 +209,11 @@ jobs: go build ls -lh + echo "Test kokoro en" + ./run-kokoro-en.sh + rm -rf kokoro-en-* + ls -lh + echo "Test matcha zh" ./run-matcha-zh.sh rm -rf matcha-icefall-* diff --git a/.github/workflows/test-go.yaml b/.github/workflows/test-go.yaml index 083f01d6..c3df3d75 100644 --- a/.github/workflows/test-go.yaml +++ b/.github/workflows/test-go.yaml @@ -224,6 +224,11 @@ jobs: go build ls -lh + echo "Test kokoro en" + ./run-kokoro-en.sh + rm -rf kokoro-en-* + ls -lh + echo "Test matcha zh" ./run-matcha-zh.sh rm -rf matcha-icefall-* diff --git a/go-api-examples/non-streaming-tts/main.go b/go-api-examples/non-streaming-tts/main.go index 73638d8f..f3df7f10 100644 --- a/go-api-examples/non-streaming-tts/main.go +++ b/go-api-examples/non-streaming-tts/main.go @@ -33,6 +33,12 @@ func main() { flag.Float32Var(&config.Model.Matcha.NoiseScale, "matcha-noise-scale", 0.667, "noise_scale for Matcha") flag.Float32Var(&config.Model.Matcha.LengthScale, "matcha-length-scale", 1.0, "length_scale for Matcha. small -> faster in speech speed; large -> slower") + flag.StringVar(&config.Model.Kokoro.Model, "kokoro-model", "", "Path to the Kokoro ONNX model") + flag.StringVar(&config.Model.Kokoro.Voices, "kokoro-voices", "", "Path to voices.bin for Kokoro") + flag.StringVar(&config.Model.Kokoro.Tokens, "kokoro-tokens", "", "Path to tokens.txt for Kokoro") + flag.StringVar(&config.Model.Kokoro.DataDir, "kokoro-data-dir", "", "Path to espeak-ng-data for Kokoro") + flag.Float32Var(&config.Model.Kokoro.LengthScale, "kokoro-length-scale", 1.0, "length_scale for Kokoro. small -> faster in speech speed; large -> slower") + flag.IntVar(&config.Model.NumThreads, "num-threads", 1, "Number of threads for computing") flag.IntVar(&config.Model.Debug, "debug", 0, "Whether to show debug message") flag.StringVar(&config.Model.Provider, "provider", "cpu", "Provider to use") diff --git a/go-api-examples/non-streaming-tts/run-kokoro-en.sh b/go-api-examples/non-streaming-tts/run-kokoro-en.sh new file mode 100755 index 00000000..a7d356d1 --- /dev/null +++ b/go-api-examples/non-streaming-tts/run-kokoro-en.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -f ./kokoro-en-v0_19/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-en-v0_19.tar.bz2 + tar xf kokoro-en-v0_19.tar.bz2 + rm kokoro-en-v0_19.tar.bz2 +fi + +go mod tidy +go build + +./non-streaming-tts \ + --kokoro-model=./kokoro-en-v0_19/model.onnx \ + --kokoro-voices=./kokoro-en-v0_19/voices.bin \ + --kokoro-tokens=./kokoro-en-v0_19/tokens.txt \ + --kokoro-data-dir=./kokoro-en-v0_19/espeak-ng-data \ + --debug=1 \ + --output-filename=./test-kokoro-en.wav \ + "Friends fell out often because life was changing so fast. The easiest thing in the world was to lose touch with someone." diff --git a/scripts/go/_internal/non-streaming-tts/run-kokoro-en.sh b/scripts/go/_internal/non-streaming-tts/run-kokoro-en.sh new file mode 120000 index 00000000..43687f27 --- /dev/null +++ b/scripts/go/_internal/non-streaming-tts/run-kokoro-en.sh @@ -0,0 +1 @@ +../../../../go-api-examples/non-streaming-tts/run-kokoro-en.sh \ No newline at end of file diff --git a/scripts/go/sherpa_onnx.go b/scripts/go/sherpa_onnx.go index 76375284..4da12cfb 100644 --- a/scripts/go/sherpa_onnx.go +++ b/scripts/go/sherpa_onnx.go @@ -682,9 +682,18 @@ type OfflineTtsMatchaModelConfig struct { DictDir string // Path to dict directory for jieba (used only in Chinese tts) } +type OfflineTtsKokoroModelConfig struct { + Model string // Path to the model for kokoro + Voices string // Path to the voices.bin for kokoro + Tokens string // Path to tokens.txt + DataDir string // Path to espeak-ng-data directory + LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed +} + type OfflineTtsModelConfig struct { Vits OfflineTtsVitsModelConfig Matcha OfflineTtsMatchaModelConfig + Kokoro OfflineTtsKokoroModelConfig // Number of threads to use for neural network computation NumThreads int @@ -776,6 +785,21 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts { c.model.matcha.dict_dir = C.CString(config.Model.Matcha.DictDir) defer C.free(unsafe.Pointer(c.model.matcha.dict_dir)) + // kokoro + c.model.kokoro.model = C.CString(config.Model.Kokoro.Model) + defer C.free(unsafe.Pointer(c.model.kokoro.model)) + + c.model.kokoro.voices = C.CString(config.Model.Kokoro.Voices) + defer C.free(unsafe.Pointer(c.model.kokoro.voices)) + + c.model.kokoro.tokens = C.CString(config.Model.Kokoro.Tokens) + defer C.free(unsafe.Pointer(c.model.kokoro.tokens)) + + c.model.kokoro.data_dir = C.CString(config.Model.Kokoro.DataDir) + defer C.free(unsafe.Pointer(c.model.kokoro.data_dir)) + + c.model.kokoro.length_scale = C.float(config.Model.Kokoro.LengthScale) + c.model.num_threads = C.int(config.Model.NumThreads) c.model.debug = C.int(config.Model.Debug)