diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh index 9ab682d2..65fe4588 100755 --- a/.github/scripts/test-swift.sh +++ b/.github/scripts/test-swift.sh @@ -11,6 +11,10 @@ ls -lh ls -lh rm -rf vits-piper-* +./run-tts-kokoro-zh-en.sh +ls -lh +rm -rf kokoro-multi-* + ./run-tts-kokoro-en.sh ls -lh rm -rf kokoro-en-* diff --git a/cxx-api-examples/kokoro-tts-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-en-cxx-api.cc index ccd5bb1d..66b28f03 100644 --- a/cxx-api-examples/kokoro-tts-en-cxx-api.cc +++ b/cxx-api-examples/kokoro-tts-en-cxx-api.cc @@ -3,7 +3,7 @@ // Copyright (c) 2025 Xiaomi Corporation // This file shows how to use sherpa-onnx CXX API -// for Chinese TTS with Kokoro. +// for English TTS with Kokoro. // // clang-format off /* diff --git a/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc index a47e1964..c0228ad9 100644 --- a/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc +++ b/cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc @@ -3,7 +3,7 @@ // Copyright (c) 2025 Xiaomi Corporation // This file shows how to use sherpa-onnx CXX API -// for Chinese TTS with Kokoro. +// for Chinese + English TTS with Kokoro. // // clang-format off /* diff --git a/swift-api-examples/.gitignore b/swift-api-examples/.gitignore index 5bcf3148..91dea72a 100644 --- a/swift-api-examples/.gitignore +++ b/swift-api-examples/.gitignore @@ -13,3 +13,4 @@ add-punctuations tts-matcha-zh tts-matcha-en tts-kokoro-en +tts-kokoro-zh-en diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 81a1c9e4..cb64218b 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -767,14 +767,18 @@ func sherpaOnnxOfflineTtsKokoroModelConfig( voices: String = "", tokens: String = "", dataDir: String = "", - lengthScale: Float = 1.0 + lengthScale: Float = 1.0, + dictDir: String = "", + lexicon: String = "" ) -> SherpaOnnxOfflineTtsKokoroModelConfig { return SherpaOnnxOfflineTtsKokoroModelConfig( model: toCPointer(model), voices: toCPointer(voices), tokens: toCPointer(tokens), data_dir: toCPointer(dataDir), - length_scale: lengthScale + length_scale: lengthScale, + dict_dir: toCPointer(dictDir), + lexicon: toCPointer(lexicon) ) } diff --git a/swift-api-examples/run-tts-kokoro-zh-en.sh b/swift-api-examples/run-tts-kokoro-zh-en.sh new file mode 100755 index 00000000..d7763cc8 --- /dev/null +++ b/swift-api-examples/run-tts-kokoro-zh-en.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash + +set -ex + +if [ ! -d ../build-swift-macos ]; then + echo "Please run ../build-swift-macos.sh first!" + exit 1 +fi + +# please visit +# https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html +# to download more models +if [ ! -f ./kokoro-multi-lang-v1_0/model.onnx ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 + tar xf kokoro-multi-lang-v1_0.tar.bz2 + rm kokoro-multi-lang-v1_0.tar.bz2 +fi + +if [ ! -e ./tts-kokoro-zh-en ]; then + # Note: We use -lc++ to link against libc++ instead of libstdc++ + swiftc \ + -lc++ \ + -I ../build-swift-macos/install/include \ + -import-objc-header ./SherpaOnnx-Bridging-Header.h \ + ./tts-kokoro-zh-en.swift ./SherpaOnnx.swift \ + -L ../build-swift-macos/install/lib/ \ + -l sherpa-onnx \ + -l onnxruntime \ + -o tts-kokoro-zh-en + + strip tts-kokoro-zh-en +else + echo "./tts-kokoro-zh-en exists - skip building" +fi + +export DYLD_LIBRARY_PATH=$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH +./tts-kokoro-zh-en diff --git a/swift-api-examples/tts-kokoro-zh-en.swift b/swift-api-examples/tts-kokoro-zh-en.swift new file mode 100644 index 00000000..bd4e075d --- /dev/null +++ b/swift-api-examples/tts-kokoro-zh-en.swift @@ -0,0 +1,69 @@ +class MyClass { + func playSamples(samples: [Float]) { + print("Play \(samples.count) samples") + } +} + +func run() { + let model = "./kokoro-multi-lang-v1_0/model.onnx" + let voices = "./kokoro-multi-lang-v1_0/voices.bin" + let tokens = "./kokoro-multi-lang-v1_0/tokens.txt" + let dataDir = "./kokoro-multi-lang-v1_0/espeak-ng-data" + let dictDir = "./kokoro-multi-lang-v1_0/dict" + let lexicon = "./kokoro-multi-lang-v1_0/lexicon-us-en.txt,./kokoro-multi-lang-v1_0/lexicon-zh.txt" + let kokoro = sherpaOnnxOfflineTtsKokoroModelConfig( + model: model, + voices: voices, + tokens: tokens, + dataDir: dataDir, + dictDir: dictDir, + lexicon: lexicon + ) + let modelConfig = sherpaOnnxOfflineTtsModelConfig(kokoro: kokoro, debug: 0) + var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig) + + let myClass = MyClass() + + // We use Unretained here so myClass must be kept alive as the callback is invoked + // + // See also + // https://medium.com/codex/swift-c-callback-interoperability-6d57da6c8ee6 + let arg = Unmanaged.passUnretained(myClass).toOpaque() + + let callback: TtsCallbackWithArg = { samples, n, arg in + let o = Unmanaged.fromOpaque(arg!).takeUnretainedValue() + var savedSamples: [Float] = [] + for index in 0..