Add Swift API for TTS (#439)
This commit is contained in:
28
.github/scripts/test-swift.sh
vendored
Executable file
28
.github/scripts/test-swift.sh
vendored
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
#
# CI driver for the Swift API examples: fetches a test wave file and then runs
# each example script in swift-api-examples in turn.
#
# Expects to be started from the repository root (it cd's into
# swift-api-examples using a relative path).

set -ex

echo "pwd: $PWD"

cd swift-api-examples
ls -lh

# The test wave is downloaded into this fixed directory, and Obama.srt is
# printed from it after the subtitle example has run.
desktop_dir=/Users/fangjun/Desktop

mkdir -p "$desktop_dir"
pushd "$desktop_dir"
wget -q https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
ls -lh
popd

./run-generate-subtitles.sh

ls -lh "$desktop_dir"
cat "$desktop_dir/Obama.srt"

./run-tts.sh
ls -lh

./run-decode-file.sh

./run-decode-file-non-streaming.sh

ls -lh
|
||||
53
.github/workflows/swift.yaml
vendored
Normal file
53
.github/workflows/swift.yaml
vendored
Normal file
@@ -0,0 +1,53 @@
|
||||
# Builds the Swift bindings on macOS and runs the Swift example scripts.
name: swift

on:
  push:
    branches: [master]

  pull_request:
    branches: [master]

  # Allow manual runs from the Actions tab.
  workflow_dispatch:

# Only one run per ref at a time; a newer run cancels the one in progress.
concurrency:
  group: swift-${{ github.ref }}
  cancel-in-progress: true

jobs:
  swift:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - macos-13

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-swift

      # Creates the world-writable directory that .github/scripts/test-swift.sh
      # downloads its test data into, then builds with ccache as the C++
      # compiler launcher.
      - name: Build
        shell: bash
        run: |
          sudo mkdir -p /Users/fangjun/Desktop
          sudo chmod a=rwx /Users/fangjun/Desktop
          ls -lhd /Users/fangjun/Desktop
          ls -lh /Users/fangjun/Desktop

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          ./build-swift-macos.sh

      - name: test
        shell: bash
        run: |
          .github/scripts/test-swift.sh
|
||||
4
swift-api-examples/.gitignore
vendored
4
swift-api-examples/.gitignore
vendored
@@ -1,3 +1,7 @@
|
||||
decode-file
|
||||
decode-file-non-streaming
|
||||
generate-subtitles
|
||||
tts
|
||||
vits-vctk
|
||||
sherpa-onnx-paraformer-zh-2023-09-14
|
||||
!*.sh
|
||||
|
||||
@@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
|
||||
SherpaOnnxVoiceActivityDetectorReset(vad)
|
||||
}
|
||||
}
|
||||
|
||||
// offline tts
|
||||
/// Creates the C-level VITS model configuration for offline TTS.
///
/// The three string arguments are converted with `toCPointer` before being
/// stored in the C struct; the numeric arguments are stored unchanged.
///
/// - Parameters:
///   - model: Stored in the `model` field (the TTS example passes the path of an ONNX file).
///   - lexicon: Stored in the `lexicon` field (the TTS example passes the path of a lexicon file).
///   - tokens: Stored in the `tokens` field (the TTS example passes the path of a tokens file).
///   - noiseScale: Stored in `noise_scale`. Defaults to 0.667.
///   - noiseScaleW: Stored in `noise_scale_w`. Defaults to 0.8.
///   - lengthScale: Stored in `length_scale`. Defaults to 1.0.
/// - Returns: The populated `SherpaOnnxOfflineTtsVitsModelConfig`.
func sherpaOnnxOfflineTtsVitsModelConfig(
  model: String,
  lexicon: String,
  tokens: String,
  noiseScale: Float = 0.667,
  noiseScaleW: Float = 0.8,
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsVitsModelConfig {
  .init(
    model: toCPointer(model),
    lexicon: toCPointer(lexicon),
    tokens: toCPointer(tokens),
    noise_scale: noiseScale,
    noise_scale_w: noiseScaleW,
    length_scale: lengthScale
  )
}
|
||||
|
||||
/// Creates the C-level offline TTS model configuration.
///
/// - Parameters:
///   - vits: The VITS model configuration, stored as-is in the `vits` field.
///   - numThreads: Converted to `Int32` and stored in `num_threads`. Defaults to 1.
///   - debug: Converted to `Int32` and stored in `debug`. Defaults to 0.
///   - provider: Converted with `toCPointer` and stored in `provider`. Defaults to `"cpu"`.
/// - Returns: The populated `SherpaOnnxOfflineTtsModelConfig`.
func sherpaOnnxOfflineTtsModelConfig(
  vits: SherpaOnnxOfflineTtsVitsModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineTtsModelConfig {
  .init(
    vits: vits,
    num_threads: Int32(numThreads),
    debug: Int32(debug),
    provider: toCPointer(provider)
  )
}
|
||||
|
||||
/// Creates the top-level C configuration for an offline TTS engine.
///
/// - Parameters:
///   - model: The model configuration, stored as-is in the `model` field.
///   - ruleFsts: Converted with `toCPointer` and stored in `rule_fsts`.
///     Defaults to the empty string.
/// - Returns: The populated `SherpaOnnxOfflineTtsConfig`.
func sherpaOnnxOfflineTtsConfig(
  model: SherpaOnnxOfflineTtsModelConfig,
  ruleFsts: String = ""
) -> SherpaOnnxOfflineTtsConfig {
  .init(
    model: model,
    rule_fsts: toCPointer(ruleFsts)
  )
}
|
||||
|
||||
/// Swift owner of a `SherpaOnnxGeneratedAudio` value produced by the C API.
///
/// The wrapped pointer is released with
/// `SherpaOnnxDestroyOfflineTtsGeneratedAudio` when the wrapper is
/// deinitialized, provided the pointer is non-nil.
///
/// `n`, `sampleRate`, `samples` and `save(filename:)` dereference `audio`
/// directly, so they trap if the wrapper was created with a nil pointer.
class SherpaOnnxGeneratedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxGeneratedAudio>!

  /// Wraps `audio`; the wrapper destroys it on deinit when it is non-nil.
  init(audio: UnsafePointer<SherpaOnnxGeneratedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
    }
  }

  /// The `n` field of the C struct (used as the sample count below).
  var n: Int32 {
    audio.pointee.n
  }

  /// The `sample_rate` field of the C struct.
  var sampleRate: Int32 {
    audio.pointee.sample_rate
  }

  /// A Swift-owned copy of the first `n` samples.
  ///
  /// Returns an empty array when the C struct has no sample buffer or when
  /// `n` is not positive.
  var samples: [Float] {
    // Check the count before touching the buffer: a non-positive count must
    // yield an empty array rather than trap while forming a range or buffer.
    guard let base = audio.pointee.samples, n > 0 else {
      return []
    }
    // One bulk copy instead of appending element by element.
    return Array(UnsafeBufferPointer(start: base, count: Int(n)))
  }

  /// Passes the samples, `n`, `sampleRate` and `filename` to
  /// `SherpaOnnxWriteWave`. Whatever the C call reports is not surfaced
  /// to the caller.
  ///
  /// - Parameter filename: Converted with `toCPointer` and handed to the C call.
  func save(filename: String) {
    SherpaOnnxWriteWave(audio.pointee.samples, n, sampleRate, toCPointer(filename))
  }
}
|
||||
|
||||
/// Swift owner of an offline TTS engine created through the C API.
///
/// The handle is obtained from `SherpaOnnxCreateOfflineTts` and released with
/// `SherpaOnnxDestroyOfflineTts` on deinit when it is non-nil.
class SherpaOnnxOfflineTtsWrapper {
  /// A pointer to the underlying counterpart in C
  let tts: OpaquePointer!

  /// Creates the engine from a pointer to a `SherpaOnnxOfflineTtsConfig`.
  init(config: UnsafePointer<SherpaOnnxOfflineTtsConfig>!) {
    tts = SherpaOnnxCreateOfflineTts(config)
  }

  deinit {
    guard let tts else { return }
    SherpaOnnxDestroyOfflineTts(tts)
  }

  /// Calls `SherpaOnnxOfflineTtsGenerate` and wraps the pointer it returns.
  ///
  /// - Parameters:
  ///   - text: Converted with `toCPointer` and passed to the C call.
  ///   - sid: Converted to `Int32` and passed to the C call. Defaults to 0.
  ///   - speed: Passed to the C call unchanged. Defaults to 1.0.
  /// - Returns: A wrapper that owns the generated audio. The C call's result
  ///   is optional and is wrapped without a nil check.
  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
    let generated: UnsafePointer<SherpaOnnxGeneratedAudio>? = SherpaOnnxOfflineTtsGenerate(
      tts,
      toCPointer(text),
      Int32(sid),
      speed
    )
    return SherpaOnnxGeneratedAudioWrapper(audio: generated)
  }
}
|
||||
|
||||
@@ -175,8 +175,8 @@ func run() {
|
||||
var segments: [SpeechSegment] = []
|
||||
|
||||
for offset in stride(from: 0, to: array.count, by: windowSize) {
|
||||
let end = min(offset + windowSize, array.count)
|
||||
vad.acceptWaveform(samples: [Float](array[offset ..< end]))
|
||||
let end = min(offset + windowSize, array.count)
|
||||
vad.acceptWaveform(samples: [Float](array[offset..<end]))
|
||||
}
|
||||
|
||||
var index: Int = 0
|
||||
|
||||
41
swift-api-examples/run-decode-file-non-streaming.sh
Executable file
41
swift-api-examples/run-decode-file-non-streaming.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
#
# Builds (unless the binary already exists) and runs the
# decode-file-non-streaming Swift example.
#
# All paths are relative to the current directory, so run this from the
# directory that contains the Swift sources and SherpaOnnx-Bridging-Header.h.

set -ex

install_dir=../build-swift-macos/install
model_dir=sherpa-onnx-whisper-tiny.en
archive=$model_dir.tar.bz2
exe=decode-file-non-streaming

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch and unpack the model when its directory is missing.
if [ ! -d "./$model_dir" ]; then
  printf '%s\n' \
    "Please download the pre-trained model for testing." \
    "You can refer to" \
    "" \
    "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html" \
    "" \
    "for help"

  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$archive"
  tar xvf "$archive"
  rm "$archive"
fi

if [ -e "./$exe" ]; then
  echo "./decode-file-non-streaming exists - skip building"
else
  # -lc++ links against libc++ rather than libstdc++.
  swiftc \
    -lc++ \
    -I "$install_dir/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file-non-streaming.swift ./SherpaOnnx.swift \
    -L "$install_dir/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
fi

# Prepend the install lib directory (also passed to -L above) to the dynamic
# loader search path before running the binary.
export DYLD_LIBRARY_PATH="$PWD/$install_dir/lib:$DYLD_LIBRARY_PATH"
"./$exe"
|
||||
@@ -14,7 +14,10 @@ if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
|
||||
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
|
||||
echo ""
|
||||
echo "for help"
|
||||
exit 1
|
||||
|
||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||
fi
|
||||
|
||||
if [ ! -e ./decode-file ]; then
|
||||
@@ -28,6 +31,8 @@ if [ ! -e ./decode-file ]; then
|
||||
-l sherpa-onnx \
|
||||
-l onnxruntime \
|
||||
-o decode-file
|
||||
|
||||
strip decode-file
|
||||
else
|
||||
echo "./decode-file exists - skip building"
|
||||
fi
|
||||
|
||||
@@ -14,7 +14,15 @@ if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
|
||||
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
|
||||
echo ""
|
||||
echo "for help"
|
||||
exit 1
|
||||
|
||||
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||
ls -lh sherpa-onnx-whisper-tiny.en
|
||||
fi
|
||||
if [ ! -f ./silero_vad.onnx ]; then
|
||||
echo "downloading silero_vad"
|
||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||
fi
|
||||
|
||||
if [ ! -e ./generate-subtitles ]; then
|
||||
@@ -28,6 +36,8 @@ if [ ! -e ./generate-subtitles ]; then
|
||||
-l sherpa-onnx \
|
||||
-l onnxruntime \
|
||||
-o generate-subtitles
|
||||
|
||||
strip generate-subtitles
|
||||
else
|
||||
echo "./generate-subtitles exists - skip building"
|
||||
fi
|
||||
|
||||
41
swift-api-examples/run-tts.sh
Executable file
41
swift-api-examples/run-tts.sh
Executable file
@@ -0,0 +1,41 @@
|
||||
#!/usr/bin/env bash
#
# Builds (unless the binary already exists) and runs the tts Swift example.
#
# All paths are relative to the current directory, so run this from the
# directory that contains tts.swift, SherpaOnnx.swift and the bridging header.

set -ex

install_dir=../build-swift-macos/install
model_dir=vits-vctk
archive=$model_dir.tar.bz2
exe=tts

if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

# Fetch and unpack the model when its directory is missing.
if [ ! -d "./$model_dir" ]; then
  printf '%s\n' \
    "Please download the pre-trained model for testing." \
    "You can refer to" \
    "" \
    "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers" \
    "" \
    "for help"

  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$archive"
  tar xvf "$archive"
  rm "$archive"
fi

if [ -e "./$exe" ]; then
  echo "./tts exists - skip building"
else
  # -lc++ links against libc++ rather than libstdc++.
  swiftc \
    -lc++ \
    -I "$install_dir/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./tts.swift ./SherpaOnnx.swift \
    -L "$install_dir/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
fi

# Prepend the install lib directory (also passed to -L above) to the dynamic
# loader search path before running the binary.
export DYLD_LIBRARY_PATH="$PWD/$install_dir/lib:$DYLD_LIBRARY_PATH"
"./$exe"
|
||||
31
swift-api-examples/tts.swift
Normal file
31
swift-api-examples/tts.swift
Normal file
@@ -0,0 +1,31 @@
|
||||
/// Generates speech for a fixed sentence with the VITS model files under
/// `./vits-vctk` and writes the result to `test.wav`.
func run() {
  // Model files are addressed relative to the current working directory.
  let vits = sherpaOnnxOfflineTtsVitsModelConfig(
    model: "./vits-vctk/vits-vctk.onnx",
    lexicon: "./vits-vctk/lexicon.txt",
    tokens: "./vits-vctk/tokens.txt"
  )
  // Declared `var` because the wrapper takes the config by pointer (`&`).
  var ttsConfig = sherpaOnnxOfflineTtsConfig(
    model: sherpaOnnxOfflineTtsModelConfig(vits: vits)
  )
  let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  let outputPath = "test.wav"
  let audio = tts.generate(
    text: "How are you doing? Fantastic!",
    sid: 99,
    speed: 1.0
  )
  audio.save(filename: outputPath)

  print("\nSaved to:\n\(outputPath)")
}
|
||||
|
||||
/// Program entry point for the TTS example.
@main
struct App {
  /// Runs the example once.
  static func main() { run() }
}
|
||||
Reference in New Issue
Block a user