Add Swift API for TTS (#439)
This commit is contained in:
28
.github/scripts/test-swift.sh
vendored
Executable file
28
.github/scripts/test-swift.sh
vendored
Executable file
@@ -0,0 +1,28 @@
|
|||||||
|
#!/usr/bin/env bash
#
# CI smoke test for the Swift API examples.
#
# Downloads a test wave file, then runs each example script in
# swift-api-examples in turn. Any failing command aborts the script.

# Directory that receives the downloaded test wave and from which the
# generated Obama.srt is read back below.
# It is assigned before tracing is switched on.
desktop_dir=/Users/fangjun/Desktop

set -ex

echo "pwd: $PWD"

cd swift-api-examples
ls -lh

# Fetch the test audio into the desktop directory.
mkdir -p "$desktop_dir"
pushd "$desktop_dir"
wget -q https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
ls -lh
popd

# Subtitle generation; afterwards show the produced subtitle file.
./run-generate-subtitles.sh

ls -lh "$desktop_dir"
cat "$desktop_dir/Obama.srt"

# Text-to-speech example.
./run-tts.sh
ls -lh

# Streaming and non-streaming file decoding examples.
./run-decode-file.sh

./run-decode-file-non-streaming.sh

ls -lh
|
||||||
53
.github/workflows/swift.yaml
vendored
Normal file
53
.github/workflows/swift.yaml
vendored
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
# Builds the Swift API on macOS and runs the Swift example scripts.
name: swift

on:
  push:
    branches: [master]

  pull_request:
    branches: [master]

  # Allow starting the workflow manually.
  workflow_dispatch:

# Only one run per ref; a newer run cancels the one in progress.
concurrency:
  group: swift-${{ github.ref }}
  cancel-in-progress: true

jobs:
  swift:
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os:
          - macos-13

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: ${{ matrix.os }}-swift

      - name: Build
        shell: bash
        run: |
          sudo mkdir -p /Users/fangjun/Desktop
          sudo chmod a=rwx /Users/fangjun/Desktop
          ls -lhd /Users/fangjun/Desktop
          ls -lh /Users/fangjun/Desktop

          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
          cmake --version

          ./build-swift-macos.sh

      - name: test
        shell: bash
        run: |
          .github/scripts/test-swift.sh
|
||||||
4
swift-api-examples/.gitignore
vendored
4
swift-api-examples/.gitignore
vendored
@@ -1,3 +1,7 @@
|
|||||||
decode-file
|
decode-file
|
||||||
decode-file-non-streaming
|
decode-file-non-streaming
|
||||||
generate-subtitles
|
generate-subtitles
|
||||||
|
tts
|
||||||
|
vits-vctk
|
||||||
|
sherpa-onnx-paraformer-zh-2023-09-14
|
||||||
|
!*.sh
|
||||||
|
|||||||
@@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
|
|||||||
SherpaOnnxVoiceActivityDetectorReset(vad)
|
SherpaOnnxVoiceActivityDetectorReset(vad)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// offline tts
|
||||||
|
/// Builds the C struct `SherpaOnnxOfflineTtsVitsModelConfig` from Swift values.
///
/// The three string arguments are converted with `toCPointer` before being
/// stored in the struct; the numeric arguments are forwarded unchanged.
///
/// - Parameters:
///   - model: Stored in the `model` field (the example passes a path to an `.onnx` file).
///   - lexicon: Stored in the `lexicon` field (the example passes a path to `lexicon.txt`).
///   - tokens: Stored in the `tokens` field (the example passes a path to `tokens.txt`).
///   - noiseScale: Stored in the `noise_scale` field.
///   - noiseScaleW: Stored in the `noise_scale_w` field.
///   - lengthScale: Stored in the `length_scale` field.
/// - Returns: The populated C configuration struct.
func sherpaOnnxOfflineTtsVitsModelConfig(
  model: String,
  lexicon: String,
  tokens: String,
  noiseScale: Float = 0.667,
  noiseScaleW: Float = 0.8,
  lengthScale: Float = 1.0
) -> SherpaOnnxOfflineTtsVitsModelConfig {
  let modelPointer = toCPointer(model)
  let lexiconPointer = toCPointer(lexicon)
  let tokensPointer = toCPointer(tokens)

  return SherpaOnnxOfflineTtsVitsModelConfig(
    model: modelPointer,
    lexicon: lexiconPointer,
    tokens: tokensPointer,
    noise_scale: noiseScale,
    noise_scale_w: noiseScaleW,
    length_scale: lengthScale
  )
}
|
||||||
|
|
||||||
|
/// Builds the C struct `SherpaOnnxOfflineTtsModelConfig` from Swift values.
///
/// - Parameters:
///   - vits: VITS model configuration, stored as-is in the `vits` field.
///   - numThreads: Converted to `Int32` and stored in `num_threads`.
///   - debug: Converted to `Int32` and stored in `debug`.
///   - provider: Converted with `toCPointer` and stored in `provider`.
/// - Returns: The populated C configuration struct.
func sherpaOnnxOfflineTtsModelConfig(
  vits: SherpaOnnxOfflineTtsVitsModelConfig,
  numThreads: Int = 1,
  debug: Int = 0,
  provider: String = "cpu"
) -> SherpaOnnxOfflineTtsModelConfig {
  // The C struct uses 32-bit integers; the conversions trap on overflow.
  let threadCount = Int32(numThreads)
  let debugFlag = Int32(debug)

  return SherpaOnnxOfflineTtsModelConfig(
    vits: vits,
    num_threads: threadCount,
    debug: debugFlag,
    provider: toCPointer(provider))
}
|
||||||
|
|
||||||
|
/// Builds the top-level C struct `SherpaOnnxOfflineTtsConfig`.
///
/// - Parameters:
///   - model: Model configuration, stored as-is in the `model` field.
///   - ruleFsts: Converted with `toCPointer` and stored in `rule_fsts`;
///     defaults to the empty string.
/// - Returns: The populated C configuration struct.
func sherpaOnnxOfflineTtsConfig(
  model: SherpaOnnxOfflineTtsModelConfig,
  ruleFsts: String = ""
) -> SherpaOnnxOfflineTtsConfig {
  let ruleFstsPointer = toCPointer(ruleFsts)
  return SherpaOnnxOfflineTtsConfig(model: model, rule_fsts: ruleFstsPointer)
}
|
||||||
|
|
||||||
|
/// Owns a `SherpaOnnxGeneratedAudio` produced by the C API and exposes its
/// contents to Swift.
///
/// The wrapped pointer may be nil (the initializer accepts an optional
/// pointer). Every accessor tolerates that case instead of dereferencing nil.
class SherpaOnnxGeneratedAudioWrapper {
  /// A pointer to the underlying counterpart in C
  let audio: UnsafePointer<SherpaOnnxGeneratedAudio>!

  /// Wraps `audio`; the pointer is passed to
  /// `SherpaOnnxDestroyOfflineTtsGeneratedAudio` when this object is deallocated.
  init(audio: UnsafePointer<SherpaOnnxGeneratedAudio>!) {
    self.audio = audio
  }

  deinit {
    if let audio {
      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
    }
  }

  /// Number of samples reported by the C struct, or 0 when there is no audio.
  var n: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.n
  }

  /// Sample rate reported by the C struct, or 0 when there is no audio.
  var sampleRate: Int32 {
    guard let audio else { return 0 }
    return audio.pointee.sample_rate
  }

  /// A copy of the generated samples.
  ///
  /// Returns an empty array when there is no audio, when the C struct has no
  /// sample buffer, or when the reported count is not positive.
  var samples: [Float] {
    guard let audio, let buffer = audio.pointee.samples else { return [] }

    let count = Int(audio.pointee.n)
    guard count > 0 else { return [] }

    // Copy the whole buffer in one step instead of appending per element.
    return Array(UnsafeBufferPointer(start: buffer, count: count))
  }

  /// Writes the audio to `filename` via `SherpaOnnxWriteWave`.
  ///
  /// When there is no audio, nothing is written and a diagnostic is printed.
  ///
  /// - Parameter filename: Destination path, converted with `toCPointer`.
  func save(filename: String) {
    guard let audio else {
      print("SherpaOnnxGeneratedAudioWrapper.save: no audio to write to \(filename)")
      return
    }

    SherpaOnnxWriteWave(
      audio.pointee.samples, audio.pointee.n, audio.pointee.sample_rate, toCPointer(filename))
  }
}
|
||||||
|
|
||||||
|
/// Swift wrapper around the offline TTS object of the C API.
///
/// The handle is created in `init` and destroyed in `deinit`.
class SherpaOnnxOfflineTtsWrapper {
  /// A pointer to the underlying counterpart in C
  let tts: OpaquePointer!

  /// Creates the C TTS object from `config` via `SherpaOnnxCreateOfflineTts`.
  init(config: UnsafePointer<SherpaOnnxOfflineTtsConfig>!) {
    tts = SherpaOnnxCreateOfflineTts(config)
  }

  deinit {
    guard let tts else { return }
    SherpaOnnxDestroyOfflineTts(tts)
  }

  /// Generates audio for `text` by calling `SherpaOnnxOfflineTtsGenerate`.
  ///
  /// - Parameters:
  ///   - text: Input text, converted with `toCPointer`.
  ///   - sid: Speaker id, converted to `Int32` (traps on overflow).
  ///   - speed: Forwarded unchanged to the C API.
  /// - Returns: A wrapper that takes ownership of the returned C audio pointer.
  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
    let generated: UnsafePointer<SherpaOnnxGeneratedAudio>? =
      SherpaOnnxOfflineTtsGenerate(tts, toCPointer(text), Int32(sid), speed)
    return SherpaOnnxGeneratedAudioWrapper(audio: generated)
  }
}
|
||||||
|
|||||||
@@ -175,8 +175,8 @@ func run() {
|
|||||||
var segments: [SpeechSegment] = []
|
var segments: [SpeechSegment] = []
|
||||||
|
|
||||||
for offset in stride(from: 0, to: array.count, by: windowSize) {
|
for offset in stride(from: 0, to: array.count, by: windowSize) {
|
||||||
let end = min(offset + windowSize, array.count)
|
let end = min(offset + windowSize, array.count)
|
||||||
vad.acceptWaveform(samples: [Float](array[offset ..< end]))
|
vad.acceptWaveform(samples: [Float](array[offset..<end]))
|
||||||
}
|
}
|
||||||
|
|
||||||
var index: Int = 0
|
var index: Int = 0
|
||||||
|
|||||||
41
swift-api-examples/run-decode-file-non-streaming.sh
Executable file
41
swift-api-examples/run-decode-file-non-streaming.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds (if necessary) and runs the non-streaming Swift decoding example.
#
# Requires ../build-swift-macos to exist. Downloads the whisper tiny.en model
# when it is not present in the current directory.

# These names are assigned before tracing is switched on.
build_dir=../build-swift-macos
model_name=sherpa-onnx-whisper-tiny.en
exe=decode-file-non-streaming

set -ex

if [ ! -d "$build_dir" ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d "./$model_name" ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
  echo ""
  echo "for help"

  # Fetch and unpack the model, then drop the archive.
  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/$model_name.tar.bz2"
  tar xvf "$model_name.tar.bz2"
  rm "$model_name.tar.bz2"
fi

if [ ! -e "./$exe" ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I "$build_dir/install/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    "./$exe.swift" ./SherpaOnnx.swift \
    -L "$build_dir/install/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
else
  echo "./decode-file-non-streaming exists - skip building"
fi

# Let the dynamic loader find the libraries installed by the build.
export DYLD_LIBRARY_PATH="$PWD/$build_dir/install/lib:$DYLD_LIBRARY_PATH"
"./$exe"
|
||||||
@@ -14,7 +14,10 @@ if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
|
|||||||
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
|
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
|
||||||
echo ""
|
echo ""
|
||||||
echo "for help"
|
echo "for help"
|
||||||
exit 1
|
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -e ./decode-file ]; then
|
if [ ! -e ./decode-file ]; then
|
||||||
@@ -28,6 +31,8 @@ if [ ! -e ./decode-file ]; then
|
|||||||
-l sherpa-onnx \
|
-l sherpa-onnx \
|
||||||
-l onnxruntime \
|
-l onnxruntime \
|
||||||
-o decode-file
|
-o decode-file
|
||||||
|
|
||||||
|
strip decode-file
|
||||||
else
|
else
|
||||||
echo "./decode-file exists - skip building"
|
echo "./decode-file exists - skip building"
|
||||||
fi
|
fi
|
||||||
|
|||||||
@@ -14,7 +14,15 @@ if [ ! -d ./sherpa-onnx-whisper-tiny.en ]; then
|
|||||||
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
|
echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/whisper/tiny.en.html"
|
||||||
echo ""
|
echo ""
|
||||||
echo "for help"
|
echo "for help"
|
||||||
exit 1
|
|
||||||
|
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
rm sherpa-onnx-whisper-tiny.en.tar.bz2
|
||||||
|
ls -lh sherpa-onnx-whisper-tiny.en
|
||||||
|
fi
|
||||||
|
if [ ! -f ./silero_vad.onnx ]; then
|
||||||
|
echo "downloading silero_vad"
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -e ./generate-subtitles ]; then
|
if [ ! -e ./generate-subtitles ]; then
|
||||||
@@ -28,6 +36,8 @@ if [ ! -e ./generate-subtitles ]; then
|
|||||||
-l sherpa-onnx \
|
-l sherpa-onnx \
|
||||||
-l onnxruntime \
|
-l onnxruntime \
|
||||||
-o generate-subtitles
|
-o generate-subtitles
|
||||||
|
|
||||||
|
strip generate-subtitles
|
||||||
else
|
else
|
||||||
echo "./generate-subtitles exists - skip building"
|
echo "./generate-subtitles exists - skip building"
|
||||||
fi
|
fi
|
||||||
|
|||||||
41
swift-api-examples/run-tts.sh
Executable file
41
swift-api-examples/run-tts.sh
Executable file
@@ -0,0 +1,41 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Builds (if necessary) and runs the Swift text-to-speech example.
#
# Requires ../build-swift-macos to exist. Downloads the vits-vctk model when
# it is not present in the current directory.

# These names are assigned before tracing is switched on.
build_dir=../build-swift-macos
model_name=vits-vctk
exe=tts

set -ex

if [ ! -d "$build_dir" ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d "./$model_name" ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers"
  echo ""
  echo "for help"

  # Fetch and unpack the model, then drop the archive.
  wget -q "https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_name.tar.bz2"
  tar xvf "$model_name.tar.bz2"
  rm "$model_name.tar.bz2"
fi

if [ ! -e "./$exe" ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I "$build_dir/install/include" \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    "./$exe.swift" ./SherpaOnnx.swift \
    -L "$build_dir/install/lib/" \
    -l sherpa-onnx \
    -l onnxruntime \
    -o "$exe"

  strip "$exe"
else
  echo "./tts exists - skip building"
fi

# Let the dynamic loader find the libraries installed by the build.
export DYLD_LIBRARY_PATH="$PWD/$build_dir/install/lib:$DYLD_LIBRARY_PATH"
"./$exe"
|
||||||
31
swift-api-examples/tts.swift
Normal file
31
swift-api-examples/tts.swift
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
/// Synthesizes one fixed sentence with the vits-vctk model and writes the
/// result to `test.wav` in the current directory.
func run() {
  // Assemble the configuration from the innermost struct outwards.
  // All options other than the three model files keep their default values.
  var ttsConfig = sherpaOnnxOfflineTtsConfig(
    model: sherpaOnnxOfflineTtsModelConfig(
      vits: sherpaOnnxOfflineTtsVitsModelConfig(
        model: "./vits-vctk/vits-vctk.onnx",
        lexicon: "./vits-vctk/lexicon.txt",
        tokens: "./vits-vctk/tokens.txt"
      )
    )
  )

  let synthesizer = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)

  // Speaker id 99 at normal speed.
  let generated = synthesizer.generate(
    text: "How are you doing? Fantastic!",
    sid: 99,
    speed: 1.0
  )

  let filename = "test.wav"
  generated.save(filename: filename)

  print("\nSaved to:\n\(filename)")
}
|
||||||
|
|
||||||
|
/// Program entry point for the TTS example.
@main
struct App {
  /// Runs the example once.
  static func main() { run() }
}
|
||||||
Reference in New Issue
Block a user