From 2f22e6ed63acd85e9780604fdc493c44b9438e48 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Wed, 22 Nov 2023 16:04:26 +0800
Subject: [PATCH] Add Swift API for TTS (#439)

---
 .github/scripts/test-swift.sh                 |  28 +++++
 .github/workflows/swift.yaml                  |  53 +++++++++
 swift-api-examples/.gitignore                 |   4 +
 swift-api-examples/SherpaOnnx.swift           | 107 ++++++++++++++++++
 swift-api-examples/generate-subtitles.swift   |   4 +-
 .../run-decode-file-non-streaming.sh          |  41 +++++++
 swift-api-examples/run-decode-file.sh         |   7 +-
 swift-api-examples/run-generate-subtitles.sh  |  12 +-
 swift-api-examples/run-tts.sh                 |  41 +++++++
 swift-api-examples/tts.swift                  |  31 +++++
 10 files changed, 324 insertions(+), 4 deletions(-)
 create mode 100755 .github/scripts/test-swift.sh
 create mode 100644 .github/workflows/swift.yaml
 create mode 100755 swift-api-examples/run-decode-file-non-streaming.sh
 create mode 100755 swift-api-examples/run-tts.sh
 create mode 100644 swift-api-examples/tts.swift

diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
new file mode 100755
index 00000000..6695b51f
--- /dev/null
+++ b/.github/scripts/test-swift.sh
@@ -0,0 +1,28 @@
+#!/usr/bin/env bash
+
+set -ex
+
+echo "pwd: $PWD"
+
+cd swift-api-examples
+ls -lh
+
+mkdir -p /Users/fangjun/Desktop
+pushd /Users/fangjun/Desktop
+wget -q https://huggingface.co/csukuangfj/test-data/resolve/main/Obama.wav
+ls -lh
+popd
+
+./run-generate-subtitles.sh
+
+ls -lh /Users/fangjun/Desktop
+cat /Users/fangjun/Desktop/Obama.srt
+
+./run-tts.sh
+ls -lh
+
+./run-decode-file.sh
+
+./run-decode-file-non-streaming.sh
+
+ls -lh
diff --git a/.github/workflows/swift.yaml b/.github/workflows/swift.yaml
new file mode 100644
index 00000000..9971606f
--- /dev/null
+++ b/.github/workflows/swift.yaml
@@ -0,0 +1,53 @@
+name: swift
+
+on:
+  push:
+    branches:
+      - master
+
+  pull_request:
+    branches:
+      - master
+
+  workflow_dispatch:
+
+concurrency:
+  group: swift-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  swift:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [macos-13]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: ccache
+        uses: hendrikmuhs/ccache-action@v1.2
+        with:
+          key: ${{ matrix.os }}-swift
+
+      - name: Build
+        shell: bash
+        run: |
+          sudo mkdir -p /Users/fangjun/Desktop
+          sudo chmod a=rwx /Users/fangjun/Desktop
+          ls -lhd /Users/fangjun/Desktop
+          ls -lh /Users/fangjun/Desktop
+
+          export CMAKE_CXX_COMPILER_LAUNCHER=ccache
+          export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
+          cmake --version
+
+          ./build-swift-macos.sh
+
+      - name: test
+        shell: bash
+        run: |
+          .github/scripts/test-swift.sh
diff --git a/swift-api-examples/.gitignore b/swift-api-examples/.gitignore
index cbcf0750..95c397c3 100644
--- a/swift-api-examples/.gitignore
+++ b/swift-api-examples/.gitignore
@@ -1,3 +1,7 @@
 decode-file
 decode-file-non-streaming
 generate-subtitles
+tts
+vits-vctk
+sherpa-onnx-paraformer-zh-2023-09-14
+!*.sh
diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift
index e786d7cc..206e95bd 100644
--- a/swift-api-examples/SherpaOnnx.swift
+++ b/swift-api-examples/SherpaOnnx.swift
@@ -572,3 +572,110 @@ class SherpaOnnxVoiceActivityDetectorWrapper {
     SherpaOnnxVoiceActivityDetectorReset(vad)
   }
 }
+
+// offline tts
+func sherpaOnnxOfflineTtsVitsModelConfig(
+  model: String,
+  lexicon: String,
+  tokens: String,
+  noiseScale: Float = 0.667,
+  noiseScaleW: Float = 0.8,
+  lengthScale: Float = 1.0
+) -> SherpaOnnxOfflineTtsVitsModelConfig {
+  return SherpaOnnxOfflineTtsVitsModelConfig(
+    model: toCPointer(model),
+    lexicon: toCPointer(lexicon),
+    tokens: toCPointer(tokens),
+    noise_scale: noiseScale,
+    noise_scale_w: noiseScaleW,
+    length_scale: lengthScale)
+}
+
+func sherpaOnnxOfflineTtsModelConfig(
+  vits: SherpaOnnxOfflineTtsVitsModelConfig,
+  numThreads: Int = 1,
+  debug: Int = 0,
+  provider: String = "cpu"
+) -> SherpaOnnxOfflineTtsModelConfig {
+  return SherpaOnnxOfflineTtsModelConfig(
+    vits: vits,
+    num_threads: Int32(numThreads),
+    debug: Int32(debug),
+    provider: toCPointer(provider)
+  )
+}
+
+func sherpaOnnxOfflineTtsConfig(
+  model: SherpaOnnxOfflineTtsModelConfig,
+  ruleFsts: String = ""
+) -> SherpaOnnxOfflineTtsConfig {
+  return SherpaOnnxOfflineTtsConfig(
+    model: model,
+    rule_fsts: toCPointer(ruleFsts)
+  )
+}
+
+class SherpaOnnxGeneratedAudioWrapper {
+  /// A pointer to the underlying counterpart in C
+  let audio: UnsafePointer!
+
+  init(audio: UnsafePointer!) {
+    self.audio = audio
+  }
+
+  deinit {
+    if let audio {
+      SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio)
+    }
+  }
+
+  var n: Int32 {
+    return audio.pointee.n
+  }
+
+  var sampleRate: Int32 {
+    return audio.pointee.sample_rate
+  }
+
+  var samples: [Float] {
+    if let p = audio.pointee.samples {
+      var samples: [Float] = []
+      for index in 0..! ) {
+    tts = SherpaOnnxCreateOfflineTts(config)
+  }
+
+  deinit {
+    if let tts {
+      SherpaOnnxDestroyOfflineTts(tts)
+    }
+  }
+
+  func generate(text: String, sid: Int = 0, speed: Float = 1.0) -> SherpaOnnxGeneratedAudioWrapper {
+    let audio: UnsafePointer? = SherpaOnnxOfflineTtsGenerate(
+      tts, toCPointer(text), Int32(sid), speed)
+
+    return SherpaOnnxGeneratedAudioWrapper(audio: audio)
+  }
+}
diff --git a/swift-api-examples/generate-subtitles.swift b/swift-api-examples/generate-subtitles.swift
index b04b6fae..d24c9efa 100644
--- a/swift-api-examples/generate-subtitles.swift
+++ b/swift-api-examples/generate-subtitles.swift
@@ -175,8 +175,8 @@ func run() {
   var segments: [SpeechSegment] = []
   for offset in stride(from: 0, to: array.count, by: windowSize) {
-    let end = min(offset + windowSize, array.count)
-    vad.acceptWaveform(samples: [Float](array[offset ..< end]))
+    let end = min(offset + windowSize, array.count)
+    vad.acceptWaveform(samples: [Float](array[offset..