Add iOS support (#65)

This commit is contained in:
Fangjun Kuang
2023-02-25 21:56:25 +08:00
committed by GitHub
parent fb1e24bebb
commit 475caf22f9
34 changed files with 2669 additions and 23 deletions

View File

@@ -0,0 +1,9 @@
// swift-api-examples/SherpaOnnx-Bridging-Header.h
//
// Copyright (c) 2023 Xiaomi Corporation

// Bridging header: exposes the sherpa-onnx C API to the Swift examples
// in this directory (compiled with -import-objc-header).
#ifndef SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_
#define SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_
#import "sherpa-onnx/c-api/c-api.h"
#endif  // SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_

View File

@@ -0,0 +1,171 @@
/// swift-api-examples/SherpaOnnx.swift
/// Copyright (c) 2023 Xiaomi Corporation
import Foundation // For NSString
/// Convert a String from swift to a `const char*` so that we can pass it to
/// the C language.
///
/// NOTE(review): the returned pointer is backed by a temporary `NSString`
/// created here; it is only guaranteed valid while that object is alive
/// (typically the enclosing autorelease scope). The C side should copy the
/// string before that scope ends — TODO confirm the C API copies its inputs.
///
/// - Parameters:
///   - s: The String to convert.
/// - Returns: A pointer that can be passed to C as `const char*`
func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
let cs = (s as NSString).utf8String
return UnsafePointer<Int8>(cs)
}
/// Build a `SherpaOnnxOnlineTransducerModelConfig` from Swift strings.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download the required `.onnx` files.
///
/// - Parameters:
///   - encoder: Path to encoder.onnx
///   - decoder: Path to decoder.onnx
///   - joiner: Path to joiner.onnx
///   - tokens: Path to tokens.txt
///   - numThreads: Number of threads to use for neural network computation.
///   - debug: Non-zero enables debug output in the underlying C library.
///
/// - Returns: A configured `SherpaOnnxOnlineTransducerModelConfig`.
func sherpaOnnxOnlineTransducerModelConfig(
  encoder: String,
  decoder: String,
  joiner: String,
  tokens: String,
  numThreads: Int = 2,
  debug: Int = 0
) -> SherpaOnnxOnlineTransducerModelConfig {
  // The C struct stores `const char*` strings and Int32 scalars.
  SherpaOnnxOnlineTransducerModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    joiner: toCPointer(joiner),
    tokens: toCPointer(tokens),
    num_threads: Int32(numThreads),
    debug: Int32(debug))
}
/// Build a `SherpaOnnxFeatureConfig`.
///
/// - Parameters:
///   - sampleRate: Expected sample rate of the input audio.
///   - featureDim: Dimension of the extracted features.
///
/// - Returns: A configured `SherpaOnnxFeatureConfig`.
func sherpaOnnxFeatureConfig(
  sampleRate: Int = 16000,
  featureDim: Int = 80
) -> SherpaOnnxFeatureConfig {
  SherpaOnnxFeatureConfig(
    sample_rate: Int32(sampleRate),
    feature_dim: Int32(featureDim))
}
/// Build a `SherpaOnnxOnlineRecognizerConfig`.
///
/// - Parameters:
///   - featConfig: Feature extraction configuration.
///   - modelConfig: Transducer model configuration.
///   - enableEndpoint: Whether endpoint detection is enabled.
///   - rule1MinTrailingSilence: Trailing-silence threshold for endpoint rule 1.
///   - rule2MinTrailingSilence: Trailing-silence threshold for endpoint rule 2.
///   - rule3MinUtteranceLength: Minimum utterance length for endpoint rule 3.
///
/// - Returns: A configured `SherpaOnnxOnlineRecognizerConfig`.
func sherpaOnnxOnlineRecognizerConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOnlineTransducerModelConfig,
  enableEndpoint: Bool = false,
  rule1MinTrailingSilence: Float = 2.4,
  rule2MinTrailingSilence: Float = 1.2,
  rule3MinUtteranceLength: Float = 30
) -> SherpaOnnxOnlineRecognizerConfig {
  // The C struct represents the boolean flag as an integer.
  SherpaOnnxOnlineRecognizerConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    enable_endpoint: enableEndpoint ? 1 : 0,
    rule1_min_trailing_silence: rule1MinTrailingSilence,
    rule2_min_trailing_silence: rule2MinTrailingSilence,
    rule3_min_utterance_length: rule3MinUtteranceLength)
}
/// Wrapper for recognition result.
///
/// Usage:
///
///  let result = recognizer.getResult()
///  print("text: \(result.text)")
///
class SherpaOnnxOnlineRecongitionResult {
  // NOTE(review): the class name misspells "Recognition"; it is kept as-is
  // because callers in this file reference it by this exact name.

  /// Pointer to the underlying C result; owned by this wrapper and
  /// released in `deinit`.
  let result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>!

  /// The actual recognition result.
  /// For English models, it contains words separated by spaces.
  /// For Chinese models, it contains Chinese words.
  var text: String {
    String(cString: result.pointee.text)
  }

  init(result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>!) {
    self.result = result
  }

  deinit {
    // Release the C-side allocation exactly once; a nil pointer is a no-op.
    guard let result else { return }
    DestroyOnlineRecognizerResult(result)
  }
}
/// Swift wrapper around the streaming recognizer and its stream from the
/// sherpa-onnx C API. Owns both C objects and releases them in `deinit`.
class SherpaOnnxRecognizer {
  /// A pointer to the underlying recognizer in C.
  let recognizer: OpaquePointer!

  /// A pointer to the underlying stream in C.
  let stream: OpaquePointer!

  /// Create the recognizer and an attached stream from `config`.
  init(
    config: UnsafePointer<SherpaOnnxOnlineRecognizerConfig>!
  ) {
    recognizer = CreateOnlineRecognizer(config)
    stream = CreateOnlineStream(recognizer)
  }

  deinit {
    // "Destory" is the actual (misspelled) symbol exported by the C API.
    if let stream {
      DestoryOnlineStream(stream)
    }
    if let recognizer {
      DestroyOnlineRecognizer(recognizer)
    }
  }

  /// Feed audio samples into the stream.
  ///
  /// - Parameters:
  ///   - samples: Audio samples normalized to the range [-1, 1].
  ///   - sampleRate: Sample rate of the input audio samples. Must match
  ///     the one expected by the model. It must be 16000 for
  ///     models from icefall.
  func acceptWaveform(samples: [Float], sampleRate: Float = 16000) {
    AcceptWaveform(stream, sampleRate, samples, Int32(samples.count))
  }

  /// Whether the stream has data ready for `decode()` to make progress.
  func isReady() -> Bool {
    IsOnlineStreamReady(recognizer, stream) == 1
  }

  /// If there are enough number of feature frames, it invokes the neural
  /// network computation and decoding. Otherwise, it is a no-op.
  func decode() {
    DecodeOnlineStream(recognizer, stream)
  }

  /// Get the decoding results so far.
  func getResult() -> SherpaOnnxOnlineRecongitionResult {
    let raw: UnsafeMutablePointer<SherpaOnnxOnlineRecognizerResult>? =
      GetOnlineStreamResult(recognizer, stream)
    return SherpaOnnxOnlineRecongitionResult(result: raw)
  }

  /// Reset the recognizer, which clears the neural network model state
  /// and the state for decoding.
  func reset() {
    Reset(recognizer, stream)
  }

  /// Signal that no more audio samples would be available.
  /// After this call, you cannot call acceptWaveform() any more.
  func inputFinished() {
    InputFinished(stream)
  }

  /// Whether an endpoint has been detected.
  func isEndpoint() -> Bool {
    IsEndpoint(recognizer, stream) == 1
  }
}

View File

@@ -0,0 +1,74 @@
import AVFoundation
extension AudioBuffer {
  /// Copy this buffer's contents into a Swift array.
  /// NOTE(review): assumes the buffer holds Float32 samples — the caller
  /// asserts `.pcmFormatFloat32` before using this; confirm for new call sites.
  func array() -> [Float] {
    let samples = UnsafeBufferPointer<Float>(self)
    return Array(samples)
  }
}
extension AVAudioPCMBuffer {
  /// Copy the first buffer of the underlying audio buffer list into a
  /// Swift array (callers assert the file is mono before using this).
  func array() -> [Float] {
    let firstBuffer = self.audioBufferList.pointee.mBuffers
    return firstBuffer.array()
  }
}
/// Decode one 16 kHz mono wave file with a streaming zipformer transducer
/// model and print the recognized text.
///
/// Replaces the original `try!` / force-unwraps with guarded failures that
/// carry a diagnostic message, and drops the unnecessary NSURL round-trip.
func run() {
  // All model files live under one directory; see
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  let modelDir = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"

  let modelConfig = sherpaOnnxOnlineTransducerModelConfig(
    encoder: "\(modelDir)/encoder-epoch-99-avg-1.onnx",
    decoder: "\(modelDir)/decoder-epoch-99-avg-1.onnx",
    joiner: "\(modelDir)/joiner-epoch-99-avg-1.onnx",
    tokens: "\(modelDir)/tokens.txt",
    numThreads: 2)

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    enableEndpoint: false
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let filePath = "\(modelDir)/test_wavs/1.wav"
  let fileURL = URL(fileURLWithPath: filePath)
  guard let audioFile = try? AVAudioFile(forReading: fileURL) else {
    fatalError("Failed to open \(filePath)")
  }

  // The model expects 16 kHz mono float32 samples.
  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate a buffer of \(audioFrameCount) frames")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(filePath): \(error)")
  }

  recognizer.acceptWaveform(samples: audioFileBuffer.array())

  // Append 3200 zero samples (0.2 s at 16 kHz) of tail padding so the
  // final words can be decoded.
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding)
  recognizer.inputFinished()

  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}
/// Program entry point for the decode-file example.
@main
struct App {
static func main() {
run()
}
}

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Build (if needed) and run the Swift streaming decode-file example on macOS.
set -ex

# The C library and headers must have been built first.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
  echo ""
  echo "for help"
  exit 1
fi

# Rebuild when the binary is missing OR older than either Swift source
# (the original `! -e` check never rebuilt after a source change).
if [ ! -e ./decode-file ] \
    || [ ./decode-file.swift -nt ./decode-file ] \
    || [ ./SherpaOnnx.swift -nt ./decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file
else
  echo "./decode-file is up to date - skip building"
fi

# Quote the path so a $PWD containing spaces still works.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"

./decode-file