Add iOS support (#65)

This commit is contained in:
Fangjun Kuang
2023-02-25 21:56:25 +08:00
committed by GitHub
parent fb1e24bebb
commit 475caf22f9
34 changed files with 2669 additions and 23 deletions

View File

@@ -0,0 +1,9 @@
// swift-api-examples/SherpaOnnx-Bridging-Header.h
//
// Copyright (c) 2023 Xiaomi Corporation

// Bridging header: exposes the sherpa-onnx C API to the Swift examples
// in this directory (compiled with -import-objc-header).
#ifndef SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_
#define SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_
#import "sherpa-onnx/c-api/c-api.h"
#endif  // SWIFT_API_EXAMPLES_SHERPAONNX_BRIDGING_HEADER_H_

View File

@@ -0,0 +1,171 @@
/// swift-api-examples/SherpaOnnx.swift
/// Copyright (c) 2023 Xiaomi Corporation
import Foundation // For NSString
/// Convert a String from swift to a `const char*` so that we can pass it to
/// the C language.
///
/// NOTE(review): the returned pointer is backed by a temporary `NSString`
/// created here; it is only guaranteed valid while that object is alive
/// (typically the enclosing autorelease scope). The C side should copy the
/// string before that scope ends — TODO confirm the C API copies its inputs.
///
/// - Parameters:
///   - s: The String to convert.
/// - Returns: A pointer that can be passed to C as `const char*`
func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
let cs = (s as NSString).utf8String
return UnsafePointer<Int8>(cs)
}
/// Build a `SherpaOnnxOnlineTransducerModelConfig` from Swift strings.
///
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to download the required `.onnx` files.
///
/// - Parameters:
///   - encoder: Path to encoder.onnx
///   - decoder: Path to decoder.onnx
///   - joiner: Path to joiner.onnx
///   - tokens: Path to tokens.txt
///   - numThreads: Number of threads to use for neural network computation.
///   - debug: Non-zero enables debug output in the underlying C library.
///
/// - Returns: A configured `SherpaOnnxOnlineTransducerModelConfig`.
func sherpaOnnxOnlineTransducerModelConfig(
  encoder: String,
  decoder: String,
  joiner: String,
  tokens: String,
  numThreads: Int = 2,
  debug: Int = 0
) -> SherpaOnnxOnlineTransducerModelConfig {
  // The C struct stores `const char*` strings and Int32 scalars.
  SherpaOnnxOnlineTransducerModelConfig(
    encoder: toCPointer(encoder),
    decoder: toCPointer(decoder),
    joiner: toCPointer(joiner),
    tokens: toCPointer(tokens),
    num_threads: Int32(numThreads),
    debug: Int32(debug))
}
/// Build a `SherpaOnnxFeatureConfig`.
///
/// - Parameters:
///   - sampleRate: Expected sample rate of the input audio.
///   - featureDim: Dimension of the extracted features.
///
/// - Returns: A configured `SherpaOnnxFeatureConfig`.
func sherpaOnnxFeatureConfig(
  sampleRate: Int = 16000,
  featureDim: Int = 80
) -> SherpaOnnxFeatureConfig {
  SherpaOnnxFeatureConfig(
    sample_rate: Int32(sampleRate),
    feature_dim: Int32(featureDim))
}
/// Build a `SherpaOnnxOnlineRecognizerConfig`.
///
/// - Parameters:
///   - featConfig: Feature extraction configuration.
///   - modelConfig: Transducer model configuration.
///   - enableEndpoint: Whether endpoint detection is enabled.
///   - rule1MinTrailingSilence: Trailing-silence threshold for endpoint rule 1.
///   - rule2MinTrailingSilence: Trailing-silence threshold for endpoint rule 2.
///   - rule3MinUtteranceLength: Minimum utterance length for endpoint rule 3.
///
/// - Returns: A configured `SherpaOnnxOnlineRecognizerConfig`.
func sherpaOnnxOnlineRecognizerConfig(
  featConfig: SherpaOnnxFeatureConfig,
  modelConfig: SherpaOnnxOnlineTransducerModelConfig,
  enableEndpoint: Bool = false,
  rule1MinTrailingSilence: Float = 2.4,
  rule2MinTrailingSilence: Float = 1.2,
  rule3MinUtteranceLength: Float = 30
) -> SherpaOnnxOnlineRecognizerConfig {
  // The C struct represents the boolean flag as an integer.
  SherpaOnnxOnlineRecognizerConfig(
    feat_config: featConfig,
    model_config: modelConfig,
    enable_endpoint: enableEndpoint ? 1 : 0,
    rule1_min_trailing_silence: rule1MinTrailingSilence,
    rule2_min_trailing_silence: rule2MinTrailingSilence,
    rule3_min_utterance_length: rule3MinUtteranceLength)
}
/// Wrapper for recognition result.
///
/// Usage:
///
///  let result = recognizer.getResult()
///  print("text: \(result.text)")
///
class SherpaOnnxOnlineRecongitionResult {
  // NOTE(review): the class name misspells "Recognition"; it is kept as-is
  // because callers in this file reference it by this exact name.

  /// Pointer to the underlying C result; owned by this wrapper and
  /// released in `deinit`.
  let result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>!

  /// The actual recognition result.
  /// For English models, it contains words separated by spaces.
  /// For Chinese models, it contains Chinese words.
  var text: String {
    String(cString: result.pointee.text)
  }

  init(result: UnsafePointer<SherpaOnnxOnlineRecognizerResult>!) {
    self.result = result
  }

  deinit {
    // Release the C-side allocation exactly once; a nil pointer is a no-op.
    guard let result else { return }
    DestroyOnlineRecognizerResult(result)
  }
}
/// Swift wrapper around the streaming recognizer and its stream from the
/// sherpa-onnx C API. Owns both C objects and releases them in `deinit`.
class SherpaOnnxRecognizer {
  /// A pointer to the underlying recognizer in C.
  let recognizer: OpaquePointer!

  /// A pointer to the underlying stream in C.
  let stream: OpaquePointer!

  /// Create the recognizer and an attached stream from `config`.
  init(
    config: UnsafePointer<SherpaOnnxOnlineRecognizerConfig>!
  ) {
    recognizer = CreateOnlineRecognizer(config)
    stream = CreateOnlineStream(recognizer)
  }

  deinit {
    // "Destory" is the actual (misspelled) symbol exported by the C API.
    if let stream {
      DestoryOnlineStream(stream)
    }
    if let recognizer {
      DestroyOnlineRecognizer(recognizer)
    }
  }

  /// Feed audio samples into the stream.
  ///
  /// - Parameters:
  ///   - samples: Audio samples normalized to the range [-1, 1].
  ///   - sampleRate: Sample rate of the input audio samples. Must match
  ///     the one expected by the model. It must be 16000 for
  ///     models from icefall.
  func acceptWaveform(samples: [Float], sampleRate: Float = 16000) {
    AcceptWaveform(stream, sampleRate, samples, Int32(samples.count))
  }

  /// Whether the stream has data ready for `decode()` to make progress.
  func isReady() -> Bool {
    IsOnlineStreamReady(recognizer, stream) == 1
  }

  /// If there are enough number of feature frames, it invokes the neural
  /// network computation and decoding. Otherwise, it is a no-op.
  func decode() {
    DecodeOnlineStream(recognizer, stream)
  }

  /// Get the decoding results so far.
  func getResult() -> SherpaOnnxOnlineRecongitionResult {
    let raw: UnsafeMutablePointer<SherpaOnnxOnlineRecognizerResult>? =
      GetOnlineStreamResult(recognizer, stream)
    return SherpaOnnxOnlineRecongitionResult(result: raw)
  }

  /// Reset the recognizer, which clears the neural network model state
  /// and the state for decoding.
  func reset() {
    Reset(recognizer, stream)
  }

  /// Signal that no more audio samples would be available.
  /// After this call, you cannot call acceptWaveform() any more.
  func inputFinished() {
    InputFinished(stream)
  }

  /// Whether an endpoint has been detected.
  func isEndpoint() -> Bool {
    IsEndpoint(recognizer, stream) == 1
  }
}

View File

@@ -0,0 +1,74 @@
import AVFoundation
extension AudioBuffer {
  /// Copy this buffer's contents into a Swift array.
  /// NOTE(review): assumes the buffer holds Float32 samples — the caller
  /// asserts `.pcmFormatFloat32` before using this; confirm for new call sites.
  func array() -> [Float] {
    let samples = UnsafeBufferPointer<Float>(self)
    return Array(samples)
  }
}
extension AVAudioPCMBuffer {
  /// Copy the first buffer of the underlying audio buffer list into a
  /// Swift array (callers assert the file is mono before using this).
  func array() -> [Float] {
    let firstBuffer = self.audioBufferList.pointee.mBuffers
    return firstBuffer.array()
  }
}
/// Decode one 16 kHz mono wave file with a streaming zipformer transducer
/// model and print the recognized text.
///
/// Replaces the original `try!` / force-unwraps with guarded failures that
/// carry a diagnostic message, and drops the unnecessary NSURL round-trip.
func run() {
  // All model files live under one directory; see
  // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
  let modelDir = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20"

  let modelConfig = sherpaOnnxOnlineTransducerModelConfig(
    encoder: "\(modelDir)/encoder-epoch-99-avg-1.onnx",
    decoder: "\(modelDir)/decoder-epoch-99-avg-1.onnx",
    joiner: "\(modelDir)/joiner-epoch-99-avg-1.onnx",
    tokens: "\(modelDir)/tokens.txt",
    numThreads: 2)

  let featConfig = sherpaOnnxFeatureConfig(
    sampleRate: 16000,
    featureDim: 80
  )

  var config = sherpaOnnxOnlineRecognizerConfig(
    featConfig: featConfig,
    modelConfig: modelConfig,
    enableEndpoint: false
  )

  let recognizer = SherpaOnnxRecognizer(config: &config)

  let filePath = "\(modelDir)/test_wavs/1.wav"
  let fileURL = URL(fileURLWithPath: filePath)
  guard let audioFile = try? AVAudioFile(forReading: fileURL) else {
    fatalError("Failed to open \(filePath)")
  }

  // The model expects 16 kHz mono float32 samples.
  let audioFormat = audioFile.processingFormat
  assert(audioFormat.sampleRate == 16000)
  assert(audioFormat.channelCount == 1)
  assert(audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32)

  let audioFrameCount = UInt32(audioFile.length)
  guard
    let audioFileBuffer = AVAudioPCMBuffer(
      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
  else {
    fatalError("Failed to allocate a buffer of \(audioFrameCount) frames")
  }

  do {
    try audioFile.read(into: audioFileBuffer)
  } catch {
    fatalError("Failed to read \(filePath): \(error)")
  }

  recognizer.acceptWaveform(samples: audioFileBuffer.array())

  // Append 3200 zero samples (0.2 s at 16 kHz) of tail padding so the
  // final words can be decoded.
  let tailPadding = [Float](repeating: 0.0, count: 3200)
  recognizer.acceptWaveform(samples: tailPadding)
  recognizer.inputFinished()

  while recognizer.isReady() {
    recognizer.decode()
  }

  let result = recognizer.getResult()
  print("\nresult is:\n\(result.text)")
}
/// Program entry point for the decode-file example.
@main
struct App {
static func main() {
run()
}
}

View File

@@ -0,0 +1,36 @@
#!/usr/bin/env bash
# Build (if needed) and run the Swift streaming decode-file example on macOS.
set -ex

# The C library and headers must have been built first.
if [ ! -d ../build-swift-macos ]; then
  echo "Please run ../build-swift-macos.sh first!"
  exit 1
fi

if [ ! -d ./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 ]; then
  echo "Please download the pre-trained model for testing."
  echo "You can refer to"
  echo ""
  echo "https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20-bilingual-chinese-english"
  echo ""
  echo "for help"
  exit 1
fi

# Rebuild when the binary is missing OR older than either Swift source
# (the original `! -e` check never rebuilt after a source change).
if [ ! -e ./decode-file ] \
    || [ ./decode-file.swift -nt ./decode-file ] \
    || [ ./SherpaOnnx.swift -nt ./decode-file ]; then
  # Note: We use -lc++ to link against libc++ instead of libstdc++
  swiftc \
    -lc++ \
    -I ../build-swift-macos/install/include \
    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
    ./decode-file.swift ./SherpaOnnx.swift \
    -L ../build-swift-macos/install/lib/ \
    -l sherpa-onnx \
    -l onnxruntime \
    -o decode-file
else
  echo "./decode-file is up to date - skip building"
fi

# Quote the path so a $PWD containing spaces still works.
export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"

./decode-file