Add Swift API for homophone replacer. (#2164)

2025-04-29 18:50:41 +08:00
parent 4a7a974a04
commit 63d01a9534
5 changed files with 142 additions and 4 deletions
--- a/swift-api-examples/decode-file-sense-voice-with-hr.swift
+++ b/swift-api-examples/decode-file-sense-voice-with-hr.swift
@@ -0,0 +1,74 @@
+import AVFoundation
+
+extension AudioBuffer {
+  /// Copies the contents of this buffer into a new `[Float]`.
+  ///
+  /// The buffer is viewed through an `UnsafeBufferPointer<Float>` and its
+  /// elements are copied, so the returned array does not alias the buffer.
+  func array() -> [Float] {
+    let samples = UnsafeBufferPointer<Float>(self)
+    return [Float](samples)
+  }
+}
+
+extension AVAudioPCMBuffer {
+  /// Returns the samples held in the `mBuffers` entry of this PCM buffer's
+  /// `audioBufferList`, copied into a `[Float]` via `AudioBuffer.array()`.
+  func array() -> [Float] {
+    let buffer = audioBufferList.pointee.mBuffers
+    return buffer.array()
+  }
+}
+
+/// Decodes `./test-hr.wav` with an offline SenseVoice recognizer that has a
+/// homophone replacer (`hr`) configured, then prints the recognized text and,
+/// when there are any, the timestamps.
+///
+/// All model, dictionary, lexicon, rule-FST and audio paths are hard-coded
+/// relative to the current working directory. Any failure to open or read the
+/// audio file, or an unsupported audio format, stops the program with a
+/// descriptive message.
+func run() {
+  let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+  let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+  let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
+    model: model,
+    useInverseTextNormalization: true
+  )
+
+  let modelConfig = sherpaOnnxOfflineModelConfig(
+    tokens: tokens,
+    debug: 0,
+    senseVoice: senseVoiceConfig
+  )
+
+  let featConfig = sherpaOnnxFeatureConfig(
+    sampleRate: 16000,
+    featureDim: 80
+  )
+
+  // Homophone replacer resources.
+  let hrConfig = sherpaOnnxHomophoneReplacerConfig(
+    dictDir: "./dict",
+    lexicon: "./lexicon.txt",
+    ruleFsts: "./replace.fst"
+  )
+
+  // `config` stays a `var` because the recognizer takes it as `inout`.
+  var config = sherpaOnnxOfflineRecognizerConfig(
+    featConfig: featConfig,
+    modelConfig: modelConfig,
+    hr: hrConfig
+  )
+
+  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
+
+  let filePath = "./test-hr.wav"
+  let fileURL = URL(fileURLWithPath: filePath)
+
+  let audioFile: AVAudioFile
+  do {
+    audioFile = try AVAudioFile(forReading: fileURL)
+  } catch {
+    fatalError("Failed to open \(filePath): \(error)")
+  }
+
+  // `precondition` rather than `assert`: these checks must also hold in
+  // optimized builds, where `assert` is not evaluated.
+  let audioFormat = audioFile.processingFormat
+  precondition(audioFormat.channelCount == 1, "\(filePath) must contain exactly one channel")
+  precondition(
+    audioFormat.commonFormat == AVAudioCommonFormat.pcmFormatFloat32,
+    "\(filePath) must be readable as 32-bit float PCM"
+  )
+
+  // Checked conversion: the file length is a signed 64-bit frame position,
+  // while the buffer capacity is an unsigned 32-bit frame count.
+  guard let audioFrameCount = AVAudioFrameCount(exactly: audioFile.length) else {
+    fatalError("\(filePath) has an unsupported length of \(audioFile.length) frames")
+  }
+  guard
+    let audioFileBuffer = AVAudioPCMBuffer(
+      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+  else {
+    fatalError("Failed to allocate a PCM buffer of \(audioFrameCount) frames for \(filePath)")
+  }
+
+  do {
+    try audioFile.read(into: audioFileBuffer)
+  } catch {
+    fatalError("Failed to read \(filePath): \(error)")
+  }
+
+  let samples = audioFileBuffer.array()
+  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
+  print("\nresult is:\n\(result.text)")
+  if !result.timestamps.isEmpty {
+    print("\ntimestamps is:\n\(result.timestamps)")
+  }
+}
+
+/// Program entry point; `main()` simply calls `run()`.
+@main
+struct App {
+  static func main() { run() }
+}