diff --git a/.github/scripts/test-swift.sh b/.github/scripts/test-swift.sh
index c588cd4f..14b527be 100755
--- a/.github/scripts/test-swift.sh
+++ b/.github/scripts/test-swift.sh
@@ -7,6 +7,10 @@ echo "pwd: $PWD"
 cd swift-api-examples
 ls -lh
 
+./run-decode-file-sense-voice-with-hr.sh
+rm -rf sherpa-onnx-sense-voice-*
+rm -rf dict lexicon.txt replace.fst test-hr.wav
+
 ./run-dolphin-ctc-asr.sh
 rm -rf sherpa-onnx-dolphin-*
 
diff --git a/swift-api-examples/.gitignore b/swift-api-examples/.gitignore
index 72740947..2363d0ec 100644
--- a/swift-api-examples/.gitignore
+++ b/swift-api-examples/.gitignore
@@ -15,3 +15,4 @@ tts-matcha-en
 tts-kokoro-en
 tts-kokoro-zh-en
 speech-enhancement-gtcrn
+decode-file-sense-voice-with-hr
diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift
index 6a9dd6fe..b2cef56b 100644
--- a/swift-api-examples/SherpaOnnx.swift
+++ b/swift-api-examples/SherpaOnnx.swift
@@ -128,6 +128,27 @@ func sherpaOnnxOnlineCtcFstDecoderConfig(
     max_active: Int32(maxActive))
 }
 
+/// Builds a `SherpaOnnxHomophoneReplacerConfig` from Swift strings.
+///
+/// Every argument is converted with `toCPointer` and defaults to an empty
+/// string; the recognizer config helpers use this all-default value when the
+/// caller does not pass `hr`.
+///
+/// - Parameters:
+///   - dictDir: Stored in the `dict_dir` field.
+///   - lexicon: Stored in the `lexicon` field.
+///   - ruleFsts: Stored in the `rule_fsts` field.
+func sherpaOnnxHomophoneReplacerConfig(
+  dictDir: String = "",
+  lexicon: String = "",
+  ruleFsts: String = ""
+) -> SherpaOnnxHomophoneReplacerConfig {
+  return SherpaOnnxHomophoneReplacerConfig(
+    dict_dir: toCPointer(dictDir),
+    lexicon: toCPointer(lexicon),
+    rule_fsts: toCPointer(ruleFsts))
+}
+
 func sherpaOnnxOnlineRecognizerConfig(
   featConfig: SherpaOnnxFeatureConfig,
   modelConfig: SherpaOnnxOnlineModelConfig,
@@ -144,7 +165,8 @@ func sherpaOnnxOnlineRecognizerConfig(
   ruleFars: String = "",
   blankPenalty: Float = 0.0,
   hotwordsBuf: String = "",
-  hotwordsBufSize: Int = 0
+  hotwordsBufSize: Int = 0,
+  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOnlineRecognizerConfig {
   return SherpaOnnxOnlineRecognizerConfig(
     feat_config: featConfig,
@@ -162,7 +184,8 @@ func sherpaOnnxOnlineRecognizerConfig(
     rule_fars: toCPointer(ruleFars),
     blank_penalty: blankPenalty,
     hotwords_buf: toCPointer(hotwordsBuf),
-    hotwords_buf_size: Int32(hotwordsBufSize)
+    hotwords_buf_size: Int32(hotwordsBufSize),
+    hr: hr
   )
 }
 
@@ -469,7 +492,8 @@ func sherpaOnnxOfflineRecognizerConfig(
   hotwordsScore: Float = 1.5,
   ruleFsts: String = "",
   ruleFars: String = "",
-  blankPenalty: Float = 0.0
+  blankPenalty: Float = 0.0,
+  hr: SherpaOnnxHomophoneReplacerConfig = sherpaOnnxHomophoneReplacerConfig()
 ) -> SherpaOnnxOfflineRecognizerConfig {
   return SherpaOnnxOfflineRecognizerConfig(
     feat_config: featConfig,
@@ -481,7 +505,8 @@ func sherpaOnnxOfflineRecognizerConfig(
     hotwords_score: hotwordsScore,
     rule_fsts: toCPointer(ruleFsts),
     rule_fars: toCPointer(ruleFars),
-    blank_penalty: blankPenalty
+    blank_penalty: blankPenalty,
+    hr: hr
   )
 }
 
diff --git a/swift-api-examples/decode-file-sense-voice-with-hr.swift b/swift-api-examples/decode-file-sense-voice-with-hr.swift
new file mode 100644
index 00000000..f9d00272
--- /dev/null
+++ b/swift-api-examples/decode-file-sense-voice-with-hr.swift
@@ -0,0 +1,98 @@
+import AVFoundation
+
+extension AudioBuffer {
+  /// Copies the samples referenced by this buffer into a Swift array.
+  func array() -> [Float] {
+    return Array(UnsafeBufferPointer(self))
+  }
+}
+
+extension AVAudioPCMBuffer {
+  /// Returns the samples held in `audioBufferList.pointee.mBuffers`.
+  func array() -> [Float] {
+    return audioBufferList.pointee.mBuffers.array()
+  }
+}
+
+/// Decodes `./test-hr.wav` with the SenseVoice model and the homophone
+/// replacer enabled, then prints the recognized text and, if present,
+/// the timestamps.
+///
+/// All paths are relative to the current working directory. Any failure to
+/// load the audio aborts the process with a message, so callers such as CI
+/// scripts still see a non-zero exit status.
+func run() {
+  let model = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/model.int8.onnx"
+  let tokens = "./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17/tokens.txt"
+  let senseVoiceConfig = sherpaOnnxOfflineSenseVoiceModelConfig(
+    model: model,
+    useInverseTextNormalization: true
+  )
+
+  let modelConfig = sherpaOnnxOfflineModelConfig(
+    tokens: tokens,
+    debug: 0,
+    senseVoice: senseVoiceConfig
+  )
+
+  let featConfig = sherpaOnnxFeatureConfig(
+    sampleRate: 16000,
+    featureDim: 80
+  )
+
+  let hrConfig = sherpaOnnxHomophoneReplacerConfig(
+    dictDir: "./dict",
+    lexicon: "./lexicon.txt",
+    ruleFsts: "./replace.fst"
+  )
+
+  // Kept as `var` because it is handed to the recognizer with `&`.
+  var config = sherpaOnnxOfflineRecognizerConfig(
+    featConfig: featConfig,
+    modelConfig: modelConfig,
+    hr: hrConfig
+  )
+  let recognizer = SherpaOnnxOfflineRecognizer(config: &config)
+
+  let filePath = "./test-hr.wav"
+  let audioFile: AVAudioFile
+  do {
+    audioFile = try AVAudioFile(forReading: URL(fileURLWithPath: filePath))
+  } catch {
+    fatalError("Failed to open \(filePath): \(error)")
+  }
+
+  let audioFormat = audioFile.processingFormat
+  precondition(audioFormat.channelCount == 1, "\(filePath) must be mono")
+  precondition(
+    audioFormat.commonFormat == .pcmFormatFloat32,
+    "\(filePath) must be readable as 32-bit float PCM")
+
+  let audioFrameCount = AVAudioFrameCount(audioFile.length)
+  guard
+    let audioFileBuffer = AVAudioPCMBuffer(
+      pcmFormat: audioFormat, frameCapacity: audioFrameCount)
+  else {
+    fatalError("Failed to allocate a buffer of \(audioFrameCount) frames for \(filePath)")
+  }
+
+  do {
+    try audioFile.read(into: audioFileBuffer)
+  } catch {
+    fatalError("Failed to read \(filePath): \(error)")
+  }
+
+  let samples = audioFileBuffer.array()
+  let result = recognizer.decode(samples: samples, sampleRate: Int(audioFormat.sampleRate))
+  print("\nresult is:\n\(result.text)")
+  if !result.timestamps.isEmpty {
+    print("\ntimestamps is:\n\(result.timestamps)")
+  }
+}
+
+@main
+struct App {
+  static func main() {
+    run()
+  }
+}
diff --git a/swift-api-examples/run-decode-file-sense-voice-with-hr.sh b/swift-api-examples/run-decode-file-sense-voice-with-hr.sh
new file mode 100755
index 00000000..96e1a10b
--- /dev/null
+++ b/swift-api-examples/run-decode-file-sense-voice-with-hr.sh
@@ -0,0 +1,53 @@
+#!/usr/bin/env bash
+#
+# Builds (if needed) and runs the SenseVoice + homophone replacer Swift example.
+# Model and test files are downloaded into the current directory on first use.
+
+set -ex
+
+if [ ! -d ../build-swift-macos ]; then
+  echo "Please run ../build-swift-macos.sh first!"
+  exit 1
+fi
+
+if [ ! -d ./sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17 ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  tar xvf sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+  rm sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2
+fi
+
+hr_url=https://github.com/k2-fsa/sherpa-onnx/releases/download/hr-files
+
+if [ ! -d dict ]; then
+  curl -SL -O "$hr_url/dict.tar.bz2"
+  tar xf dict.tar.bz2
+  rm -rf dict.tar.bz2
+fi
+
+# Check each file on its own: a run interrupted after dict/ was extracted
+# would otherwise never fetch the remaining files.
+for f in replace.fst test-hr.wav lexicon.txt; do
+  if [ ! -f "$f" ]; then
+    curl -SL -O "$hr_url/$f"
+  fi
+done
+
+if [ ! -e ./decode-file-sense-voice-with-hr ]; then
+  # Note: We use -lc++ to link against libc++ instead of libstdc++
+  swiftc \
+    -lc++ \
+    -I ../build-swift-macos/install/include \
+    -import-objc-header ./SherpaOnnx-Bridging-Header.h \
+    ./decode-file-sense-voice-with-hr.swift ./SherpaOnnx.swift \
+    -L ../build-swift-macos/install/lib/ \
+    -l sherpa-onnx \
+    -l onnxruntime \
+    -o decode-file-sense-voice-with-hr
+
+  strip decode-file-sense-voice-with-hr
+else
+  echo "./decode-file-sense-voice-with-hr exists - skip building"
+fi
+
+export DYLD_LIBRARY_PATH="$PWD/../build-swift-macos/install/lib:$DYLD_LIBRARY_PATH"
+./decode-file-sense-voice-with-hr