Fix modified beam search for iOS and android (#76)

* Use Int type for sampling rate

* Fix swift

* Fix iOS
This commit is contained in:
Fangjun Kuang
2023-03-03 15:18:31 +08:00
committed by GitHub
parent 7f72c13d9a
commit 5f31b22c12
15 changed files with 125 additions and 93 deletions

View File

@@ -23,10 +23,10 @@ extension AVAudioPCMBuffer {
class ViewController: UIViewController {
@IBOutlet weak var resultLabel: UILabel!
@IBOutlet weak var recordBtn: UIButton!
var audioEngine: AVAudioEngine? = nil
var recognizer: SherpaOnnxRecognizer! = nil
/// It saves the decoded results so far
var sentences: [String] = [] {
didSet {
@@ -42,7 +42,7 @@ class ViewController: UIViewController {
if sentences.isEmpty {
return "0: \(lastSentence.lowercased())"
}
let start = max(sentences.count - maxSentence, 0)
if lastSentence.isEmpty {
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
@@ -52,23 +52,23 @@ class ViewController: UIViewController {
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
}
}
func updateLabel() {
DispatchQueue.main.async {
self.resultLabel.text = self.results
}
}
override func viewDidLoad() {
super.viewDidLoad()
// Do any additional setup after loading the view.
resultLabel.text = "ASR with Next-gen Kaldi\n\nSee https://github.com/k2-fsa/sherpa-onnx\n\nPress the Start button to run!"
recordBtn.setTitle("Start", for: .normal)
initRecognizer()
initRecorder()
}
@IBAction func onRecordBtnClick(_ sender: UIButton) {
if recordBtn.currentTitle == "Start" {
startRecorder()
@@ -78,30 +78,32 @@ class ViewController: UIViewController {
recordBtn.setTitle("Start", for: .normal)
}
}
func initRecognizer() {
// Please select one model that is best suitable for you.
//
// You can also modify Model.swift to add new pre-trained models from
// https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
let modelConfig = getBilingualStreamZhEnZipformer20230220()
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
featureDim: 80)
var config = sherpaOnnxOnlineRecognizerConfig(
featConfig: featConfig,
modelConfig: modelConfig,
enableEndpoint: true,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 0.8,
rule3MinUtteranceLength: 30
rule3MinUtteranceLength: 30,
decodingMethod: "greedy_search",
maxActivePaths: 4
)
recognizer = SherpaOnnxRecognizer(config: &config)
}
func initRecorder() {
print("init recorder")
audioEngine = AVAudioEngine()
@@ -112,9 +114,9 @@ class ViewController: UIViewController {
commonFormat: .pcmFormatFloat32,
sampleRate: 16000, channels: 1,
interleaved: false)!
let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!
inputNode!.installTap(
onBus: bus,
bufferSize: 1024,
@@ -122,34 +124,34 @@ class ViewController: UIViewController {
) {
(buffer: AVAudioPCMBuffer, when: AVAudioTime) in
var newBufferAvailable = true
let inputCallback: AVAudioConverterInputBlock = {
inNumPackets, outStatus in
if newBufferAvailable {
outStatus.pointee = .haveData
newBufferAvailable = false
return buffer
} else {
outStatus.pointee = .noDataNow
return nil
}
}
let convertedBuffer = AVAudioPCMBuffer(
pcmFormat: outputFormat,
frameCapacity:
AVAudioFrameCount(outputFormat.sampleRate)
* buffer.frameLength
/ AVAudioFrameCount(buffer.format.sampleRate))!
var error: NSError?
let _ = converter.convert(
to: convertedBuffer,
error: &error, withInputFrom: inputCallback)
// TODO(fangjun): Handle status != haveData
let array = convertedBuffer.array()
if !array.isEmpty {
self.recognizer.acceptWaveform(samples: array)
@@ -158,13 +160,13 @@ class ViewController: UIViewController {
}
let isEndpoint = self.recognizer.isEndpoint()
let text = self.recognizer.getResult().text
if !text.isEmpty && self.lastSentence != text {
self.lastSentence = text
self.updateLabel()
print(text)
}
if isEndpoint {
if !text.isEmpty {
let tmp = self.lastSentence
@@ -175,13 +177,13 @@ class ViewController: UIViewController {
}
}
}
}
func startRecorder() {
lastSentence = ""
sentences = []
do {
try self.audioEngine?.start()
} catch let error as NSError {
@@ -189,7 +191,7 @@ class ViewController: UIViewController {
}
print("started")
}
func stopRecorder() {
audioEngine?.stop()
print("stopped")