Fix modified beam search for iOS and android (#76)
* Use Int type for sampling rate * Fix swift * Fix iOS
This commit is contained in:
@@ -23,10 +23,10 @@ extension AVAudioPCMBuffer {
|
||||
class ViewController: UIViewController {
|
||||
@IBOutlet weak var resultLabel: UILabel!
|
||||
@IBOutlet weak var recordBtn: UIButton!
|
||||
|
||||
|
||||
var audioEngine: AVAudioEngine? = nil
|
||||
var recognizer: SherpaOnnxRecognizer! = nil
|
||||
|
||||
|
||||
/// It saves the decoded results so far
|
||||
var sentences: [String] = [] {
|
||||
didSet {
|
||||
@@ -42,7 +42,7 @@ class ViewController: UIViewController {
|
||||
if sentences.isEmpty {
|
||||
return "0: \(lastSentence.lowercased())"
|
||||
}
|
||||
|
||||
|
||||
let start = max(sentences.count - maxSentence, 0)
|
||||
if lastSentence.isEmpty {
|
||||
return sentences.enumerated().map { (index, s) in "\(index): \(s.lowercased())" }[start...]
|
||||
@@ -52,23 +52,23 @@ class ViewController: UIViewController {
|
||||
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func updateLabel() {
|
||||
DispatchQueue.main.async {
|
||||
self.resultLabel.text = self.results
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
override func viewDidLoad() {
|
||||
super.viewDidLoad()
|
||||
// Do any additional setup after loading the view.
|
||||
|
||||
|
||||
resultLabel.text = "ASR with Next-gen Kaldi\n\nSee https://github.com/k2-fsa/sherpa-onnx\n\nPress the Start button to run!"
|
||||
recordBtn.setTitle("Start", for: .normal)
|
||||
initRecognizer()
|
||||
initRecorder()
|
||||
}
|
||||
|
||||
|
||||
@IBAction func onRecordBtnClick(_ sender: UIButton) {
|
||||
if recordBtn.currentTitle == "Start" {
|
||||
startRecorder()
|
||||
@@ -78,30 +78,32 @@ class ViewController: UIViewController {
|
||||
recordBtn.setTitle("Start", for: .normal)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func initRecognizer() {
|
||||
// Please select one model that is best suitable for you.
|
||||
//
|
||||
// You can also modify Model.swift to add new pre-trained models from
|
||||
// https://k2-fsa.github.io/sherpa/ncnn/pretrained_models/index.html
|
||||
|
||||
|
||||
let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||
|
||||
|
||||
let featConfig = sherpaOnnxFeatureConfig(
|
||||
sampleRate: 16000,
|
||||
featureDim: 80)
|
||||
|
||||
|
||||
var config = sherpaOnnxOnlineRecognizerConfig(
|
||||
featConfig: featConfig,
|
||||
modelConfig: modelConfig,
|
||||
enableEndpoint: true,
|
||||
rule1MinTrailingSilence: 2.4,
|
||||
rule2MinTrailingSilence: 0.8,
|
||||
rule3MinUtteranceLength: 30
|
||||
rule3MinUtteranceLength: 30,
|
||||
decodingMethod: "greedy_search",
|
||||
maxActivePaths: 4
|
||||
)
|
||||
recognizer = SherpaOnnxRecognizer(config: &config)
|
||||
}
|
||||
|
||||
|
||||
func initRecorder() {
|
||||
print("init recorder")
|
||||
audioEngine = AVAudioEngine()
|
||||
@@ -112,9 +114,9 @@ class ViewController: UIViewController {
|
||||
commonFormat: .pcmFormatFloat32,
|
||||
sampleRate: 16000, channels: 1,
|
||||
interleaved: false)!
|
||||
|
||||
|
||||
let converter = AVAudioConverter(from: inputFormat!, to: outputFormat)!
|
||||
|
||||
|
||||
inputNode!.installTap(
|
||||
onBus: bus,
|
||||
bufferSize: 1024,
|
||||
@@ -122,34 +124,34 @@ class ViewController: UIViewController {
|
||||
) {
|
||||
(buffer: AVAudioPCMBuffer, when: AVAudioTime) in
|
||||
var newBufferAvailable = true
|
||||
|
||||
|
||||
let inputCallback: AVAudioConverterInputBlock = {
|
||||
inNumPackets, outStatus in
|
||||
if newBufferAvailable {
|
||||
outStatus.pointee = .haveData
|
||||
newBufferAvailable = false
|
||||
|
||||
|
||||
return buffer
|
||||
} else {
|
||||
outStatus.pointee = .noDataNow
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
let convertedBuffer = AVAudioPCMBuffer(
|
||||
pcmFormat: outputFormat,
|
||||
frameCapacity:
|
||||
AVAudioFrameCount(outputFormat.sampleRate)
|
||||
* buffer.frameLength
|
||||
/ AVAudioFrameCount(buffer.format.sampleRate))!
|
||||
|
||||
|
||||
var error: NSError?
|
||||
let _ = converter.convert(
|
||||
to: convertedBuffer,
|
||||
error: &error, withInputFrom: inputCallback)
|
||||
|
||||
|
||||
// TODO(fangjun): Handle status != haveData
|
||||
|
||||
|
||||
let array = convertedBuffer.array()
|
||||
if !array.isEmpty {
|
||||
self.recognizer.acceptWaveform(samples: array)
|
||||
@@ -158,13 +160,13 @@ class ViewController: UIViewController {
|
||||
}
|
||||
let isEndpoint = self.recognizer.isEndpoint()
|
||||
let text = self.recognizer.getResult().text
|
||||
|
||||
|
||||
if !text.isEmpty && self.lastSentence != text {
|
||||
self.lastSentence = text
|
||||
self.updateLabel()
|
||||
print(text)
|
||||
}
|
||||
|
||||
|
||||
if isEndpoint {
|
||||
if !text.isEmpty {
|
||||
let tmp = self.lastSentence
|
||||
@@ -175,13 +177,13 @@ class ViewController: UIViewController {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
|
||||
func startRecorder() {
|
||||
lastSentence = ""
|
||||
sentences = []
|
||||
|
||||
|
||||
do {
|
||||
try self.audioEngine?.start()
|
||||
} catch let error as NSError {
|
||||
@@ -189,7 +191,7 @@ class ViewController: UIViewController {
|
||||
}
|
||||
print("started")
|
||||
}
|
||||
|
||||
|
||||
func stopRecorder() {
|
||||
audioEngine?.stop()
|
||||
print("stopped")
|
||||
|
||||
Reference in New Issue
Block a user