Add SwiftUI demo project (#118)

* Commit after creating the project

* Add sherpa-onnx related files

* copy and modify files from sherpa-ncnn

* add app icon
This commit is contained in:
Fangjun Kuang
2023-04-05 22:16:29 +08:00
committed by GitHub
parent ae1f9e7914
commit 9ac747248b
19 changed files with 1097 additions and 0 deletions

View File

@@ -0,0 +1,11 @@
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,14 @@
{
"images" : [
{
"filename" : "k2-1024x1024.png",
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,46 @@
//
// ContentView.swift
// SherpaOnnx
//
// Created by fangjun on 2023/4/5.
//
import SwiftUI
/// Main screen of the demo.
///
/// Shows a title, a short hint while the recorder is stopped, the
/// subtitles published by the view model, and a single Start/Stop button.
struct ContentView: View {
    @StateObject var sherpaOnnxVM = SherpaOnnxViewModel()

    /// `true` while the view model reports the `.stop` status.
    private var isStopped: Bool {
        sherpaOnnxVM.status == .stop
    }

    var body: some View {
        VStack {
            Text("ASR with Next-gen Kaldi")
                .font(.title)

            // The usage hint is only visible while nothing is being recorded.
            if isStopped {
                Text("See https://github.com/k2-fsa/sherpa-onnx")
                Text("Press the Start button to run!")
            }

            ScrollView(.vertical, showsIndicators: true) {
                // The trailing spacer keeps the text pinned to the leading edge.
                HStack {
                    Text(sherpaOnnxVM.subtitles)
                    Spacer()
                }
            }

            Spacer()

            Button(action: toggleRecorder) {
                Text(isStopped ? "Start" : "Stop")
            }
        }
        .padding()
    }

    /// Forwards the button tap to the view model.
    private func toggleRecorder() {
        sherpaOnnxVM.toggleRecorder()
    }
}
/// Preview provider for `ContentView`.
struct ContentView_Previews: PreviewProvider {
/// The single preview: a `ContentView` created with its default view model.
static var previews: some View {
ContentView()
}
}

View File

@@ -0,0 +1,20 @@
//
// Extension.swift
// SherpaOnnx
//
// Created by knight on 2023/4/5.
//
import AVFoundation
extension AudioBuffer {
    /// Copies the contents of this buffer into a Swift array, viewing the
    /// underlying memory as `Float` values.
    func array() -> [Float] {
        let samples = UnsafeBufferPointer<Float>(self)
        return [Float](samples)
    }
}
extension AVAudioPCMBuffer {
    /// Returns the samples of the first buffer in this PCM buffer's
    /// `audioBufferList` as an array of `Float`.
    func array() -> [Float] {
        let firstBuffer = audioBufferList.pointee.mBuffers
        return firstBuffer.array()
    }
}

View File

@@ -0,0 +1,35 @@
import Foundation
/// Returns the path of a file bundled with the app.
///
/// - Parameters:
///   - forResource: File name without its extension.
///   - ofType: File extension.
/// - Returns: The path of the resource inside `Bundle.main`.
///
/// Stops the program with a precondition failure when the resource is not
/// part of the main bundle.
func getResource(_ forResource: String, _ ofType: String) -> String {
    guard let path = Bundle.main.path(forResource: forResource, ofType: ofType) else {
        preconditionFailure(
            "\(forResource).\(ofType) does not exist!\nRemember to change \n Build Phases -> Copy Bundle Resources\nto add it!"
        )
    }
    return path
}
/// Builds the model configuration for
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
/// (bilingual, Chinese + English).
///
/// The encoder, decoder, joiner and token files are looked up in the main
/// bundle through `getResource`, and the configuration uses 2 threads.
///
/// Pre-trained models can be downloaded from
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// This particular model is described at
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
    return sherpaOnnxOnlineTransducerModelConfig(
        encoder: getResource("encoder-epoch-99-avg-1", "onnx"),
        decoder: getResource("decoder-epoch-99-avg-1", "onnx"),
        joiner: getResource("joiner-epoch-99-avg-1", "onnx"),
        tokens: getResource("tokens", "txt"),
        numThreads: 2
    )
}
/// If you need other models, please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// and add them here.

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,17 @@
//
// SherpaOnnxApp.swift
// SherpaOnnx
//
// Created by fangjun on 2023/4/5.
//
import SwiftUI
/// Application entry point (marked with `@main`).
@main
struct SherpaOnnxApp: App {
/// The app's only scene: a window group whose content is `ContentView`.
var body: some Scene {
WindowGroup {
ContentView()
}
}
}

View File

@@ -0,0 +1,182 @@
//
// SherpaOnnxViewModel.swift
// SherpaOnnx
//
// Created by knight on 2023/4/5.
//
import Foundation
import AVFoundation
/// Recorder state tracked by `SherpaOnnxViewModel`.
enum Status {
    case stop, recording
}
/// View model that captures microphone audio, feeds it to a streaming
/// sherpa-onnx recognizer and publishes the recognized text.
///
/// Audio is delivered by a tap on the engine's input node. The tap block may
/// run off the main thread, so recognition state (`sentences`,
/// `lastSentence`) is only touched from that callback while recording, and
/// the published `subtitles` is only written on the main queue.
class SherpaOnnxViewModel: ObservableObject {
    /// Whether the recorder is currently running.
    @Published var status: Status = .stop
    /// Text displayed by the UI; refreshed by `updateLabel()`.
    @Published var subtitles: String = ""

    /// Sentences that were finalized at an endpoint.
    var sentences: [String] = []
    /// Audio engine with the recognition tap installed, or `nil` when the
    /// recorder could not be set up.
    var audioEngine: AVAudioEngine? = nil
    var recognizer: SherpaOnnxRecognizer! = nil
    /// Partial result of the sentence currently being recognized.
    var lastSentence: String = ""
    /// Maximum number of finalized sentences included in `results`.
    let maxSentence: Int = 20

    /// The text to display: the last `maxSentence` finalized sentences, one
    /// per line and prefixed with their index, followed by the in-progress
    /// sentence (if any). All text is lowercased.
    var results: String {
        let start = max(sentences.count - maxSentence, 0)
        // Only format the sentences that are actually displayed.
        var lines = sentences.enumerated().dropFirst(start).map {
            "\($0.offset): \($0.element.lowercased())"
        }
        if !lastSentence.isEmpty {
            lines.append("\(sentences.count): \(lastSentence.lowercased())")
        }
        return lines.joined(separator: "\n")
    }

    /// Publishes the current `results` to `subtitles` on the main queue.
    func updateLabel() {
        // Snapshot on the calling thread: `sentences` and `lastSentence` are
        // mutated by the audio callback, so they must not be read later from
        // the main queue.
        let snapshot = results
        DispatchQueue.main.async { [weak self] in
            self?.subtitles = snapshot
        }
    }

    init() {
        initRecognizer()
        initRecorder()
    }

    /// Creates the streaming recognizer.
    private func initRecognizer() {
        // Please select one model that is best suitable for you.
        //
        // You can also modify Model.swift to add new pre-trained models from
        // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
        let modelConfig = getBilingualStreamZhEnZipformer20230220()

        let featConfig = sherpaOnnxFeatureConfig(
            sampleRate: 16000,
            featureDim: 80)

        var config = sherpaOnnxOnlineRecognizerConfig(
            featConfig: featConfig,
            modelConfig: modelConfig,
            enableEndpoint: true,
            rule1MinTrailingSilence: 2.4,
            rule2MinTrailingSilence: 0.8,
            rule3MinUtteranceLength: 30,
            decodingMethod: "greedy_search",
            maxActivePaths: 4
        )
        recognizer = SherpaOnnxRecognizer(config: &config)
    }

    /// Creates the audio engine and installs a tap on its input node that
    /// converts every captured buffer to 16 kHz mono float32 and passes the
    /// samples to the recognizer.
    ///
    /// `audioEngine` is left `nil` when the converter cannot be created, so
    /// that a later start attempt fails visibly instead of recording nothing.
    private func initRecorder() {
        print("init recorder")
        let engine = AVAudioEngine()
        let inputNode = engine.inputNode
        let bus = 0
        let inputFormat = inputNode.outputFormat(forBus: bus)

        guard
            let outputFormat = AVAudioFormat(
                commonFormat: .pcmFormatFloat32,
                sampleRate: 16000, channels: 1,
                interleaved: false),
            let converter = AVAudioConverter(from: inputFormat, to: outputFormat)
        else {
            print("Failed to create an audio converter for input format: \(inputFormat)")
            return
        }

        // `self` owns the engine, which keeps the tap block alive; capture
        // `self` weakly to avoid a retain cycle.
        inputNode.installTap(
            onBus: bus,
            bufferSize: 1024,
            format: inputFormat
        ) { [weak self] buffer, _ in
            guard let self = self else { return }
            let samples = self.resample(buffer, using: converter, to: outputFormat)
            if !samples.isEmpty {
                self.process(samples: samples)
            }
        }
        audioEngine = engine
    }

    /// Converts `buffer` to `outputFormat` and returns the resulting samples,
    /// or an empty array when the conversion fails.
    private func resample(
        _ buffer: AVAudioPCMBuffer,
        using converter: AVAudioConverter,
        to outputFormat: AVAudioFormat
    ) -> [Float] {
        let capacity =
            AVAudioFrameCount(outputFormat.sampleRate)
            * buffer.frameLength
            / AVAudioFrameCount(buffer.format.sampleRate)
        guard
            let convertedBuffer = AVAudioPCMBuffer(
                pcmFormat: outputFormat,
                frameCapacity: capacity)
        else {
            return []
        }

        // Hand the captured buffer to the converter exactly once.
        var newBufferAvailable = true
        let inputCallback: AVAudioConverterInputBlock = { _, outStatus in
            if newBufferAvailable {
                outStatus.pointee = .haveData
                newBufferAvailable = false
                return buffer
            } else {
                outStatus.pointee = .noDataNow
                return nil
            }
        }

        var error: NSError?
        let conversionStatus = converter.convert(
            to: convertedBuffer,
            error: &error, withInputFrom: inputCallback)
        if conversionStatus == .error {
            print("Audio conversion failed: \(String(describing: error))")
            return []
        }
        return convertedBuffer.array()
    }

    /// Feeds `samples` to the recognizer, updates the in-progress sentence
    /// and finalizes it when the recognizer reports an endpoint.
    private func process(samples: [Float]) {
        recognizer.acceptWaveform(samples: samples)
        while recognizer.isReady() {
            recognizer.decode()
        }

        let isEndpoint = recognizer.isEndpoint()
        let text = recognizer.getResult().text

        if !text.isEmpty && lastSentence != text {
            lastSentence = text
            updateLabel()
            print(text)
        }

        if isEndpoint {
            if !text.isEmpty {
                // Move the finished sentence into the history.
                sentences.append(lastSentence)
                lastSentence = ""
            }
            recognizer.reset()
        }
    }

    /// Starts recording when stopped, stops it when recording.
    ///
    /// `status` only becomes `.recording` when the audio engine actually
    /// started.
    public func toggleRecorder() {
        switch status {
        case .stop:
            if startRecorder() {
                status = .recording
            }
        case .recording:
            stopRecorder()
            status = .stop
        }
    }

    /// Clears the previous results and starts the audio engine.
    ///
    /// - Returns: `true` when the engine started, `false` otherwise.
    private func startRecorder() -> Bool {
        lastSentence = ""
        sentences = []

        guard let engine = audioEngine else {
            print("audioEngine is not available")
            return false
        }

        do {
            try engine.start()
        } catch let error as NSError {
            print("Got an error starting audioEngine: \(error.domain), \(error)")
            return false
        }
        print("started")
        return true
    }

    /// Stops the audio engine.
    private func stopRecorder() {
        audioEngine?.stop()
        print("stopped")
    }
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 411 KiB