Support paraformer on iOS (#265)

* Fix C API to support streaming paraformer

* Fix Swift API

* Support paraformer in iOS
This commit is contained in:
Fangjun Kuang
2023-08-14 14:38:41 +08:00
committed by GitHub
parent 35526e26e1
commit a8bdb4b38a
12 changed files with 204 additions and 86 deletions

View File

@@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
let tokens = getResource("tokens", "txt")
return sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner,
return sherpaOnnxOnlineModelConfig(
tokens: tokens,
transducer: sherpaOnnxOnlineTransducerModelConfig(
encoder: encoder,
decoder: decoder,
joiner: joiner),
numThreads: 2,
modelType: "zipformer"
)
}
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
/// Builds the online model configuration for the streaming paraformer model.
///
/// Looks up the bundled `encoder.int8.onnx`, `decoder.int8.onnx` and
/// `tokens.txt` resources through `getResource` and wires them into a
/// paraformer-based `SherpaOnnxOnlineModelConfig` that runs on one thread.
///
/// - Returns: The model configuration with `modelType` set to `"paraformer"`.
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
  // Resolve the resources in the same order as before: encoder, decoder, tokens.
  let encoderPath = getResource("encoder.int8", "onnx")
  let decoderPath = getResource("decoder.int8", "onnx")
  let tokensPath = getResource("tokens", "txt")

  // Build the paraformer-specific part first so the final call stays flat.
  let paraformerConfig = sherpaOnnxOnlineParaformerModelConfig(
    encoder: encoderPath,
    decoder: decoderPath)

  return sherpaOnnxOnlineModelConfig(
    tokens: tokensPath,
    paraformer: paraformerConfig,
    numThreads: 1,
    modelType: "paraformer")
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
/// to add more models if needed.

View File

@@ -16,15 +16,15 @@ enum Status {
class SherpaOnnxViewModel: ObservableObject {
@Published var status: Status = .stop
@Published var subtitles: String = ""
var sentences: [String] = []
var audioEngine: AVAudioEngine? = nil
var recognizer: SherpaOnnxRecognizer! = nil
var lastSentence: String = ""
let maxSentence: Int = 20
var results: String {
if sentences.isEmpty && lastSentence.isEmpty {
return ""
@@ -42,24 +42,25 @@ class SherpaOnnxViewModel: ObservableObject {
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
}
}
/// Copies the current `results` text into the published `subtitles`
/// property; the work is dispatched asynchronously to the main queue.
func updateLabel() {
  DispatchQueue.main.async {
    // Read the computed text once, then publish it.
    let latestText = self.results
    self.subtitles = latestText
  }
}
/// Creates the view model by running `initRecognizer()` and then
/// `initRecorder()`, in that order.
init() {
initRecognizer()
initRecorder()
}
private func initRecognizer() {
// Please select the model that best suits your needs.
//
// You can also modify Model.swift to add new pre-trained models from
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
let modelConfig = getBilingualStreamZhEnZipformer20230220()
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
let modelConfig = getBilingualStreamingZhEnParaformer()
let featConfig = sherpaOnnxFeatureConfig(
sampleRate: 16000,
@@ -77,7 +78,7 @@ class SherpaOnnxViewModel: ObservableObject {
)
recognizer = SherpaOnnxRecognizer(config: &config)
}
private func initRecorder() {
print("init recorder")
audioEngine = AVAudioEngine()
@@ -152,7 +153,7 @@ class SherpaOnnxViewModel: ObservableObject {
}
}
}
public func toggleRecorder() {
if status == .stop {
startRecorder()