diff --git a/c-api-examples/decode-file-c-api.c b/c-api-examples/decode-file-c-api.c index 6a8e1d3e..afcc3e4b 100644 --- a/c-api-examples/decode-file-c-api.c +++ b/c-api-examples/decode-file-c-api.c @@ -113,13 +113,13 @@ int32_t main(int32_t argc, char *argv[]) { config.model_config.tokens = value; break; case 'e': - config.model_config.encoder = value; + config.model_config.transducer.encoder = value; break; case 'd': - config.model_config.decoder = value; + config.model_config.transducer.decoder = value; break; case 'j': - config.model_config.joiner = value; + config.model_config.transducer.joiner = value; break; case 'n': config.model_config.num_threads = atoi(value); diff --git a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj index d125a9b4..a7d3c3c3 100644 --- a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj +++ b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.pbxproj @@ -7,6 +7,8 @@ objects = { /* Begin PBXBuildFile section */ + C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; }; + C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */; }; C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; }; C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; }; C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; }; @@ -18,8 +20,6 @@ C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; }; C984A82A29AA19AC00D74C52 /* Model.swift 
in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; }; C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; }; - C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; }; - C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */; }; /* End PBXBuildFile section */ /* Begin PBXContainerItemProxy section */ @@ -40,6 +40,10 @@ /* End PBXContainerItemProxy section */ /* Begin PBXFileReference section */ + C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.15.1/onnxruntime.xcframework"; sourceTree = ""; }; + C93989B12A89FF78009AB859 /* decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = decoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; sourceTree = ""; }; + C93989B22A89FF78009AB859 /* encoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = encoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; sourceTree = ""; }; + C93989B32A89FF78009AB859 /* tokens.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = tokens.txt; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; sourceTree = ""; }; C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; }; C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; 
lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = ""; }; @@ -66,8 +70,8 @@ isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; files = ( - C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */, - C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */, + C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */, + C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; }; @@ -146,8 +150,12 @@ C984A81A29AA11C500D74C52 /* Frameworks */ = { isa = PBXGroup; children = ( + C93989B12A89FF78009AB859 /* decoder.int8.onnx */, + C93989B22A89FF78009AB859 /* encoder.int8.onnx */, + C93989B32A89FF78009AB859 /* tokens.txt */, C984A82029AA139600D74C52 /* onnxruntime.xcframework */, C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */, + C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */, C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */, ); name = Frameworks; diff --git a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate index 5e8e877d..5753b74b 100644 Binary files a/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate and b/ios-swift/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/ios-swift/SherpaOnnx/SherpaOnnx/Model.swift b/ios-swift/SherpaOnnx/SherpaOnnx/Model.swift index 5e6c30d6..a1e84260 100644 --- a/ios-swift/SherpaOnnx/SherpaOnnx/Model.swift +++ b/ios-swift/SherpaOnnx/SherpaOnnx/Model.swift @@ -15,70 +15,91 @@ func 
getResource(_ forResource: String, _ ofType: String) -> String { /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html -func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig { +func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig { let encoder = getResource("encoder-epoch-99-avg-1", "onnx") let decoder = getResource("decoder-epoch-99-avg-1", "onnx") let joiner = getResource("joiner-epoch-99-avg-1", "onnx") let tokens = getResource("tokens", "txt") - return sherpaOnnxOnlineTransducerModelConfig( - encoder: encoder, - decoder: decoder, - joiner: joiner, + return sherpaOnnxOnlineModelConfig( tokens: tokens, - numThreads: 2, + transducer: sherpaOnnxOnlineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner + ), + numThreads: 1, modelType: "zipformer" ) } -func getZhZipformer20230615() -> SherpaOnnxOnlineTransducerModelConfig { +func getZhZipformer20230615() -> SherpaOnnxOnlineModelConfig { let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx") let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx") let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx") let tokens = getResource("tokens", "txt") - return sherpaOnnxOnlineTransducerModelConfig( - encoder: encoder, - decoder: decoder, - joiner: joiner, + return sherpaOnnxOnlineModelConfig( tokens: tokens, - numThreads: 2, + transducer: sherpaOnnxOnlineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner + ), + numThreads: 1, modelType: "zipformer2" ) } -func getZhZipformer20230615Int8() -> SherpaOnnxOnlineTransducerModelConfig { +func getZhZipformer20230615Int8() -> SherpaOnnxOnlineModelConfig { let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx") let decoder = 
getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx") let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx") let tokens = getResource("tokens", "txt") - return sherpaOnnxOnlineTransducerModelConfig( - encoder: encoder, - decoder: decoder, - joiner: joiner, + return sherpaOnnxOnlineModelConfig( tokens: tokens, - numThreads: 2, + transducer: sherpaOnnxOnlineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner), + numThreads: 1, modelType: "zipformer2" ) } -func getEnZipformer20230626() -> SherpaOnnxOnlineTransducerModelConfig { +func getEnZipformer20230626() -> SherpaOnnxOnlineModelConfig { let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx") let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx") let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx") let tokens = getResource("tokens", "txt") - return sherpaOnnxOnlineTransducerModelConfig( - encoder: encoder, - decoder: decoder, - joiner: joiner, + return sherpaOnnxOnlineModelConfig( tokens: tokens, - numThreads: 2, + transducer: sherpaOnnxOnlineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner), + numThreads: 1, modelType: "zipformer2" ) } +func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig { + let encoder = getResource("encoder.int8", "onnx") + let decoder = getResource("decoder.int8", "onnx") + let tokens = getResource("tokens", "txt") + + return sherpaOnnxOnlineModelConfig( + tokens: tokens, + paraformer: sherpaOnnxOnlineParaformerModelConfig( + encoder: encoder, + decoder: decoder), + numThreads: 1, + modelType: "paraformer" + ) +} + /// Please refer to /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html /// to add more models if you need diff --git a/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift b/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift index eb4706b6..7153a354 100644 --- 
a/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift +++ b/ios-swift/SherpaOnnx/SherpaOnnx/ViewController.swift @@ -87,7 +87,8 @@ class ViewController: UIViewController { // let modelConfig = getBilingualStreamZhEnZipformer20230220() // let modelConfig = getZhZipformer20230615() - let modelConfig = getEnZipformer20230626() + // let modelConfig = getEnZipformer20230626() + let modelConfig = getBilingualStreamingZhEnParaformer() let featConfig = sherpaOnnxFeatureConfig( sampleRate: 16000, diff --git a/ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate b/ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate index 0d703c0b..86affab0 100644 Binary files a/ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate and b/ios-swiftui/SherpaOnnx/SherpaOnnx.xcodeproj/project.xcworkspace/xcuserdata/fangjun.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/ios-swiftui/SherpaOnnx/SherpaOnnx/Model.swift b/ios-swiftui/SherpaOnnx/SherpaOnnx/Model.swift index 569b62c8..a8439f8e 100644 --- a/ios-swiftui/SherpaOnnx/SherpaOnnx/Model.swift +++ b/ios-swiftui/SherpaOnnx/SherpaOnnx/Model.swift @@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String { /// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English) /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html -func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig { +func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig { let encoder = getResource("encoder-epoch-99-avg-1", "onnx") let decoder = getResource("decoder-epoch-99-avg-1", "onnx") let joiner = getResource("joiner-epoch-99-avg-1", "onnx") let tokens = getResource("tokens", "txt") - return 
sherpaOnnxOnlineTransducerModelConfig( - encoder: encoder, - decoder: decoder, - joiner: joiner, + return sherpaOnnxOnlineModelConfig( tokens: tokens, + transducer: sherpaOnnxOnlineTransducerModelConfig( + encoder: encoder, + decoder: decoder, + joiner: joiner), numThreads: 2, modelType: "zipformer" ) } +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html +func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig { + let encoder = getResource("encoder.int8", "onnx") + let decoder = getResource("decoder.int8", "onnx") + let tokens = getResource("tokens", "txt") + + return sherpaOnnxOnlineModelConfig( + tokens: tokens, + paraformer: sherpaOnnxOnlineParaformerModelConfig( + encoder: encoder, + decoder: decoder), + numThreads: 1, + modelType: "paraformer" + ) +} + /// Please refer to /// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html /// to add more models if you need diff --git a/ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxViewModel.swift b/ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxViewModel.swift index e2b9c5bf..52912137 100644 --- a/ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxViewModel.swift +++ b/ios-swiftui/SherpaOnnx/SherpaOnnx/SherpaOnnxViewModel.swift @@ -16,15 +16,15 @@ enum Status { class SherpaOnnxViewModel: ObservableObject { @Published var status: Status = .stop @Published var subtitles: String = "" - + var sentences: [String] = [] - + var audioEngine: AVAudioEngine? = nil var recognizer: SherpaOnnxRecognizer! 
= nil - + var lastSentence: String = "" let maxSentence: Int = 20 - + var results: String { if sentences.isEmpty && lastSentence.isEmpty { return "" @@ -42,24 +42,25 @@ class SherpaOnnxViewModel: ObservableObject { .joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())" } } - + func updateLabel() { DispatchQueue.main.async { self.subtitles = self.results } } - + init() { initRecognizer() initRecorder() } - + private func initRecognizer() { // Please select one model that is best suitable for you. // // You can also modify Model.swift to add new pre-trained models from // https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html - let modelConfig = getBilingualStreamZhEnZipformer20230220() + // let modelConfig = getBilingualStreamZhEnZipformer20230220() + let modelConfig = getBilingualStreamingZhEnParaformer() let featConfig = sherpaOnnxFeatureConfig( sampleRate: 16000, @@ -77,7 +78,7 @@ class SherpaOnnxViewModel: ObservableObject { ) recognizer = SherpaOnnxRecognizer(config: &config) } - + private func initRecorder() { print("init recorder") audioEngine = AVAudioEngine() @@ -152,7 +153,7 @@ class SherpaOnnxViewModel: ObservableObject { } } } - + public func toggleRecorder() { if status == .stop { startRecorder() diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 7a2e0540..0a3bc13f 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -39,11 +39,17 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer( SHERPA_ONNX_OR(config->feat_config.feature_dim, 80); recognizer_config.model_config.transducer.encoder = - SHERPA_ONNX_OR(config->model_config.encoder, ""); + SHERPA_ONNX_OR(config->model_config.transducer.encoder, ""); recognizer_config.model_config.transducer.decoder = - SHERPA_ONNX_OR(config->model_config.decoder, ""); + SHERPA_ONNX_OR(config->model_config.transducer.decoder, ""); recognizer_config.model_config.transducer.joiner = - SHERPA_ONNX_OR(config->model_config.joiner, ""); + 
SHERPA_ONNX_OR(config->model_config.transducer.joiner, ""); + + recognizer_config.model_config.paraformer.encoder = + SHERPA_ONNX_OR(config->model_config.paraformer.encoder, ""); + recognizer_config.model_config.paraformer.decoder = + SHERPA_ONNX_OR(config->model_config.paraformer.decoder, ""); + recognizer_config.model_config.tokens = SHERPA_ONNX_OR(config->model_config.tokens, ""); recognizer_config.model_config.num_threads = @@ -128,6 +134,8 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( const auto &text = result.text; auto r = new SherpaOnnxOnlineRecognizerResult; + memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult)); + // copy text r->text = new char[text.size() + 1]; std::copy(text.begin(), text.end(), const_cast(r->text)); @@ -153,7 +161,6 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( r->tokens = new char[total_length]; memset(reinterpret_cast(const_cast(r->tokens)), 0, total_length); - r->timestamps = new float[r->count]; char **tokens_temp = new char *[r->count]; int32_t pos = 0; for (int32_t i = 0; i < r->count; ++i) { @@ -162,10 +169,17 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult( result.tokens[i].c_str(), result.tokens[i].size()); // +1 to move past the null character pos += result.tokens[i].size() + 1; - r->timestamps[i] = result.timestamps[i]; + } + r->tokens_arr = tokens_temp; + + if (!result.timestamps.empty()) { + r->timestamps = new float[r->count]; + std::copy(result.timestamps.begin(), result.timestamps.end(), + r->timestamps); + } else { + r->timestamps = nullptr; } - r->tokens_arr = tokens_temp; } else { r->count = 0; r->timestamps = nullptr; diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index cb1fa7e8..621b6a80 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -50,12 +50,25 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig { const char *encoder; const char *decoder; const char *joiner; +} SherpaOnnxOnlineTransducerModelConfig; + +// 
please visit +// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html +// to download pre-trained streaming paraformer models +SHERPA_ONNX_API typedef struct SherpaOnnxOnlineParaformerModelConfig { + const char *encoder; + const char *decoder; +} SherpaOnnxOnlineParaformerModelConfig; + +SHERPA_ONNX_API typedef struct SherpaOnnxModelConfig { + SherpaOnnxOnlineTransducerModelConfig transducer; + SherpaOnnxOnlineParaformerModelConfig paraformer; const char *tokens; int32_t num_threads; const char *provider; int32_t debug; // true to print debug information of the model const char *model_type; -} SherpaOnnxOnlineTransducerModelConfig; +} SherpaOnnxOnlineModelConfig; /// It expects 16 kHz 16-bit single channel wave format. SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { @@ -71,7 +84,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig { SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig { SherpaOnnxFeatureConfig feat_config; - SherpaOnnxOnlineTransducerModelConfig model_config; + SherpaOnnxOnlineModelConfig model_config; /// Possible values are: greedy_search, modified_beam_search const char *decoding_method; diff --git a/swift-api-examples/SherpaOnnx.swift b/swift-api-examples/SherpaOnnx.swift index 70565abd..4f2c470e 100644 --- a/swift-api-examples/SherpaOnnx.swift +++ b/swift-api-examples/SherpaOnnx.swift @@ -18,31 +18,71 @@ func toCPointer(_ s: String) -> UnsafePointer! { /// Return an instance of SherpaOnnxOnlineTransducerModelConfig. /// /// Please refer to -/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html /// to download the required `.onnx` files. /// /// - Parameters: /// - encoder: Path to encoder.onnx /// - decoder: Path to decoder.onnx /// - joiner: Path to joiner.onnx -/// - tokens: Path to tokens.txt -/// - numThreads: Number of threads to use for neural network computation. 
/// /// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig func sherpaOnnxOnlineTransducerModelConfig( - encoder: String, - decoder: String, - joiner: String, - tokens: String, - numThreads: Int = 2, - provider: String = "cpu", - debug: Int = 0, - modelType: String = "" + encoder: String = "", + decoder: String = "", + joiner: String = "" ) -> SherpaOnnxOnlineTransducerModelConfig { return SherpaOnnxOnlineTransducerModelConfig( encoder: toCPointer(encoder), decoder: toCPointer(decoder), - joiner: toCPointer(joiner), + joiner: toCPointer(joiner) + ) +} + +/// Return an instance of SherpaOnnxOnlineParaformerModelConfig. +/// +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html +/// to download the required `.onnx` files. +/// +/// - Parameters: +/// - encoder: Path to encoder.onnx +/// - decoder: Path to decoder.onnx +/// +/// - Returns: Return an instance of SherpaOnnxOnlineParaformerModelConfig +func sherpaOnnxOnlineParaformerModelConfig( + encoder: String = "", + decoder: String = "" +) -> SherpaOnnxOnlineParaformerModelConfig { + return SherpaOnnxOnlineParaformerModelConfig( + encoder: toCPointer(encoder), + decoder: toCPointer(decoder) + ) +} + +/// Return an instance of SherpaOnnxOnlineModelConfig. +/// +/// Please refer to +/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html +/// to download the required `.onnx` files. +/// +/// - Parameters: +/// - tokens: Path to tokens.txt +/// - numThreads: Number of threads to use for neural network computation. 
+/// +/// - Returns: Return an instance of SherpaOnnxOnlineModelConfig +func sherpaOnnxOnlineModelConfig( + tokens: String, + transducer: SherpaOnnxOnlineTransducerModelConfig = sherpaOnnxOnlineTransducerModelConfig(), + paraformer: SherpaOnnxOnlineParaformerModelConfig = sherpaOnnxOnlineParaformerModelConfig(), + numThreads: Int = 1, + provider: String = "cpu", + debug: Int = 0, + modelType: String = "" +) -> SherpaOnnxOnlineModelConfig { + return SherpaOnnxOnlineModelConfig( + transducer: transducer, + paraformer: paraformer, tokens: toCPointer(tokens), num_threads: Int32(numThreads), provider: toCPointer(provider), @@ -62,7 +102,7 @@ func sherpaOnnxFeatureConfig( func sherpaOnnxOnlineRecognizerConfig( featConfig: SherpaOnnxFeatureConfig, - modelConfig: SherpaOnnxOnlineTransducerModelConfig, + modelConfig: SherpaOnnxOnlineModelConfig, enableEndpoint: Bool = false, rule1MinTrailingSilence: Float = 2.4, rule2MinTrailingSilence: Float = 1.2, @@ -100,17 +140,17 @@ class SherpaOnnxOnlineRecongitionResult { } var count: Int32 { - return result.pointee.count + return result.pointee.count } var tokens: [String] { if let tokensPointer = result.pointee.tokens_arr { var tokens: [String] = [] for index in 0..