Support paraformer on iOS (#265)
* Fix C API to support streaming paraformer * Fix Swift API * Support paraformer in iOS
This commit is contained in:
@@ -113,13 +113,13 @@ int32_t main(int32_t argc, char *argv[]) {
|
|||||||
config.model_config.tokens = value;
|
config.model_config.tokens = value;
|
||||||
break;
|
break;
|
||||||
case 'e':
|
case 'e':
|
||||||
config.model_config.encoder = value;
|
config.model_config.transducer.encoder = value;
|
||||||
break;
|
break;
|
||||||
case 'd':
|
case 'd':
|
||||||
config.model_config.decoder = value;
|
config.model_config.transducer.decoder = value;
|
||||||
break;
|
break;
|
||||||
case 'j':
|
case 'j':
|
||||||
config.model_config.joiner = value;
|
config.model_config.transducer.joiner = value;
|
||||||
break;
|
break;
|
||||||
case 'n':
|
case 'n':
|
||||||
config.model_config.num_threads = atoi(value);
|
config.model_config.num_threads = atoi(value);
|
||||||
|
|||||||
@@ -7,6 +7,8 @@
|
|||||||
objects = {
|
objects = {
|
||||||
|
|
||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
|
C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
|
||||||
|
C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */; };
|
||||||
C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; };
|
C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; };
|
||||||
C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; };
|
C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; };
|
||||||
C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; };
|
C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; };
|
||||||
@@ -18,8 +20,6 @@
|
|||||||
C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; };
|
C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; };
|
||||||
C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; };
|
C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; };
|
||||||
C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; };
|
C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; };
|
||||||
C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
|
|
||||||
C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */; };
|
|
||||||
/* End PBXBuildFile section */
|
/* End PBXBuildFile section */
|
||||||
|
|
||||||
/* Begin PBXContainerItemProxy section */
|
/* Begin PBXContainerItemProxy section */
|
||||||
@@ -40,6 +40,10 @@
|
|||||||
/* End PBXContainerItemProxy section */
|
/* End PBXContainerItemProxy section */
|
||||||
|
|
||||||
/* Begin PBXFileReference section */
|
/* Begin PBXFileReference section */
|
||||||
|
C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.15.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
|
||||||
|
C93989B12A89FF78009AB859 /* decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = decoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; sourceTree = "<group>"; };
|
||||||
|
C93989B22A89FF78009AB859 /* encoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = encoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; sourceTree = "<group>"; };
|
||||||
|
C93989B32A89FF78009AB859 /* tokens.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = tokens.txt; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; sourceTree = "<group>"; };
|
||||||
C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||||
C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
|
C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
|
||||||
C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
|
C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
|
||||||
@@ -66,8 +70,8 @@
|
|||||||
isa = PBXFrameworksBuildPhase;
|
isa = PBXFrameworksBuildPhase;
|
||||||
buildActionMask = 2147483647;
|
buildActionMask = 2147483647;
|
||||||
files = (
|
files = (
|
||||||
C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */,
|
C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */,
|
||||||
C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */,
|
C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */,
|
||||||
);
|
);
|
||||||
runOnlyForDeploymentPostprocessing = 0;
|
runOnlyForDeploymentPostprocessing = 0;
|
||||||
};
|
};
|
||||||
@@ -146,8 +150,12 @@
|
|||||||
C984A81A29AA11C500D74C52 /* Frameworks */ = {
|
C984A81A29AA11C500D74C52 /* Frameworks */ = {
|
||||||
isa = PBXGroup;
|
isa = PBXGroup;
|
||||||
children = (
|
children = (
|
||||||
|
C93989B12A89FF78009AB859 /* decoder.int8.onnx */,
|
||||||
|
C93989B22A89FF78009AB859 /* encoder.int8.onnx */,
|
||||||
|
C93989B32A89FF78009AB859 /* tokens.txt */,
|
||||||
C984A82029AA139600D74C52 /* onnxruntime.xcframework */,
|
C984A82029AA139600D74C52 /* onnxruntime.xcframework */,
|
||||||
C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */,
|
C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */,
|
||||||
|
C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */,
|
||||||
C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */,
|
C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */,
|
||||||
);
|
);
|
||||||
name = Frameworks;
|
name = Frameworks;
|
||||||
|
|||||||
Binary file not shown.
@@ -15,70 +15,91 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
|
|||||||
|
|
||||||
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
||||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
||||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
|
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
|
||||||
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
||||||
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
||||||
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
||||||
let tokens = getResource("tokens", "txt")
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
return sherpaOnnxOnlineTransducerModelConfig(
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner
|
||||||
tokens: tokens,
|
),
|
||||||
numThreads: 2,
|
numThreads: 1,
|
||||||
modelType: "zipformer"
|
modelType: "zipformer"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getZhZipformer20230615() -> SherpaOnnxOnlineTransducerModelConfig {
|
func getZhZipformer20230615() -> SherpaOnnxOnlineModelConfig {
|
||||||
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||||
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||||
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||||
let tokens = getResource("tokens", "txt")
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
return sherpaOnnxOnlineTransducerModelConfig(
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner
|
||||||
tokens: tokens,
|
),
|
||||||
numThreads: 2,
|
numThreads: 1,
|
||||||
modelType: "zipformer2"
|
modelType: "zipformer2"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getZhZipformer20230615Int8() -> SherpaOnnxOnlineTransducerModelConfig {
|
func getZhZipformer20230615Int8() -> SherpaOnnxOnlineModelConfig {
|
||||||
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx")
|
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx")
|
||||||
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||||
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||||
let tokens = getResource("tokens", "txt")
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
return sherpaOnnxOnlineTransducerModelConfig(
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner),
|
||||||
tokens: tokens,
|
numThreads: 1,
|
||||||
numThreads: 2,
|
|
||||||
modelType: "zipformer2"
|
modelType: "zipformer2"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getEnZipformer20230626() -> SherpaOnnxOnlineTransducerModelConfig {
|
func getEnZipformer20230626() -> SherpaOnnxOnlineModelConfig {
|
||||||
let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||||
let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||||
let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||||
let tokens = getResource("tokens", "txt")
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
return sherpaOnnxOnlineTransducerModelConfig(
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner),
|
||||||
tokens: tokens,
|
numThreads: 1,
|
||||||
numThreads: 2,
|
|
||||||
modelType: "zipformer2"
|
modelType: "zipformer2"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
|
||||||
|
let encoder = getResource("encoder.int8", "onnx")
|
||||||
|
let decoder = getResource("decoder.int8", "onnx")
|
||||||
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
paraformer: sherpaOnnxOnlineParaformerModelConfig(
|
||||||
|
encoder: encoder,
|
||||||
|
decoder: decoder),
|
||||||
|
numThreads: 1,
|
||||||
|
modelType: "paraformer"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Please refer to
|
/// Please refer to
|
||||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
/// to add more models if you need
|
/// to add more models if you need
|
||||||
|
|||||||
@@ -87,7 +87,8 @@ class ViewController: UIViewController {
|
|||||||
|
|
||||||
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||||
// let modelConfig = getZhZipformer20230615()
|
// let modelConfig = getZhZipformer20230615()
|
||||||
let modelConfig = getEnZipformer20230626()
|
// let modelConfig = getEnZipformer20230626()
|
||||||
|
let modelConfig = getBilingualStreamingZhEnParaformer()
|
||||||
|
|
||||||
let featConfig = sherpaOnnxFeatureConfig(
|
let featConfig = sherpaOnnxFeatureConfig(
|
||||||
sampleRate: 16000,
|
sampleRate: 16000,
|
||||||
|
|||||||
Binary file not shown.
@@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
|
|||||||
|
|
||||||
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
||||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
||||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
|
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
|
||||||
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
||||||
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
||||||
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
||||||
let tokens = getResource("tokens", "txt")
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
return sherpaOnnxOnlineTransducerModelConfig(
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner),
|
||||||
tokens: tokens,
|
|
||||||
numThreads: 2,
|
numThreads: 2,
|
||||||
modelType: "zipformer"
|
modelType: "zipformer"
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||||
|
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
|
||||||
|
let encoder = getResource("encoder.int8", "onnx")
|
||||||
|
let decoder = getResource("decoder.int8", "onnx")
|
||||||
|
let tokens = getResource("tokens", "txt")
|
||||||
|
|
||||||
|
return sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: tokens,
|
||||||
|
paraformer: sherpaOnnxOnlineParaformerModelConfig(
|
||||||
|
encoder: encoder,
|
||||||
|
decoder: decoder),
|
||||||
|
numThreads: 1,
|
||||||
|
modelType: "paraformer"
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
/// Please refer to
|
/// Please refer to
|
||||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
/// to add more models if you need
|
/// to add more models if you need
|
||||||
|
|||||||
@@ -59,7 +59,8 @@ class SherpaOnnxViewModel: ObservableObject {
|
|||||||
//
|
//
|
||||||
// You can also modify Model.swift to add new pre-trained models from
|
// You can also modify Model.swift to add new pre-trained models from
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||||
|
let modelConfig = getBilingualStreamingZhEnParaformer()
|
||||||
|
|
||||||
let featConfig = sherpaOnnxFeatureConfig(
|
let featConfig = sherpaOnnxFeatureConfig(
|
||||||
sampleRate: 16000,
|
sampleRate: 16000,
|
||||||
|
|||||||
@@ -39,11 +39,17 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
|||||||
SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
|
SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
|
||||||
|
|
||||||
recognizer_config.model_config.transducer.encoder =
|
recognizer_config.model_config.transducer.encoder =
|
||||||
SHERPA_ONNX_OR(config->model_config.encoder, "");
|
SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
|
||||||
recognizer_config.model_config.transducer.decoder =
|
recognizer_config.model_config.transducer.decoder =
|
||||||
SHERPA_ONNX_OR(config->model_config.decoder, "");
|
SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
|
||||||
recognizer_config.model_config.transducer.joiner =
|
recognizer_config.model_config.transducer.joiner =
|
||||||
SHERPA_ONNX_OR(config->model_config.joiner, "");
|
SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");
|
||||||
|
|
||||||
|
recognizer_config.model_config.paraformer.encoder =
|
||||||
|
SHERPA_ONNX_OR(config->model_config.paraformer.encoder, "");
|
||||||
|
recognizer_config.model_config.paraformer.decoder =
|
||||||
|
SHERPA_ONNX_OR(config->model_config.paraformer.decoder, "");
|
||||||
|
|
||||||
recognizer_config.model_config.tokens =
|
recognizer_config.model_config.tokens =
|
||||||
SHERPA_ONNX_OR(config->model_config.tokens, "");
|
SHERPA_ONNX_OR(config->model_config.tokens, "");
|
||||||
recognizer_config.model_config.num_threads =
|
recognizer_config.model_config.num_threads =
|
||||||
@@ -128,6 +134,8 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
|||||||
const auto &text = result.text;
|
const auto &text = result.text;
|
||||||
|
|
||||||
auto r = new SherpaOnnxOnlineRecognizerResult;
|
auto r = new SherpaOnnxOnlineRecognizerResult;
|
||||||
|
memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));
|
||||||
|
|
||||||
// copy text
|
// copy text
|
||||||
r->text = new char[text.size() + 1];
|
r->text = new char[text.size() + 1];
|
||||||
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
|
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
|
||||||
@@ -153,7 +161,6 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
|||||||
r->tokens = new char[total_length];
|
r->tokens = new char[total_length];
|
||||||
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
|
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
|
||||||
total_length);
|
total_length);
|
||||||
r->timestamps = new float[r->count];
|
|
||||||
char **tokens_temp = new char *[r->count];
|
char **tokens_temp = new char *[r->count];
|
||||||
int32_t pos = 0;
|
int32_t pos = 0;
|
||||||
for (int32_t i = 0; i < r->count; ++i) {
|
for (int32_t i = 0; i < r->count; ++i) {
|
||||||
@@ -162,10 +169,17 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
|||||||
result.tokens[i].c_str(), result.tokens[i].size());
|
result.tokens[i].c_str(), result.tokens[i].size());
|
||||||
// +1 to move past the null character
|
// +1 to move past the null character
|
||||||
pos += result.tokens[i].size() + 1;
|
pos += result.tokens[i].size() + 1;
|
||||||
r->timestamps[i] = result.timestamps[i];
|
}
|
||||||
|
r->tokens_arr = tokens_temp;
|
||||||
|
|
||||||
|
if (!result.timestamps.empty()) {
|
||||||
|
r->timestamps = new float[r->count];
|
||||||
|
std::copy(result.timestamps.begin(), result.timestamps.end(),
|
||||||
|
r->timestamps);
|
||||||
|
} else {
|
||||||
|
r->timestamps = nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
r->tokens_arr = tokens_temp;
|
|
||||||
} else {
|
} else {
|
||||||
r->count = 0;
|
r->count = 0;
|
||||||
r->timestamps = nullptr;
|
r->timestamps = nullptr;
|
||||||
|
|||||||
@@ -50,12 +50,25 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
|
|||||||
const char *encoder;
|
const char *encoder;
|
||||||
const char *decoder;
|
const char *decoder;
|
||||||
const char *joiner;
|
const char *joiner;
|
||||||
|
} SherpaOnnxOnlineTransducerModelConfig;
|
||||||
|
|
||||||
|
// please visit
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||||
|
// to download pre-trained streaming paraformer models
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineParaformerModelConfig {
|
||||||
|
const char *encoder;
|
||||||
|
const char *decoder;
|
||||||
|
} SherpaOnnxOnlineParaformerModelConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxModelConfig {
|
||||||
|
SherpaOnnxOnlineTransducerModelConfig transducer;
|
||||||
|
SherpaOnnxOnlineParaformerModelConfig paraformer;
|
||||||
const char *tokens;
|
const char *tokens;
|
||||||
int32_t num_threads;
|
int32_t num_threads;
|
||||||
const char *provider;
|
const char *provider;
|
||||||
int32_t debug; // true to print debug information of the model
|
int32_t debug; // true to print debug information of the model
|
||||||
const char *model_type;
|
const char *model_type;
|
||||||
} SherpaOnnxOnlineTransducerModelConfig;
|
} SherpaOnnxOnlineModelConfig;
|
||||||
|
|
||||||
/// It expects 16 kHz 16-bit single channel wave format.
|
/// It expects 16 kHz 16-bit single channel wave format.
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
||||||
@@ -71,7 +84,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
|||||||
|
|
||||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
|
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||||
SherpaOnnxFeatureConfig feat_config;
|
SherpaOnnxFeatureConfig feat_config;
|
||||||
SherpaOnnxOnlineTransducerModelConfig model_config;
|
SherpaOnnxOnlineModelConfig model_config;
|
||||||
|
|
||||||
/// Possible values are: greedy_search, modified_beam_search
|
/// Possible values are: greedy_search, modified_beam_search
|
||||||
const char *decoding_method;
|
const char *decoding_method;
|
||||||
|
|||||||
@@ -18,31 +18,71 @@ func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
|
|||||||
/// Return an instance of SherpaOnnxOnlineTransducerModelConfig.
|
/// Return an instance of SherpaOnnxOnlineTransducerModelConfig.
|
||||||
///
|
///
|
||||||
/// Please refer to
|
/// Please refer to
|
||||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||||
/// to download the required `.onnx` files.
|
/// to download the required `.onnx` files.
|
||||||
///
|
///
|
||||||
/// - Parameters:
|
/// - Parameters:
|
||||||
/// - encoder: Path to encoder.onnx
|
/// - encoder: Path to encoder.onnx
|
||||||
/// - decoder: Path to decoder.onnx
|
/// - decoder: Path to decoder.onnx
|
||||||
/// - joiner: Path to joiner.onnx
|
/// - joiner: Path to joiner.onnx
|
||||||
/// - tokens: Path to tokens.txt
|
|
||||||
/// - numThreads: Number of threads to use for neural network computation.
|
|
||||||
///
|
///
|
||||||
/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
|
/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
|
||||||
func sherpaOnnxOnlineTransducerModelConfig(
|
func sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: String,
|
encoder: String = "",
|
||||||
decoder: String,
|
decoder: String = "",
|
||||||
joiner: String,
|
joiner: String = ""
|
||||||
tokens: String,
|
|
||||||
numThreads: Int = 2,
|
|
||||||
provider: String = "cpu",
|
|
||||||
debug: Int = 0,
|
|
||||||
modelType: String = ""
|
|
||||||
) -> SherpaOnnxOnlineTransducerModelConfig {
|
) -> SherpaOnnxOnlineTransducerModelConfig {
|
||||||
return SherpaOnnxOnlineTransducerModelConfig(
|
return SherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: toCPointer(encoder),
|
encoder: toCPointer(encoder),
|
||||||
decoder: toCPointer(decoder),
|
decoder: toCPointer(decoder),
|
||||||
joiner: toCPointer(joiner),
|
joiner: toCPointer(joiner)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an instance of SherpaOnnxOnlineParaformerModelConfig.
|
||||||
|
///
|
||||||
|
/// Please refer to
|
||||||
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||||
|
/// to download the required `.onnx` files.
|
||||||
|
///
|
||||||
|
/// - Parameters:
|
||||||
|
/// - encoder: Path to encoder.onnx
|
||||||
|
/// - decoder: Path to decoder.onnx
|
||||||
|
///
|
||||||
|
/// - Returns: Return an instance of SherpaOnnxOnlineParaformerModelConfig
|
||||||
|
func sherpaOnnxOnlineParaformerModelConfig(
|
||||||
|
encoder: String = "",
|
||||||
|
decoder: String = ""
|
||||||
|
) -> SherpaOnnxOnlineParaformerModelConfig {
|
||||||
|
return SherpaOnnxOnlineParaformerModelConfig(
|
||||||
|
encoder: toCPointer(encoder),
|
||||||
|
decoder: toCPointer(decoder)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return an instance of SherpaOnnxOnlineModelConfig.
|
||||||
|
///
|
||||||
|
/// Please refer to
|
||||||
|
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||||
|
/// to download the required `.onnx` files.
|
||||||
|
///
|
||||||
|
/// - Parameters:
|
||||||
|
/// - tokens: Path to tokens.txt
|
||||||
|
/// - numThreads: Number of threads to use for neural network computation.
|
||||||
|
///
|
||||||
|
/// - Returns: Return an instance of SherpaOnnxOnlineModelConfig
|
||||||
|
func sherpaOnnxOnlineModelConfig(
|
||||||
|
tokens: String,
|
||||||
|
transducer: SherpaOnnxOnlineTransducerModelConfig = sherpaOnnxOnlineTransducerModelConfig(),
|
||||||
|
paraformer: SherpaOnnxOnlineParaformerModelConfig = sherpaOnnxOnlineParaformerModelConfig(),
|
||||||
|
numThreads: Int = 1,
|
||||||
|
provider: String = "cpu",
|
||||||
|
debug: Int = 0,
|
||||||
|
modelType: String = ""
|
||||||
|
) -> SherpaOnnxOnlineModelConfig {
|
||||||
|
return SherpaOnnxOnlineModelConfig(
|
||||||
|
transducer: transducer,
|
||||||
|
paraformer: paraformer,
|
||||||
tokens: toCPointer(tokens),
|
tokens: toCPointer(tokens),
|
||||||
num_threads: Int32(numThreads),
|
num_threads: Int32(numThreads),
|
||||||
provider: toCPointer(provider),
|
provider: toCPointer(provider),
|
||||||
@@ -62,7 +102,7 @@ func sherpaOnnxFeatureConfig(
|
|||||||
|
|
||||||
func sherpaOnnxOnlineRecognizerConfig(
|
func sherpaOnnxOnlineRecognizerConfig(
|
||||||
featConfig: SherpaOnnxFeatureConfig,
|
featConfig: SherpaOnnxFeatureConfig,
|
||||||
modelConfig: SherpaOnnxOnlineTransducerModelConfig,
|
modelConfig: SherpaOnnxOnlineModelConfig,
|
||||||
enableEndpoint: Bool = false,
|
enableEndpoint: Bool = false,
|
||||||
rule1MinTrailingSilence: Float = 2.4,
|
rule1MinTrailingSilence: Float = 2.4,
|
||||||
rule2MinTrailingSilence: Float = 1.2,
|
rule2MinTrailingSilence: Float = 1.2,
|
||||||
|
|||||||
@@ -13,17 +13,24 @@ extension AVAudioPCMBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func run() {
|
func run() {
|
||||||
let encoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
|
let encoder =
|
||||||
let decoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
|
||||||
let joiner = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
|
let decoder =
|
||||||
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
|
||||||
|
let joiner =
|
||||||
|
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
|
||||||
let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
|
let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
|
||||||
|
|
||||||
let modelConfig = sherpaOnnxOnlineTransducerModelConfig(
|
let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
|
||||||
encoder: encoder,
|
encoder: encoder,
|
||||||
decoder: decoder,
|
decoder: decoder,
|
||||||
joiner: joiner,
|
joiner: joiner
|
||||||
|
)
|
||||||
|
|
||||||
|
let modelConfig = sherpaOnnxOnlineModelConfig(
|
||||||
tokens: tokens,
|
tokens: tokens,
|
||||||
numThreads: 2)
|
transducer: transducerConfig
|
||||||
|
)
|
||||||
|
|
||||||
let featConfig = sherpaOnnxFeatureConfig(
|
let featConfig = sherpaOnnxFeatureConfig(
|
||||||
sampleRate: 16000,
|
sampleRate: 16000,
|
||||||
@@ -31,13 +38,9 @@ func run() {
|
|||||||
)
|
)
|
||||||
var config = sherpaOnnxOnlineRecognizerConfig(
|
var config = sherpaOnnxOnlineRecognizerConfig(
|
||||||
featConfig: featConfig,
|
featConfig: featConfig,
|
||||||
modelConfig: modelConfig,
|
modelConfig: modelConfig
|
||||||
enableEndpoint: false,
|
|
||||||
decodingMethod: "modified_beam_search",
|
|
||||||
maxActivePaths: 4
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
let recognizer = SherpaOnnxRecognizer(config: &config)
|
let recognizer = SherpaOnnxRecognizer(config: &config)
|
||||||
|
|
||||||
let filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
|
let filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
|
||||||
@@ -60,7 +63,7 @@ func run() {
|
|||||||
recognizer.acceptWaveform(samples: tailPadding)
|
recognizer.acceptWaveform(samples: tailPadding)
|
||||||
|
|
||||||
recognizer.inputFinished()
|
recognizer.inputFinished()
|
||||||
while (recognizer.isReady()) {
|
while recognizer.isReady() {
|
||||||
recognizer.decode()
|
recognizer.decode()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user