Support paraformer on iOS (#265)
* Fix C API to support streaming paraformer * Fix Swift API * Support paraformer in iOS
This commit is contained in:
@@ -113,13 +113,13 @@ int32_t main(int32_t argc, char *argv[]) {
|
||||
config.model_config.tokens = value;
|
||||
break;
|
||||
case 'e':
|
||||
config.model_config.encoder = value;
|
||||
config.model_config.transducer.encoder = value;
|
||||
break;
|
||||
case 'd':
|
||||
config.model_config.decoder = value;
|
||||
config.model_config.transducer.decoder = value;
|
||||
break;
|
||||
case 'j':
|
||||
config.model_config.joiner = value;
|
||||
config.model_config.transducer.joiner = value;
|
||||
break;
|
||||
case 'n':
|
||||
config.model_config.num_threads = atoi(value);
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
|
||||
C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */; };
|
||||
C984A7E829A9EEB700D74C52 /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E729A9EEB700D74C52 /* AppDelegate.swift */; };
|
||||
C984A7EA29A9EEB700D74C52 /* SceneDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */; };
|
||||
C984A7F129A9EEB900D74C52 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = C984A7F029A9EEB900D74C52 /* Assets.xcassets */; };
|
||||
@@ -18,8 +20,6 @@
|
||||
C984A82829AA196100D74C52 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = C984A82629AA196100D74C52 /* Main.storyboard */; };
|
||||
C984A82A29AA19AC00D74C52 /* Model.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A82929AA19AC00D74C52 /* Model.swift */; };
|
||||
C984A83C29AA430B00D74C52 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = C984A83B29AA430B00D74C52 /* ViewController.swift */; };
|
||||
C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */; };
|
||||
C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
@@ -40,6 +40,10 @@
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = onnxruntime.xcframework; path = "../../build-ios/ios-onnxruntime/1.15.1/onnxruntime.xcframework"; sourceTree = "<group>"; };
|
||||
C93989B12A89FF78009AB859 /* decoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = decoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx"; sourceTree = "<group>"; };
|
||||
C93989B22A89FF78009AB859 /* encoder.int8.onnx */ = {isa = PBXFileReference; lastKnownFileType = file; name = encoder.int8.onnx; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx"; sourceTree = "<group>"; };
|
||||
C93989B32A89FF78009AB859 /* tokens.txt */ = {isa = PBXFileReference; lastKnownFileType = text; name = tokens.txt; path = "../../../icefall-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt"; sourceTree = "<group>"; };
|
||||
C984A7E429A9EEB700D74C52 /* SherpaOnnx.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = SherpaOnnx.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
C984A7E729A9EEB700D74C52 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
|
||||
C984A7E929A9EEB700D74C52 /* SceneDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = SceneDelegate.swift; sourceTree = "<group>"; };
|
||||
@@ -66,8 +70,8 @@
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
C984A83F29AA43EE00D74C52 /* onnxruntime.xcframework in Frameworks */,
|
||||
C984A83D29AA43D900D74C52 /* sherpa-onnx.xcframework in Frameworks */,
|
||||
C93989B02A89FE33009AB859 /* onnxruntime.xcframework in Frameworks */,
|
||||
C93989AE2A89FE13009AB859 /* sherpa-onnx.xcframework in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@@ -146,8 +150,12 @@
|
||||
C984A81A29AA11C500D74C52 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
C93989B12A89FF78009AB859 /* decoder.int8.onnx */,
|
||||
C93989B22A89FF78009AB859 /* encoder.int8.onnx */,
|
||||
C93989B32A89FF78009AB859 /* tokens.txt */,
|
||||
C984A82029AA139600D74C52 /* onnxruntime.xcframework */,
|
||||
C984A83E29AA43EE00D74C52 /* onnxruntime.xcframework */,
|
||||
C93989AF2A89FE33009AB859 /* onnxruntime.xcframework */,
|
||||
C984A81B29AA11C500D74C52 /* sherpa-onnx.xcframework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
|
||||
Binary file not shown.
@@ -15,70 +15,91 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
|
||||
|
||||
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
||||
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
||||
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
numThreads: 2,
|
||||
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner
|
||||
),
|
||||
numThreads: 1,
|
||||
modelType: "zipformer"
|
||||
)
|
||||
}
|
||||
|
||||
func getZhZipformer20230615() -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
func getZhZipformer20230615() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
numThreads: 2,
|
||||
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner
|
||||
),
|
||||
numThreads: 1,
|
||||
modelType: "zipformer2"
|
||||
)
|
||||
}
|
||||
|
||||
func getZhZipformer20230615Int8() -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
func getZhZipformer20230615Int8() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder-epoch-12-avg-4-chunk-16-left-128.int8", "onnx")
|
||||
let decoder = getResource("decoder-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||
let joiner = getResource("joiner-epoch-12-avg-4-chunk-16-left-128", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
numThreads: 2,
|
||||
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner),
|
||||
numThreads: 1,
|
||||
modelType: "zipformer2"
|
||||
)
|
||||
}
|
||||
|
||||
func getEnZipformer20230626() -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
func getEnZipformer20230626() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||
let decoder = getResource("decoder-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||
let joiner = getResource("joiner-epoch-99-avg-1-chunk-16-left-128", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
numThreads: 2,
|
||||
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner),
|
||||
numThreads: 1,
|
||||
modelType: "zipformer2"
|
||||
)
|
||||
}
|
||||
|
||||
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder.int8", "onnx")
|
||||
let decoder = getResource("decoder.int8", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
paraformer: sherpaOnnxOnlineParaformerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder),
|
||||
numThreads: 1,
|
||||
modelType: "paraformer"
|
||||
)
|
||||
}
|
||||
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to add more models if you need
|
||||
|
||||
@@ -87,7 +87,8 @@ class ViewController: UIViewController {
|
||||
|
||||
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||
// let modelConfig = getZhZipformer20230615()
|
||||
let modelConfig = getEnZipformer20230626()
|
||||
// let modelConfig = getEnZipformer20230626()
|
||||
let modelConfig = getBilingualStreamingZhEnParaformer()
|
||||
|
||||
let featConfig = sherpaOnnxFeatureConfig(
|
||||
sampleRate: 16000,
|
||||
|
||||
Binary file not shown.
@@ -15,22 +15,39 @@ func getResource(_ forResource: String, _ ofType: String) -> String {
|
||||
|
||||
/// sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 (Bilingual, Chinese + English)
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/zipformer-transducer-models.html
|
||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
func getBilingualStreamZhEnZipformer20230220() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder-epoch-99-avg-1", "onnx")
|
||||
let decoder = getResource("decoder-epoch-99-avg-1", "onnx")
|
||||
let joiner = getResource("joiner-epoch-99-avg-1", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
transducer: sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner),
|
||||
numThreads: 2,
|
||||
modelType: "zipformer"
|
||||
)
|
||||
}
|
||||
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||
func getBilingualStreamingZhEnParaformer() -> SherpaOnnxOnlineModelConfig {
|
||||
let encoder = getResource("encoder.int8", "onnx")
|
||||
let decoder = getResource("decoder.int8", "onnx")
|
||||
let tokens = getResource("tokens", "txt")
|
||||
|
||||
return sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
paraformer: sherpaOnnxOnlineParaformerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder),
|
||||
numThreads: 1,
|
||||
modelType: "paraformer"
|
||||
)
|
||||
}
|
||||
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to add more models if you need
|
||||
|
||||
@@ -16,15 +16,15 @@ enum Status {
|
||||
class SherpaOnnxViewModel: ObservableObject {
|
||||
@Published var status: Status = .stop
|
||||
@Published var subtitles: String = ""
|
||||
|
||||
|
||||
var sentences: [String] = []
|
||||
|
||||
|
||||
var audioEngine: AVAudioEngine? = nil
|
||||
var recognizer: SherpaOnnxRecognizer! = nil
|
||||
|
||||
|
||||
var lastSentence: String = ""
|
||||
let maxSentence: Int = 20
|
||||
|
||||
|
||||
var results: String {
|
||||
if sentences.isEmpty && lastSentence.isEmpty {
|
||||
return ""
|
||||
@@ -42,24 +42,25 @@ class SherpaOnnxViewModel: ObservableObject {
|
||||
.joined(separator: "\n") + "\n\(sentences.count): \(lastSentence.lowercased())"
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func updateLabel() {
|
||||
DispatchQueue.main.async {
|
||||
self.subtitles = self.results
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
init() {
|
||||
initRecognizer()
|
||||
initRecorder()
|
||||
}
|
||||
|
||||
|
||||
private func initRecognizer() {
|
||||
// Please select the model that best suits your needs.
|
||||
//
|
||||
// You can also modify Model.swift to add new pre-trained models from
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||
// let modelConfig = getBilingualStreamZhEnZipformer20230220()
|
||||
let modelConfig = getBilingualStreamingZhEnParaformer()
|
||||
|
||||
let featConfig = sherpaOnnxFeatureConfig(
|
||||
sampleRate: 16000,
|
||||
@@ -77,7 +78,7 @@ class SherpaOnnxViewModel: ObservableObject {
|
||||
)
|
||||
recognizer = SherpaOnnxRecognizer(config: &config)
|
||||
}
|
||||
|
||||
|
||||
private func initRecorder() {
|
||||
print("init recorder")
|
||||
audioEngine = AVAudioEngine()
|
||||
@@ -152,7 +153,7 @@ class SherpaOnnxViewModel: ObservableObject {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public func toggleRecorder() {
|
||||
if status == .stop {
|
||||
startRecorder()
|
||||
|
||||
@@ -39,11 +39,17 @@ SherpaOnnxOnlineRecognizer *CreateOnlineRecognizer(
|
||||
SHERPA_ONNX_OR(config->feat_config.feature_dim, 80);
|
||||
|
||||
recognizer_config.model_config.transducer.encoder =
|
||||
SHERPA_ONNX_OR(config->model_config.encoder, "");
|
||||
SHERPA_ONNX_OR(config->model_config.transducer.encoder, "");
|
||||
recognizer_config.model_config.transducer.decoder =
|
||||
SHERPA_ONNX_OR(config->model_config.decoder, "");
|
||||
SHERPA_ONNX_OR(config->model_config.transducer.decoder, "");
|
||||
recognizer_config.model_config.transducer.joiner =
|
||||
SHERPA_ONNX_OR(config->model_config.joiner, "");
|
||||
SHERPA_ONNX_OR(config->model_config.transducer.joiner, "");
|
||||
|
||||
recognizer_config.model_config.paraformer.encoder =
|
||||
SHERPA_ONNX_OR(config->model_config.paraformer.encoder, "");
|
||||
recognizer_config.model_config.paraformer.decoder =
|
||||
SHERPA_ONNX_OR(config->model_config.paraformer.decoder, "");
|
||||
|
||||
recognizer_config.model_config.tokens =
|
||||
SHERPA_ONNX_OR(config->model_config.tokens, "");
|
||||
recognizer_config.model_config.num_threads =
|
||||
@@ -128,6 +134,8 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
const auto &text = result.text;
|
||||
|
||||
auto r = new SherpaOnnxOnlineRecognizerResult;
|
||||
memset(r, 0, sizeof(SherpaOnnxOnlineRecognizerResult));
|
||||
|
||||
// copy text
|
||||
r->text = new char[text.size() + 1];
|
||||
std::copy(text.begin(), text.end(), const_cast<char *>(r->text));
|
||||
@@ -153,7 +161,6 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
r->tokens = new char[total_length];
|
||||
memset(reinterpret_cast<void *>(const_cast<char *>(r->tokens)), 0,
|
||||
total_length);
|
||||
r->timestamps = new float[r->count];
|
||||
char **tokens_temp = new char *[r->count];
|
||||
int32_t pos = 0;
|
||||
for (int32_t i = 0; i < r->count; ++i) {
|
||||
@@ -162,10 +169,17 @@ SherpaOnnxOnlineRecognizerResult *GetOnlineStreamResult(
|
||||
result.tokens[i].c_str(), result.tokens[i].size());
|
||||
// +1 to move past the null character
|
||||
pos += result.tokens[i].size() + 1;
|
||||
r->timestamps[i] = result.timestamps[i];
|
||||
}
|
||||
r->tokens_arr = tokens_temp;
|
||||
|
||||
if (!result.timestamps.empty()) {
|
||||
r->timestamps = new float[r->count];
|
||||
std::copy(result.timestamps.begin(), result.timestamps.end(),
|
||||
r->timestamps);
|
||||
} else {
|
||||
r->timestamps = nullptr;
|
||||
}
|
||||
|
||||
r->tokens_arr = tokens_temp;
|
||||
} else {
|
||||
r->count = 0;
|
||||
r->timestamps = nullptr;
|
||||
|
||||
@@ -50,12 +50,25 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOnlineTransducerModelConfig {
|
||||
const char *encoder;
|
||||
const char *decoder;
|
||||
const char *joiner;
|
||||
} SherpaOnnxOnlineTransducerModelConfig;
|
||||
|
||||
// please visit
|
||||
// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||
// to download pre-trained streaming paraformer models
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineParaformerModelConfig {
|
||||
const char *encoder;
|
||||
const char *decoder;
|
||||
} SherpaOnnxOnlineParaformerModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxModelConfig {
|
||||
SherpaOnnxOnlineTransducerModelConfig transducer;
|
||||
SherpaOnnxOnlineParaformerModelConfig paraformer;
|
||||
const char *tokens;
|
||||
int32_t num_threads;
|
||||
const char *provider;
|
||||
int32_t debug; // true to print debug information of the model
|
||||
const char *model_type;
|
||||
} SherpaOnnxOnlineTransducerModelConfig;
|
||||
} SherpaOnnxOnlineModelConfig;
|
||||
|
||||
/// It expects 16 kHz 16-bit single channel wave format.
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
||||
@@ -71,7 +84,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxFeatureConfig {
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOnlineRecognizerConfig {
|
||||
SherpaOnnxFeatureConfig feat_config;
|
||||
SherpaOnnxOnlineTransducerModelConfig model_config;
|
||||
SherpaOnnxOnlineModelConfig model_config;
|
||||
|
||||
/// Possible values are: greedy_search, modified_beam_search
|
||||
const char *decoding_method;
|
||||
|
||||
@@ -18,31 +18,71 @@ func toCPointer(_ s: String) -> UnsafePointer<Int8>! {
|
||||
/// Return an instance of SherpaOnnxOnlineTransducerModelConfig.
|
||||
///
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/index.html
|
||||
/// to download the required `.onnx` files.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - encoder: Path to encoder.onnx
|
||||
/// - decoder: Path to decoder.onnx
|
||||
/// - joiner: Path to joiner.onnx
|
||||
/// - tokens: Path to tokens.txt
|
||||
/// - numThreads: Number of threads to use for neural network computation.
|
||||
///
|
||||
/// - Returns: Return an instance of SherpaOnnxOnlineTransducerModelConfig
|
||||
func sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: String,
|
||||
decoder: String,
|
||||
joiner: String,
|
||||
tokens: String,
|
||||
numThreads: Int = 2,
|
||||
provider: String = "cpu",
|
||||
debug: Int = 0,
|
||||
modelType: String = ""
|
||||
encoder: String = "",
|
||||
decoder: String = "",
|
||||
joiner: String = ""
|
||||
) -> SherpaOnnxOnlineTransducerModelConfig {
|
||||
return SherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: toCPointer(encoder),
|
||||
decoder: toCPointer(decoder),
|
||||
joiner: toCPointer(joiner),
|
||||
joiner: toCPointer(joiner)
|
||||
)
|
||||
}
|
||||
|
||||
/// Return an instance of SherpaOnnxOnlineParaformerModelConfig.
|
||||
///
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/index.html
|
||||
/// to download the required `.onnx` files.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - encoder: Path to encoder.onnx
|
||||
/// - decoder: Path to decoder.onnx
|
||||
///
|
||||
/// - Returns: Return an instance of SherpaOnnxOnlineParaformerModelConfig
|
||||
func sherpaOnnxOnlineParaformerModelConfig(
|
||||
encoder: String = "",
|
||||
decoder: String = ""
|
||||
) -> SherpaOnnxOnlineParaformerModelConfig {
|
||||
return SherpaOnnxOnlineParaformerModelConfig(
|
||||
encoder: toCPointer(encoder),
|
||||
decoder: toCPointer(decoder)
|
||||
)
|
||||
}
|
||||
|
||||
/// Return an instance of SherpaOnnxOnlineModelConfig.
|
||||
///
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
|
||||
/// to download the required `.onnx` files.
|
||||
///
|
||||
/// - Parameters:
|
||||
/// - tokens: Path to tokens.txt
|
||||
/// - numThreads: Number of threads to use for neural network computation.
|
||||
///
|
||||
/// - Returns: Return an instance of SherpaOnnxOnlineModelConfig
|
||||
func sherpaOnnxOnlineModelConfig(
|
||||
tokens: String,
|
||||
transducer: SherpaOnnxOnlineTransducerModelConfig = sherpaOnnxOnlineTransducerModelConfig(),
|
||||
paraformer: SherpaOnnxOnlineParaformerModelConfig = sherpaOnnxOnlineParaformerModelConfig(),
|
||||
numThreads: Int = 1,
|
||||
provider: String = "cpu",
|
||||
debug: Int = 0,
|
||||
modelType: String = ""
|
||||
) -> SherpaOnnxOnlineModelConfig {
|
||||
return SherpaOnnxOnlineModelConfig(
|
||||
transducer: transducer,
|
||||
paraformer: paraformer,
|
||||
tokens: toCPointer(tokens),
|
||||
num_threads: Int32(numThreads),
|
||||
provider: toCPointer(provider),
|
||||
@@ -62,7 +102,7 @@ func sherpaOnnxFeatureConfig(
|
||||
|
||||
func sherpaOnnxOnlineRecognizerConfig(
|
||||
featConfig: SherpaOnnxFeatureConfig,
|
||||
modelConfig: SherpaOnnxOnlineTransducerModelConfig,
|
||||
modelConfig: SherpaOnnxOnlineModelConfig,
|
||||
enableEndpoint: Bool = false,
|
||||
rule1MinTrailingSilence: Float = 2.4,
|
||||
rule2MinTrailingSilence: Float = 1.2,
|
||||
@@ -100,17 +140,17 @@ class SherpaOnnxOnlineRecongitionResult {
|
||||
}
|
||||
|
||||
var count: Int32 {
|
||||
return result.pointee.count
|
||||
return result.pointee.count
|
||||
}
|
||||
|
||||
var tokens: [String] {
|
||||
if let tokensPointer = result.pointee.tokens_arr {
|
||||
var tokens: [String] = []
|
||||
for index in 0..<count {
|
||||
if let tokenPointer = tokensPointer[Int(index)] {
|
||||
let token = String(cString: tokenPointer)
|
||||
tokens.append(token)
|
||||
}
|
||||
if let tokenPointer = tokensPointer[Int(index)] {
|
||||
let token = String(cString: tokenPointer)
|
||||
tokens.append(token)
|
||||
}
|
||||
}
|
||||
return tokens
|
||||
} else {
|
||||
|
||||
@@ -13,31 +13,34 @@ extension AVAudioPCMBuffer {
|
||||
}
|
||||
|
||||
func run() {
|
||||
let encoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
|
||||
let decoder = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
|
||||
let joiner = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
|
||||
let encoder =
|
||||
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx"
|
||||
let decoder =
|
||||
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx"
|
||||
let joiner =
|
||||
"./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx"
|
||||
let tokens = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt"
|
||||
|
||||
let modelConfig = sherpaOnnxOnlineTransducerModelConfig(
|
||||
let transducerConfig = sherpaOnnxOnlineTransducerModelConfig(
|
||||
encoder: encoder,
|
||||
decoder: decoder,
|
||||
joiner: joiner,
|
||||
joiner: joiner
|
||||
)
|
||||
|
||||
let modelConfig = sherpaOnnxOnlineModelConfig(
|
||||
tokens: tokens,
|
||||
numThreads: 2)
|
||||
transducer: transducerConfig
|
||||
)
|
||||
|
||||
let featConfig = sherpaOnnxFeatureConfig(
|
||||
sampleRate: 16000,
|
||||
featureDim: 80
|
||||
)
|
||||
var config = sherpaOnnxOnlineRecognizerConfig(
|
||||
featConfig: featConfig,
|
||||
modelConfig: modelConfig,
|
||||
enableEndpoint: false,
|
||||
decodingMethod: "modified_beam_search",
|
||||
maxActivePaths: 4
|
||||
featConfig: featConfig,
|
||||
modelConfig: modelConfig
|
||||
)
|
||||
|
||||
|
||||
let recognizer = SherpaOnnxRecognizer(config: &config)
|
||||
|
||||
let filePath = "./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/1.wav"
|
||||
@@ -60,7 +63,7 @@ func run() {
|
||||
recognizer.acceptWaveform(samples: tailPadding)
|
||||
|
||||
recognizer.inputFinished()
|
||||
while (recognizer.isReady()) {
|
||||
while recognizer.isReady() {
|
||||
recognizer.decode()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user