Support non-streaming zipformer CTC ASR models (#2340)
This PR adds support for non-streaming Zipformer CTC ASR models across multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in the C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

The model documentation is available at https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
@@ -43,6 +43,10 @@ function freeConfig(config, Module) {
|
||||
freeConfig(config.dolphin, Module)
|
||||
}
|
||||
|
||||
if ('zipformerCtc' in config) {
|
||||
freeConfig(config.zipformerCtc, Module)
|
||||
}
|
||||
|
||||
if ('moonshine' in config) {
|
||||
freeConfig(config.moonshine, Module)
|
||||
}
|
||||
@@ -627,6 +631,23 @@ function initSherpaOnnxOfflineDolphinModelConfig(config, Module) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Serialize an offline Zipformer CTC model config into memory obtained from
 * Module._malloc.
 *
 * Two allocations are made:
 *  - `buffer`: the NUL-terminated UTF-8 bytes of `config.model`
 *    (an empty string is used when `config.model` is missing or falsy);
 *  - `ptr`: a 4-byte slot holding the address of `buffer`, i.e. a struct
 *    with a single pointer field.
 *
 * @param {Object} config  Object with an optional `model` string (model path).
 * @param {Object} Module  Object providing `lengthBytesUTF8`, `_malloc`,
 *                         `stringToUTF8` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} Addresses of the two
 *          allocations and the byte length of the struct at `ptr`.
 *          NOTE(review): the caller presumably releases both allocations via
 *          freeConfig() -- confirm against that function.
 */
function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
  // Evaluate the fallback once so the size computation and the copy below
  // always agree on the exact same string.
  const model = config.model || '';

  // +1 for the trailing '\0' written by stringToUTF8.
  const n = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(n);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, n);

  // Store the address of the string in the struct's only field.
  Module.setValue(ptr, buffer, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  }
}
|
||||
|
||||
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
|
||||
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
|
||||
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
|
||||
@@ -840,6 +861,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
};
|
||||
}
|
||||
|
||||
if (!('zipformerCtc' in config)) {
|
||||
config.zipformerCtc = {
|
||||
model: '',
|
||||
};
|
||||
}
|
||||
|
||||
if (!('whisper' in config)) {
|
||||
config.whisper = {
|
||||
encoder: '',
|
||||
@@ -906,9 +933,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
const dolphin =
|
||||
initSherpaOnnxOfflineDolphinModelConfig(config.dolphin, Module);
|
||||
|
||||
const zipformerCtc =
|
||||
initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module);
|
||||
|
||||
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
|
||||
dolphin.len;
|
||||
dolphin.len + zipformerCtc.len;
|
||||
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
@@ -1010,11 +1040,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
||||
Module._CopyHeap(dolphin.ptr, dolphin.len, ptr + offset);
|
||||
offset += dolphin.len;
|
||||
|
||||
Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset);
|
||||
offset += zipformerCtc.len;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
||||
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
|
||||
dolphin: dolphin
|
||||
dolphin: dolphin, zipformerCtc: zipformerCtc
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,6 +13,7 @@ extern "C" {
|
||||
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
||||
@@ -31,7 +32,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineDolphinModelConfig),
|
||||
sizeof(SherpaOnnxOfflineDolphinModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig),
|
||||
|
||||
"");
|
||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||
@@ -77,6 +79,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
auto moonshine = &model_config->moonshine;
|
||||
auto fire_red_asr = &model_config->fire_red_asr;
|
||||
auto dolphin = &model_config->dolphin;
|
||||
auto zipformer_ctc = &model_config->zipformer_ctc;
|
||||
|
||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||
@@ -117,6 +120,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
fprintf(stdout, "----------offline Dolphin model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", dolphin->model);
|
||||
|
||||
fprintf(stdout, "----------offline zipformer ctc model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", zipformer_ctc->model);
|
||||
|
||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||
|
||||
@@ -117,6 +117,10 @@ function initOfflineRecognizer() {
|
||||
};
|
||||
} else if (fileExists('dolphin.onnx')) {
|
||||
config.modelConfig.dolphin = {model: './dolphin.onnx'};
|
||||
} else if (fileExists('zipformer-ctc.onnx')) {
|
||||
// you need to rename model.int8.onnx from zipformer CTC to
|
||||
// zipformer-ctc.onnx
|
||||
config.modelConfig.zipformerCtc = {model: './zipformer-ctc.onnx'};
|
||||
} else {
|
||||
console.log('Please specify a model.');
|
||||
alert('Please specify a model.');
|
||||
|
||||
Reference in New Issue
Block a user