Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

The model documentation is available at:
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -43,6 +43,10 @@ function freeConfig(config, Module) {
freeConfig(config.dolphin, Module)
}
if ('zipformerCtc' in config) {
freeConfig(config.zipformerCtc, Module)
}
if ('moonshine' in config) {
freeConfig(config.moonshine, Module)
}
@@ -627,6 +631,23 @@ function initSherpaOnnxOfflineDolphinModelConfig(config, Module) {
}
}
/**
 * Builds the WASM-heap representation of an offline Zipformer CTC model
 * config: a single pointer slot that refers to the UTF-8 encoded model path.
 *
 * @param {Object} config - Zipformer CTC config; `config.model` is the model
 *     path. A missing or falsy value is treated as the empty string.
 * @param {Object} Module - Emscripten-style module providing
 *     `lengthBytesUTF8`, `_malloc`, `stringToUTF8` and `setValue`.
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the
 *     allocation holding the path string, `ptr` is the allocation holding
 *     the one-pointer struct, and `len` is the struct size in bytes (4).
 */
function initSherpaOnnxOfflineZipformerCtcModelConfig(config, Module) {
  const modelPath = config.model || '';

  // One extra byte beyond the encoded length (presumably the trailing NUL).
  const modelPathBytes = Module.lengthBytesUTF8(modelPath) + 1;
  const buffer = Module._malloc(modelPathBytes);

  // The struct consists of exactly one 4-byte pointer.
  const len = 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(modelPath, buffer, modelPathBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder || '') + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder || '') + 1;
@@ -840,6 +861,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
};
}
if (!('zipformerCtc' in config)) {
config.zipformerCtc = {
model: '',
};
}
if (!('whisper' in config)) {
config.whisper = {
encoder: '',
@@ -906,9 +933,12 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
const dolphin =
initSherpaOnnxOfflineDolphinModelConfig(config.dolphin, Module);
const zipformerCtc =
initSherpaOnnxOfflineZipformerCtcModelConfig(config.zipformerCtc, Module);
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len +
dolphin.len;
dolphin.len + zipformerCtc.len;
const ptr = Module._malloc(len);
@@ -1010,11 +1040,14 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
Module._CopyHeap(dolphin.ptr, dolphin.len, ptr + offset);
offset += dolphin.len;
Module._CopyHeap(zipformerCtc.ptr, zipformerCtc.len, ptr + offset);
offset += zipformerCtc.len;
return {
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr,
dolphin: dolphin
dolphin: dolphin, zipformerCtc: zipformerCtc
}
}

View File

@@ -13,6 +13,7 @@ extern "C" {
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
@@ -31,7 +32,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
sizeof(SherpaOnnxOfflineDolphinModelConfig),
sizeof(SherpaOnnxOfflineDolphinModelConfig) +
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig),
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
@@ -77,6 +79,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
auto moonshine = &model_config->moonshine;
auto fire_red_asr = &model_config->fire_red_asr;
auto dolphin = &model_config->dolphin;
auto zipformer_ctc = &model_config->zipformer_ctc;
fprintf(stdout, "----------offline transducer model config----------\n");
fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -117,6 +120,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "----------offline Dolphin model config----------\n");
fprintf(stdout, "model: %s\n", dolphin->model);
fprintf(stdout, "----------offline zipformer ctc model config----------\n");
fprintf(stdout, "model: %s\n", zipformer_ctc->model);
fprintf(stdout, "tokens: %s\n", model_config->tokens);
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
fprintf(stdout, "provider: %s\n", model_config->provider);

View File

@@ -117,6 +117,10 @@ function initOfflineRecognizer() {
};
} else if (fileExists('dolphin.onnx')) {
config.modelConfig.dolphin = {model: './dolphin.onnx'};
} else if (fileExists('zipformer-ctc.onnx')) {
// Note: rename the Zipformer CTC model file model.int8.onnx to
// zipformer-ctc.onnx
config.modelConfig.zipformerCtc = {model: './zipformer-ctc.onnx'};
} else {
console.log('Please specify a model.');
alert('Please specify a model.');