Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

Model doc is available at
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -13,6 +13,7 @@ extern "C" {
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineZipformerCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineDolphinModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
@@ -31,7 +32,8 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) +
sizeof(SherpaOnnxOfflineDolphinModelConfig),
sizeof(SherpaOnnxOfflineDolphinModelConfig) +
sizeof(SherpaOnnxOfflineZipformerCtcModelConfig),
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
@@ -77,6 +79,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
auto moonshine = &model_config->moonshine;
auto fire_red_asr = &model_config->fire_red_asr;
auto dolphin = &model_config->dolphin;
auto zipformer_ctc = &model_config->zipformer_ctc;
fprintf(stdout, "----------offline transducer model config----------\n");
fprintf(stdout, "encoder: %s\n", transducer->encoder);
@@ -117,6 +120,9 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "----------offline Dolphin model config----------\n");
fprintf(stdout, "model: %s\n", dolphin->model);
fprintf(stdout, "----------offline zipformer ctc model config----------\n");
fprintf(stdout, "model: %s\n", zipformer_ctc->model);
fprintf(stdout, "tokens: %s\n", model_config->tokens);
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
fprintf(stdout, "provider: %s\n", model_config->provider);