Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

Model doc is available at
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -104,6 +104,27 @@ class OfflineDolphinModelConfig {
final String model;
}
/// Configuration for a non-streaming Zipformer CTC model.
///
/// Carries a single string, [model], which defaults to the empty string.
class OfflineZipformerCtcModelConfig {
  /// Creates a config; [model] is `''` when omitted.
  const OfflineZipformerCtcModelConfig({this.model = ''});

  /// Builds a config from [json] by reading its `'model'` entry.
  ///
  /// A missing or `null` entry falls back to the empty string.
  factory OfflineZipformerCtcModelConfig.fromJson(Map<String, dynamic> json) {
    final modelValue = json['model'] as String?;
    return OfflineZipformerCtcModelConfig(model: modelValue ?? '');
  }

  /// The model string stored by this config.
  // NOTE(review): presumably the path to the model file — confirm with callers.
  final String model;

  /// Serializes this config into a map holding only the `'model'` entry.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{'model': model};
  }

  @override
  String toString() => 'OfflineZipformerCtcModelConfig(model: $model)';
}
class OfflineWhisperModelConfig {
const OfflineWhisperModelConfig(
{this.encoder = '',
@@ -288,6 +309,7 @@ class OfflineModelConfig {
this.moonshine = const OfflineMoonshineModelConfig(),
this.fireRedAsr = const OfflineFireRedAsrModelConfig(),
this.dolphin = const OfflineDolphinModelConfig(),
this.zipformerCtc = const OfflineZipformerCtcModelConfig(),
required this.tokens,
this.numThreads = 1,
this.debug = true,
@@ -336,6 +358,10 @@ class OfflineModelConfig {
? OfflineDolphinModelConfig.fromJson(
json['dolphin'] as Map<String, dynamic>)
: const OfflineDolphinModelConfig(),
zipformerCtc: json['zipformerCtc'] != null
? OfflineZipformerCtcModelConfig.fromJson(
json['zipformerCtc'] as Map<String, dynamic>)
: const OfflineZipformerCtcModelConfig(),
tokens: json['tokens'] as String,
numThreads: json['numThreads'] as int? ?? 1,
debug: json['debug'] as bool? ?? true,
@@ -349,7 +375,7 @@ class OfflineModelConfig {
@override
String toString() {
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
return 'OfflineModelConfig(transducer: $transducer, paraformer: $paraformer, nemoCtc: $nemoCtc, whisper: $whisper, tdnn: $tdnn, senseVoice: $senseVoice, moonshine: $moonshine, fireRedAsr: $fireRedAsr, dolphin: $dolphin, zipformerCtc: $zipformerCtc, tokens: $tokens, numThreads: $numThreads, debug: $debug, provider: $provider, modelType: $modelType, modelingUnit: $modelingUnit, bpeVocab: $bpeVocab, telespeechCtc: $telespeechCtc)';
}
Map<String, dynamic> toJson() => {
@@ -362,6 +388,7 @@ class OfflineModelConfig {
'moonshine': moonshine.toJson(),
'fireRedAsr': fireRedAsr.toJson(),
'dolphin': dolphin.toJson(),
'zipformerCtc': zipformerCtc.toJson(),
'tokens': tokens,
'numThreads': numThreads,
'debug': debug,
@@ -381,6 +408,7 @@ class OfflineModelConfig {
final OfflineMoonshineModelConfig moonshine;
final OfflineFireRedAsrModelConfig fireRedAsr;
final OfflineDolphinModelConfig dolphin;
final OfflineZipformerCtcModelConfig zipformerCtc;
final String tokens;
final int numThreads;
@@ -578,6 +606,8 @@ class OfflineRecognizer {
config.model.fireRedAsr.decoder.toNativeUtf8();
c.ref.model.dolphin.model = config.model.dolphin.model.toNativeUtf8();
c.ref.model.zipformerCtc.model =
config.model.zipformerCtc.model.toNativeUtf8();
c.ref.model.tokens = config.model.tokens.toNativeUtf8();
@@ -623,6 +653,7 @@ class OfflineRecognizer {
calloc.free(c.ref.model.modelType);
calloc.free(c.ref.model.provider);
calloc.free(c.ref.model.tokens);
calloc.free(c.ref.model.zipformerCtc.model);
calloc.free(c.ref.model.dolphin.model);
calloc.free(c.ref.model.fireRedAsr.decoder);
calloc.free(c.ref.model.fireRedAsr.encoder);

View File

@@ -266,6 +266,10 @@ final class SherpaOnnxOfflineDolphinModelConfig extends Struct {
external Pointer<Utf8> model;
}
/// FFI struct describing the offline Zipformer CTC model configuration.
///
/// NOTE(review): presumably mirrors the native C struct of the same name, in
/// which case the field order and types here must match the C header — confirm.
final class SherpaOnnxOfflineZipformerCtcModelConfig extends Struct {
/// Pointer to a UTF-8 string identifying the model.
// NOTE(review): presumably the model file path; ownership of the pointed-to
// memory is not visible here — verify who allocates and frees it.
external Pointer<Utf8> model;
}
final class SherpaOnnxOfflineWhisperModelConfig extends Struct {
external Pointer<Utf8> encoder;
external Pointer<Utf8> decoder;
@@ -333,6 +337,7 @@ final class SherpaOnnxOfflineModelConfig extends Struct {
external SherpaOnnxOfflineMoonshineModelConfig moonshine;
external SherpaOnnxOfflineFireRedAsrModelConfig fireRedAsr;
external SherpaOnnxOfflineDolphinModelConfig dolphin;
external SherpaOnnxOfflineZipformerCtcModelConfig zipformerCtc;
}
final class SherpaOnnxOfflineRecognizerConfig extends Struct {