Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

Model doc is available at
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -398,6 +398,10 @@ type OfflineNemoEncDecCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
// OfflineZipformerCtcModelConfig holds the settings for a non-streaming
// Zipformer CTC ASR model. It is used as the ZipformerCtc field of
// OfflineModelConfig.
type OfflineZipformerCtcModelConfig struct {
	// Model is the path to the model file, e.g., model.onnx or model.int8.onnx.
	Model string
}
// OfflineDolphinModelConfig holds the model file path used by the Dolphin
// field of OfflineModelConfig.
type OfflineDolphinModelConfig struct {
	// Model is the path to the model file, e.g., model.onnx or model.int8.onnx.
	Model string
}
@@ -439,16 +443,17 @@ type OfflineLMConfig struct {
}
type OfflineModelConfig struct {
Transducer OfflineTransducerModelConfig
Paraformer OfflineParaformerModelConfig
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
SenseVoice OfflineSenseVoiceModelConfig
Moonshine OfflineMoonshineModelConfig
FireRedAsr OfflineFireRedAsrModelConfig
Dolphin OfflineDolphinModelConfig
Tokens string // Path to tokens.txt
Transducer OfflineTransducerModelConfig
Paraformer OfflineParaformerModelConfig
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
SenseVoice OfflineSenseVoiceModelConfig
Moonshine OfflineMoonshineModelConfig
FireRedAsr OfflineFireRedAsrModelConfig
Dolphin OfflineDolphinModelConfig
ZipformerCtc OfflineZipformerCtcModelConfig
Tokens string // Path to tokens.txt
// Number of threads to use for neural network computation
NumThreads int
@@ -540,6 +545,7 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
@@ -653,11 +659,22 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
C.free(unsafe.Pointer(c.model_config.fire_red_asr.encoder))
c.model_config.fire_red_asr.encoder = nil
}
if c.model_config.fire_red_asr.decoder != nil {
C.free(unsafe.Pointer(c.model_config.fire_red_asr.decoder))
c.model_config.fire_red_asr.decoder = nil
}
if c.model_config.dolphin.model != nil {
C.free(unsafe.Pointer(c.model_config.dolphin.model))
c.model_config.dolphin.model = nil
}
if c.model_config.zipformer_ctc.model != nil {
C.free(unsafe.Pointer(c.model_config.zipformer_ctc.model))
c.model_config.zipformer_ctc.model = nil
}
if c.model_config.tokens != nil {
C.free(unsafe.Pointer(c.model_config.tokens))
c.model_config.tokens = nil