Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

Model documentation is available at:
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -344,7 +344,7 @@ def get_models():
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-ctc-fp16-zh-2025-06-30",
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30",
idx=19,
lang="zh",
short_name="large_zipformer_fp16",
@@ -360,6 +360,26 @@ def get_models():
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30",
idx=20,
lang="zh",
short_name="large_zipformer_int8",
rule_fsts="itn_zh_number.fst",
cmd="""
if [ ! -f itn_zh_number.fst ]; then
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
fi
pushd $model_name
rm -fv bpe.model
rm -rf test_wavs
ls -lh
popd
""",
),

View File

@@ -548,6 +548,23 @@ def get_models():
ls -lh
popd
""",
),
Model(
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
idx=31,
lang="zh",
lang2="Chinese",
short_name="zipformer_2025_07_03",
cmd="""
pushd $model_name
rm -rfv test_wavs
rm -rfv bbpe.model
ls -lh
popd
""",
),

View File

@@ -27,6 +27,7 @@ namespace SherpaOnnx
Moonshine = new OfflineMoonshineModelConfig();
FireRedAsr = new OfflineFireRedAsrModelConfig();
Dolphin = new OfflineDolphinModelConfig();
ZipformerCtc = new OfflineZipformerCtcModelConfig();
}
public OfflineTransducerModelConfig Transducer;
public OfflineParaformerModelConfig Paraformer;
@@ -60,5 +61,6 @@ namespace SherpaOnnx
public OfflineMoonshineModelConfig Moonshine;
public OfflineFireRedAsrModelConfig FireRedAsr;
public OfflineDolphinModelConfig Dolphin;
public OfflineZipformerCtcModelConfig ZipformerCtc;
}
}

View File

@@ -0,0 +1,18 @@
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
using System.Runtime.InteropServices;
namespace SherpaOnnx
{
/// <summary>
/// Configuration for a non-streaming (offline) Zipformer CTC ASR model.
/// </summary>
/// <remarks>
/// The struct uses sequential layout, so its fields keep their declaration
/// order when marshalled. NOTE(review): presumably this mirrors the native
/// zipformer_ctc config struct of the C API -- confirm against the C header.
/// </remarks>
[StructLayout(LayoutKind.Sequential)]
public struct OfflineZipformerCtcModelConfig
{
/// <summary>
/// Creates a config whose <see cref="Model"/> is the empty string
/// instead of null.
/// </summary>
public OfflineZipformerCtcModelConfig()
{
Model = "";
}
/// <summary>
/// Model location, marshalled as a null-terminated single-byte string
/// (UnmanagedType.LPStr). NOTE(review): presumably a path to the ONNX
/// model file, e.g. model.onnx or model.int8.onnx -- confirm.
/// </summary>
[MarshalAs(UnmanagedType.LPStr)]
public string Model;
}
}

View File

@@ -0,0 +1 @@
../../../../go-api-examples/non-streaming-decode-files/run-zipformer-ctc.sh

View File

@@ -398,6 +398,10 @@ type OfflineNemoEncDecCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
// OfflineZipformerCtcModelConfig holds the configuration of a non-streaming
// Zipformer CTC model. It is embedded in OfflineModelConfig as the
// ZipformerCtc field.
type OfflineZipformerCtcModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
// OfflineDolphinModelConfig holds the model path for the Dolphin model.
// It is embedded in OfflineModelConfig as the Dolphin field.
type OfflineDolphinModelConfig struct {
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
}
@@ -439,16 +443,17 @@ type OfflineLMConfig struct {
}
type OfflineModelConfig struct {
Transducer OfflineTransducerModelConfig
Paraformer OfflineParaformerModelConfig
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
SenseVoice OfflineSenseVoiceModelConfig
Moonshine OfflineMoonshineModelConfig
FireRedAsr OfflineFireRedAsrModelConfig
Dolphin OfflineDolphinModelConfig
Tokens string // Path to tokens.txt
Transducer OfflineTransducerModelConfig
Paraformer OfflineParaformerModelConfig
NemoCTC OfflineNemoEncDecCtcModelConfig
Whisper OfflineWhisperModelConfig
Tdnn OfflineTdnnModelConfig
SenseVoice OfflineSenseVoiceModelConfig
Moonshine OfflineMoonshineModelConfig
FireRedAsr OfflineFireRedAsrModelConfig
Dolphin OfflineDolphinModelConfig
ZipformerCtc OfflineZipformerCtcModelConfig
Tokens string // Path to tokens.txt
// Number of threads to use for neural network computation
NumThreads int
@@ -540,6 +545,7 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
@@ -653,11 +659,22 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
C.free(unsafe.Pointer(c.model_config.fire_red_asr.encoder))
c.model_config.fire_red_asr.encoder = nil
}
if c.model_config.fire_red_asr.decoder != nil {
C.free(unsafe.Pointer(c.model_config.fire_red_asr.decoder))
c.model_config.fire_red_asr.decoder = nil
}
if c.model_config.dolphin.model != nil {
C.free(unsafe.Pointer(c.model_config.dolphin.model))
c.model_config.dolphin.model = nil
}
if c.model_config.zipformer_ctc.model != nil {
C.free(unsafe.Pointer(c.model_config.zipformer_ctc.model))
c.model_config.zipformer_ctc.model = nil
}
if c.model_config.tokens != nil {
C.free(unsafe.Pointer(c.model_config.tokens))
c.model_config.tokens = nil

View File

@@ -212,6 +212,21 @@ def get_models():
git diff
""",
),
Model(
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
short_name="vad-asr-zh-zipformer-ctc",
cmd="""
pushd $model_name
mv model.int8.onnx ../zipformer-ctc.onnx
mv tokens.txt ../
popd
rm -rf $model_name
sed -i.bak 's/Zipformer/Zipformer CTC supporting Chinese 中文/g' ../index.html
git diff
""",
),
]
return models