Support non-streaming zipformer CTC ASR models (#2340)
This PR adds support for non-streaming Zipformer CTC ASR models across multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in the C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs.
- Updates the initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js.
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models.

The model documentation is available at https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
@@ -344,7 +344,7 @@ def get_models():
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-streaming-zipformer-ctc-fp16-zh-2025-06-30",
|
||||
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30",
|
||||
idx=19,
|
||||
lang="zh",
|
||||
short_name="large_zipformer_fp16",
|
||||
@@ -360,6 +360,26 @@ def get_models():
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30",
|
||||
idx=20,
|
||||
lang="zh",
|
||||
short_name="large_zipformer_int8",
|
||||
rule_fsts="itn_zh_number.fst",
|
||||
cmd="""
|
||||
if [ ! -f itn_zh_number.fst ]; then
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst
|
||||
fi
|
||||
pushd $model_name
|
||||
rm -fv bpe.model
|
||||
|
||||
rm -rf test_wavs
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
|
||||
@@ -548,6 +548,23 @@ def get_models():
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
|
||||
idx=31,
|
||||
lang="zh",
|
||||
lang2="Chinese",
|
||||
short_name="zipformer_2025_07_03",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
|
||||
rm -rfv test_wavs
|
||||
rm -rfv bbpe.model
|
||||
|
||||
ls -lh
|
||||
|
||||
popd
|
||||
""",
|
||||
),
|
||||
|
||||
@@ -27,6 +27,7 @@ namespace SherpaOnnx
|
||||
Moonshine = new OfflineMoonshineModelConfig();
|
||||
FireRedAsr = new OfflineFireRedAsrModelConfig();
|
||||
Dolphin = new OfflineDolphinModelConfig();
|
||||
ZipformerCtc = new OfflineZipformerCtcModelConfig();
|
||||
}
|
||||
public OfflineTransducerModelConfig Transducer;
|
||||
public OfflineParaformerModelConfig Paraformer;
|
||||
@@ -60,5 +61,6 @@ namespace SherpaOnnx
|
||||
public OfflineMoonshineModelConfig Moonshine;
|
||||
public OfflineFireRedAsrModelConfig FireRedAsr;
|
||||
public OfflineDolphinModelConfig Dolphin;
|
||||
public OfflineZipformerCtcModelConfig ZipformerCtc;
|
||||
}
|
||||
}
|
||||
|
||||
18
scripts/dotnet/OfflineZipformerCtcModelConfig.cs
Normal file
18
scripts/dotnet/OfflineZipformerCtcModelConfig.cs
Normal file
@@ -0,0 +1,18 @@
|
||||
/// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace SherpaOnnx
|
||||
{
|
||||
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
public struct OfflineZipformerCtcModelConfig
|
||||
{
|
||||
public OfflineZipformerCtcModelConfig()
|
||||
{
|
||||
Model = "";
|
||||
}
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Model;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1 @@
|
||||
../../../../go-api-examples/non-streaming-decode-files/run-zipformer-ctc.sh
|
||||
@@ -398,6 +398,10 @@ type OfflineNemoEncDecCtcModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
|
||||
type OfflineZipformerCtcModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
|
||||
type OfflineDolphinModelConfig struct {
|
||||
Model string // Path to the model, e.g., model.onnx or model.int8.onnx
|
||||
}
|
||||
@@ -439,16 +443,17 @@ type OfflineLMConfig struct {
|
||||
}
|
||||
|
||||
type OfflineModelConfig struct {
|
||||
Transducer OfflineTransducerModelConfig
|
||||
Paraformer OfflineParaformerModelConfig
|
||||
NemoCTC OfflineNemoEncDecCtcModelConfig
|
||||
Whisper OfflineWhisperModelConfig
|
||||
Tdnn OfflineTdnnModelConfig
|
||||
SenseVoice OfflineSenseVoiceModelConfig
|
||||
Moonshine OfflineMoonshineModelConfig
|
||||
FireRedAsr OfflineFireRedAsrModelConfig
|
||||
Dolphin OfflineDolphinModelConfig
|
||||
Tokens string // Path to tokens.txt
|
||||
Transducer OfflineTransducerModelConfig
|
||||
Paraformer OfflineParaformerModelConfig
|
||||
NemoCTC OfflineNemoEncDecCtcModelConfig
|
||||
Whisper OfflineWhisperModelConfig
|
||||
Tdnn OfflineTdnnModelConfig
|
||||
SenseVoice OfflineSenseVoiceModelConfig
|
||||
Moonshine OfflineMoonshineModelConfig
|
||||
FireRedAsr OfflineFireRedAsrModelConfig
|
||||
Dolphin OfflineDolphinModelConfig
|
||||
ZipformerCtc OfflineZipformerCtcModelConfig
|
||||
Tokens string // Path to tokens.txt
|
||||
|
||||
// Number of threads to use for neural network computation
|
||||
NumThreads int
|
||||
@@ -540,6 +545,7 @@ func newCOfflineRecognizerConfig(config *OfflineRecognizerConfig) *C.struct_Sher
|
||||
c.model_config.fire_red_asr.decoder = C.CString(config.ModelConfig.FireRedAsr.Decoder)
|
||||
|
||||
c.model_config.dolphin.model = C.CString(config.ModelConfig.Dolphin.Model)
|
||||
c.model_config.zipformer_ctc.model = C.CString(config.ModelConfig.ZipformerCtc.Model)
|
||||
|
||||
c.model_config.tokens = C.CString(config.ModelConfig.Tokens)
|
||||
|
||||
@@ -653,11 +659,22 @@ func freeCOfflineRecognizerConfig(c *C.struct_SherpaOnnxOfflineRecognizerConfig)
|
||||
C.free(unsafe.Pointer(c.model_config.fire_red_asr.encoder))
|
||||
c.model_config.fire_red_asr.encoder = nil
|
||||
}
|
||||
|
||||
if c.model_config.fire_red_asr.decoder != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.fire_red_asr.decoder))
|
||||
c.model_config.fire_red_asr.decoder = nil
|
||||
}
|
||||
|
||||
if c.model_config.dolphin.model != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.dolphin.model))
|
||||
c.model_config.dolphin.model = nil
|
||||
}
|
||||
|
||||
if c.model_config.zipformer_ctc.model != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.zipformer_ctc.model))
|
||||
c.model_config.zipformer_ctc.model = nil
|
||||
}
|
||||
|
||||
if c.model_config.tokens != nil {
|
||||
C.free(unsafe.Pointer(c.model_config.tokens))
|
||||
c.model_config.tokens = nil
|
||||
|
||||
@@ -212,6 +212,21 @@ def get_models():
|
||||
git diff
|
||||
""",
|
||||
),
|
||||
Model(
|
||||
model_name="sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03",
|
||||
hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
|
||||
ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-ctc",
|
||||
short_name="vad-asr-zh-zipformer-ctc",
|
||||
cmd="""
|
||||
pushd $model_name
|
||||
mv model.int8.onnx ../zipformer-ctc.onnx
|
||||
mv tokens.txt ../
|
||||
popd
|
||||
rm -rf $model_name
|
||||
sed -i.bak 's/Zipformer/Zipformer CTC supporting Chinese 中文/g' ../index.html
|
||||
git diff
|
||||
""",
|
||||
),
|
||||
]
|
||||
return models
|
||||
|
||||
|
||||
Reference in New Issue
Block a user