Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across 
multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

The model documentation is available at:
https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
Fangjun Kuang
2025-07-04 15:57:07 +08:00
committed by GitHub
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions

View File

@@ -6,6 +6,10 @@ cd dart-api-examples
pushd non-streaming-asr
echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo '----------SenseVoice----------'
./run-sense-voice-with-hr.sh
./run-sense-voice.sh
@@ -114,6 +118,10 @@ popd
pushd vad-with-non-streaming-asr
echo '----------Zipformer CTC----------'
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo '----------Dolphin CTC----------'
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*

View File

@@ -6,43 +6,11 @@ cd ./version-test
./run.sh
ls -lh
cd ../speech-enhancement-gtcrn
./run.sh
ls -lh
cd ../kokoro-tts
./run-kokoro.sh
ls -lh
cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
ls -lh *.wav
./run-aishell3.sh
ls -lh *.wav
./run-piper.sh
ls -lh *.wav
./run-hf-fanchen.sh
ls -lh *.wav
ls -lh
pushd ../..
mkdir tts
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
popd
cd ../offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*
cd ../offline-decode-files
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
@@ -82,6 +50,41 @@ rm -rf sherpa-onnx-*
./run-tdnn-yesno.sh
rm -rf sherpa-onnx-*
cd ../speech-enhancement-gtcrn
./run.sh
ls -lh
cd ../kokoro-tts
./run-kokoro.sh
ls -lh
cd ../offline-tts
./run-matcha-zh.sh
ls -lh *.wav
./run-matcha-en.sh
ls -lh *.wav
./run-aishell3.sh
ls -lh *.wav
./run-piper.sh
ls -lh *.wav
./run-hf-fanchen.sh
ls -lh *.wav
ls -lh
pushd ../..
mkdir tts
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
cp -v dotnet-examples/offline-tts/*.wav ./tts
popd
cd ../offline-speaker-diarization
./run.sh
rm -rfv *.onnx
rm -fv *.wav
rm -rfv sherpa-onnx-pyannote-*
cd ../keyword-spotting-from-files
./run.sh
@@ -115,5 +118,3 @@ rm -rf sherpa-onnx-*
cd ../spoken-language-identification
./run.sh
rm -rf sherpa-onnx-*

View File

@@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
platform=$(node -p "require('os').platform()")
node_version=$(node -p "process.versions.node.split('.')[0]")
echo "----------non-streaming ASR Zipformer CTC----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test_asr_non_streaming_zipformer_ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
echo "----------non-streaming ASR NeMo parakeet tdt----------"
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2

View File

@@ -9,6 +9,15 @@ git status
ls -lh
ls -lh node_modules
# asr with offline zipformer ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
node ./test-offline-zipformer-ctc.js
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
# asr with offline dolphin ctc
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2

View File

@@ -9,6 +9,9 @@ ls -lh
./run-test-version.sh
./run-zipformer-ctc-asr.sh
rm -rf sherpa-onnx-zipformer-*
./run-decode-file-sense-voice-with-hr.sh
rm -rf sherpa-onnx-sense-voice-*
rm -rf dict lexicon.txt replace.fst test-hr.wav

View File

@@ -89,6 +89,7 @@ jobs:
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h
@@ -135,6 +136,7 @@ jobs:
make -j4 install
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
rm -rf install/lib/pkgconfig
rm -fv install/lib/cargs.h

View File

@@ -90,6 +90,7 @@ jobs:
make install
cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
cp bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
ls -lh install/lib

View File

@@ -37,7 +37,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
os: [ubuntu-latest, macos-latest, macos-13, windows-latest, ubuntu-22.04-arm]
steps:
- uses: actions/checkout@v4
@@ -56,7 +56,7 @@ jobs:
key: ${{ matrix.os }}
- name: Install Free pascal compiler (ubuntu)
if: matrix.os == 'ubuntu-latest'
if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04-arm'
shell: bash
run: |
sudo apt-get update
@@ -156,6 +156,10 @@ jobs:
pushd non-streaming-asr
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
./run-dolphin-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
@@ -264,9 +268,12 @@ jobs:
cd ./pascal-api-examples
pushd vad-with-non-streaming-asr
time ./run-vad-with-zipformer-ctc.sh
rm -rf sherpa-onnx-*
echo "---"
time ./run-vad-with-dolphin-ctc.sh
rm -rf sherpa-onnx-*
echo "---"

View File

@@ -165,6 +165,9 @@ jobs:
run: |
cd ./java-api-examples
./run-non-streaming-decode-file-zipformer-ctc.sh
rm -rf sherpa-onnx-zipformer-ctc-*
./run-non-streaming-decode-file-dolphin-ctc.sh
rm -rf sherpa-onnx-dolphin-*

View File

@@ -184,6 +184,10 @@ jobs:
go build
ls -lh
echo "Test Zipformer CTC"
./run-zipformer-ctc.sh
rm -rf sherpa-onnx-zipformer-*
echo "Test SenseVoice ctc"
./run-sense-voice-small-with-hr.sh
./run-sense-voice-small.sh

View File

@@ -19,12 +19,36 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.8"]
python-version: ["3.10"]
steps:
- uses: actions/checkout@v4
- name: Zipformer CTC (non-streaming)
shell: bash
run: |
# Repackage each Zipformer CTC model repository listed in `names` as a
# <name>.tar.bz2 archive in the current directory.
git lfs install

# Repository names under https://huggingface.co/csukuangfj/ to process.
names=(
  sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
  sherpa-onnx-zipformer-ctc-zh-2025-07-03
  sherpa-onnx-zipformer-ctc-zh-fp16-2025-07-03
)

# Quote the array expansion and every use of $name so that each entry is
# always handled as exactly one word (no word-splitting, no globbing).
for name in "${names[@]}"; do
  git clone "https://huggingface.co/csukuangfj/$name"

  pushd "$name"
  git lfs pull
  # Remove the git metadata before the directory is archived.
  rm -rf .git
  rm -rfv .gitattributes
  ls -lh
  popd

  tar cjfv "$name.tar.bz2" "$name"
  # The extracted checkout is no longer needed once the archive exists.
  rm -rf "$name"
  ls -lh *.tar.bz2
done
- name: Vietnamese (zipformer)
if: false
shell: bash
run: |
rm -rf models
@@ -76,6 +100,7 @@ jobs:
mv models/* .
- name: Publish to huggingface (Vietnamese zipformer)
if: false
env:
HF_TOKEN: ${{ secrets.HF_TOKEN }}
uses: nick-fields/retry@v3