Support non-streaming zipformer CTC ASR models (#2340)
This PR adds support for non-streaming Zipformer CTC ASR models across multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in the C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

The model documentation is available at https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html
This commit is contained in:
8
.github/scripts/test-dart.sh
vendored
8
.github/scripts/test-dart.sh
vendored
@@ -6,6 +6,10 @@ cd dart-api-examples
|
||||
|
||||
pushd non-streaming-asr
|
||||
|
||||
echo '----------Zipformer CTC----------'
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
echo '----------SenseVoice----------'
|
||||
./run-sense-voice-with-hr.sh
|
||||
./run-sense-voice.sh
|
||||
@@ -114,6 +118,10 @@ popd
|
||||
|
||||
pushd vad-with-non-streaming-asr
|
||||
|
||||
echo '----------Zipformer CTC----------'
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
echo '----------Dolphin CTC----------'
|
||||
./run-dolphin-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
75
.github/scripts/test-dot-net.sh
vendored
75
.github/scripts/test-dot-net.sh
vendored
@@ -6,43 +6,11 @@ cd ./version-test
|
||||
./run.sh
|
||||
ls -lh
|
||||
|
||||
cd ../speech-enhancement-gtcrn
|
||||
./run.sh
|
||||
ls -lh
|
||||
|
||||
cd ../kokoro-tts
|
||||
./run-kokoro.sh
|
||||
ls -lh
|
||||
|
||||
cd ../offline-tts
|
||||
./run-matcha-zh.sh
|
||||
ls -lh *.wav
|
||||
./run-matcha-en.sh
|
||||
ls -lh *.wav
|
||||
./run-aishell3.sh
|
||||
ls -lh *.wav
|
||||
./run-piper.sh
|
||||
ls -lh *.wav
|
||||
./run-hf-fanchen.sh
|
||||
ls -lh *.wav
|
||||
ls -lh
|
||||
|
||||
pushd ../..
|
||||
|
||||
mkdir tts
|
||||
|
||||
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
|
||||
cp -v dotnet-examples/offline-tts/*.wav ./tts
|
||||
popd
|
||||
|
||||
cd ../offline-speaker-diarization
|
||||
./run.sh
|
||||
rm -rfv *.onnx
|
||||
rm -fv *.wav
|
||||
rm -rfv sherpa-onnx-pyannote-*
|
||||
|
||||
cd ../offline-decode-files
|
||||
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
./run-dolphin-ctc.sh
|
||||
rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02
|
||||
|
||||
@@ -82,6 +50,41 @@ rm -rf sherpa-onnx-*
|
||||
./run-tdnn-yesno.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
cd ../speech-enhancement-gtcrn
|
||||
./run.sh
|
||||
ls -lh
|
||||
|
||||
cd ../kokoro-tts
|
||||
./run-kokoro.sh
|
||||
ls -lh
|
||||
|
||||
cd ../offline-tts
|
||||
./run-matcha-zh.sh
|
||||
ls -lh *.wav
|
||||
./run-matcha-en.sh
|
||||
ls -lh *.wav
|
||||
./run-aishell3.sh
|
||||
ls -lh *.wav
|
||||
./run-piper.sh
|
||||
ls -lh *.wav
|
||||
./run-hf-fanchen.sh
|
||||
ls -lh *.wav
|
||||
ls -lh
|
||||
|
||||
pushd ../..
|
||||
|
||||
mkdir tts
|
||||
|
||||
cp -v dotnet-examples/kokoro-tts/*.wav ./tts
|
||||
cp -v dotnet-examples/offline-tts/*.wav ./tts
|
||||
popd
|
||||
|
||||
cd ../offline-speaker-diarization
|
||||
./run.sh
|
||||
rm -rfv *.onnx
|
||||
rm -fv *.wav
|
||||
rm -rfv sherpa-onnx-pyannote-*
|
||||
|
||||
cd ../keyword-spotting-from-files
|
||||
./run.sh
|
||||
|
||||
@@ -115,5 +118,3 @@ rm -rf sherpa-onnx-*
|
||||
cd ../spoken-language-identification
|
||||
./run.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
|
||||
|
||||
|
||||
9
.github/scripts/test-nodejs-addon-npm.sh
vendored
9
.github/scripts/test-nodejs-addon-npm.sh
vendored
@@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
|
||||
platform=$(node -p "require('os').platform()")
|
||||
node_version=$(node -p "process.versions.node.split('.')[0]")
|
||||
|
||||
echo "----------non-streaming ASR Zipformer CTC----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
node ./test_asr_non_streaming_zipformer_ctc.js
|
||||
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
|
||||
|
||||
echo "----------non-streaming ASR NeMo parakeet tdt----------"
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
|
||||
tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
|
||||
|
||||
9
.github/scripts/test-nodejs-npm.sh
vendored
9
.github/scripts/test-nodejs-npm.sh
vendored
@@ -9,6 +9,15 @@ git status
|
||||
ls -lh
|
||||
ls -lh node_modules
|
||||
|
||||
# asr with offline zipformer ctc
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
|
||||
|
||||
node ./test-offline-zipformer-ctc.js
|
||||
rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
|
||||
|
||||
# asr with offline dolphin ctc
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
|
||||
tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
|
||||
|
||||
3
.github/scripts/test-swift.sh
vendored
3
.github/scripts/test-swift.sh
vendored
@@ -9,6 +9,9 @@ ls -lh
|
||||
|
||||
./run-test-version.sh
|
||||
|
||||
./run-zipformer-ctc-asr.sh
|
||||
rm -rf sherpa-onnx-zipformer-*
|
||||
|
||||
./run-decode-file-sense-voice-with-hr.sh
|
||||
rm -rf sherpa-onnx-sense-voice-*
|
||||
rm -rf dict lexicon.txt replace.fst test-hr.wav
|
||||
|
||||
@@ -89,6 +89,7 @@ jobs:
|
||||
make -j4 install
|
||||
|
||||
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
|
||||
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
|
||||
|
||||
rm -rf install/lib/pkgconfig
|
||||
rm -fv install/lib/cargs.h
|
||||
@@ -135,6 +136,7 @@ jobs:
|
||||
make -j4 install
|
||||
|
||||
cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
|
||||
cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
|
||||
|
||||
rm -rf install/lib/pkgconfig
|
||||
rm -fv install/lib/cargs.h
|
||||
|
||||
@@ -90,6 +90,7 @@ jobs:
|
||||
make install
|
||||
|
||||
cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
|
||||
cp bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin
|
||||
|
||||
ls -lh install/lib
|
||||
|
||||
|
||||
13
.github/workflows/pascal.yaml
vendored
13
.github/workflows/pascal.yaml
vendored
@@ -37,7 +37,7 @@ jobs:
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
|
||||
os: [ubuntu-latest, macos-latest, macos-13, windows-latest, ubuntu-22.04-arm]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
@@ -56,7 +56,7 @@ jobs:
|
||||
key: ${{ matrix.os }}
|
||||
|
||||
- name: Install Free pascal compiler (ubuntu)
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04-arm'
|
||||
shell: bash
|
||||
run: |
|
||||
sudo apt-get update
|
||||
@@ -156,6 +156,10 @@ jobs:
|
||||
|
||||
pushd non-streaming-asr
|
||||
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
|
||||
./run-dolphin-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
@@ -264,9 +268,12 @@ jobs:
|
||||
|
||||
cd ./pascal-api-examples
|
||||
|
||||
|
||||
pushd vad-with-non-streaming-asr
|
||||
|
||||
time ./run-vad-with-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
|
||||
time ./run-vad-with-dolphin-ctc.sh
|
||||
rm -rf sherpa-onnx-*
|
||||
echo "---"
|
||||
|
||||
3
.github/workflows/run-java-test.yaml
vendored
3
.github/workflows/run-java-test.yaml
vendored
@@ -165,6 +165,9 @@ jobs:
|
||||
run: |
|
||||
cd ./java-api-examples
|
||||
|
||||
./run-non-streaming-decode-file-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-zipformer-ctc-*
|
||||
|
||||
./run-non-streaming-decode-file-dolphin-ctc.sh
|
||||
rm -rf sherpa-onnx-dolphin-*
|
||||
|
||||
|
||||
4
.github/workflows/test-go.yaml
vendored
4
.github/workflows/test-go.yaml
vendored
@@ -184,6 +184,10 @@ jobs:
|
||||
go build
|
||||
ls -lh
|
||||
|
||||
echo "Test Zipformer CTC"
|
||||
./run-zipformer-ctc.sh
|
||||
rm -rf sherpa-onnx-zipformer-*
|
||||
|
||||
echo "Test SenseVoice ctc"
|
||||
./run-sense-voice-small-with-hr.sh
|
||||
./run-sense-voice-small.sh
|
||||
|
||||
27
.github/workflows/upload-models.yaml
vendored
27
.github/workflows/upload-models.yaml
vendored
@@ -19,12 +19,36 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
python-version: ["3.8"]
|
||||
python-version: ["3.10"]
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
|
||||
- name: Zipformer CTC (non-streaming)
|
||||
shell: bash
|
||||
run: |
|
||||
git lfs install
|
||||
names=(
|
||||
sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
|
||||
sherpa-onnx-zipformer-ctc-zh-2025-07-03
|
||||
sherpa-onnx-zipformer-ctc-zh-fp16-2025-07-03
|
||||
)
|
||||
for name in ${names[@]}; do
|
||||
git clone https://huggingface.co/csukuangfj/$name
|
||||
pushd $name
|
||||
git lfs pull
|
||||
rm -rf .git
|
||||
rm -rfv .gitattributes
|
||||
ls -lh
|
||||
popd
|
||||
|
||||
tar cjfv $name.tar.bz2 $name
|
||||
rm -rf $name
|
||||
ls -lh *.tar.bz2
|
||||
done
|
||||
|
||||
- name: Vietnamese (zipformer)
|
||||
if: false
|
||||
shell: bash
|
||||
run: |
|
||||
rm -rf models
|
||||
@@ -76,6 +100,7 @@ jobs:
|
||||
mv models/* .
|
||||
|
||||
- name: Publish to huggingface (Vietnamese zipformer)
|
||||
if: false
|
||||
env:
|
||||
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
||||
uses: nick-fields/retry@v3
|
||||
|
||||
Reference in New Issue
Block a user