Support non-streaming zipformer CTC ASR models (#2340)

This PR adds support for non-streaming Zipformer CTC ASR models across multiple language bindings, WebAssembly, examples, and CI workflows.

- Introduces a new OfflineZipformerCtcModelConfig in C/C++, Python, Swift, Java, Kotlin, Go, Dart, Pascal, and C# APIs
- Updates initialization, freeing, and recognition logic to include Zipformer CTC in WASM and Node.js
- Adds example scripts and CI steps for downloading, building, and running Zipformer CTC models

The model doc is available at https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-ctc/icefall/zipformer.html.
2025-07-04 15:57:07 +08:00
parent ef16455cb5
commit 3bf986d08d
71 changed files with 2121 additions and 68 deletions
--- a/.github/scripts/test-dart.sh
+++ b/.github/scripts/test-dart.sh
@@ -6,6 +6,10 @@ cd dart-api-examples

 pushd non-streaming-asr

+echo '----------Zipformer CTC----------'
+./run-zipformer-ctc.sh
+rm -rf sherpa-onnx-*
+
 echo '----------SenseVoice----------'
 ./run-sense-voice-with-hr.sh
 ./run-sense-voice.sh
@@ -114,6 +118,10 @@ popd

 pushd vad-with-non-streaming-asr

+echo '----------Zipformer CTC----------'
+./run-zipformer-ctc.sh
+rm -rf sherpa-onnx-*
+
 echo '----------Dolphin CTC----------'
 ./run-dolphin-ctc.sh
 rm -rf sherpa-onnx-*
--- a/.github/scripts/test-dot-net.sh
+++ b/.github/scripts/test-dot-net.sh
@@ -6,43 +6,11 @@ cd ./version-test
 ./run.sh
 ls -lh

-cd ../speech-enhancement-gtcrn
-./run.sh
-ls -lh
-
-cd ../kokoro-tts
-./run-kokoro.sh
-ls -lh
-
-cd ../offline-tts
-./run-matcha-zh.sh
-ls -lh *.wav
-./run-matcha-en.sh
-ls -lh *.wav
-./run-aishell3.sh
-ls -lh *.wav
-./run-piper.sh
-ls -lh *.wav
-./run-hf-fanchen.sh
-ls -lh *.wav
-ls -lh
-
-pushd ../..
-
-mkdir tts
-
-cp -v dotnet-examples/kokoro-tts/*.wav ./tts
-cp -v dotnet-examples/offline-tts/*.wav ./tts
-popd
-
-cd ../offline-speaker-diarization
-./run.sh
-rm -rfv *.onnx
-rm -fv *.wav
-rm -rfv sherpa-onnx-pyannote-*
-
 cd ../offline-decode-files

+./run-zipformer-ctc.sh
+rm -rf sherpa-onnx-*
+
 ./run-dolphin-ctc.sh
 rm -rf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02

@@ -82,6 +50,41 @@ rm -rf sherpa-onnx-*
 ./run-tdnn-yesno.sh
 rm -rf sherpa-onnx-*

+cd ../speech-enhancement-gtcrn
+./run.sh
+ls -lh
+
+cd ../kokoro-tts
+./run-kokoro.sh
+ls -lh
+
+cd ../offline-tts
+./run-matcha-zh.sh
+ls -lh *.wav
+./run-matcha-en.sh
+ls -lh *.wav
+./run-aishell3.sh
+ls -lh *.wav
+./run-piper.sh
+ls -lh *.wav
+./run-hf-fanchen.sh
+ls -lh *.wav
+ls -lh
+
+pushd ../..
+
+mkdir tts
+
+cp -v dotnet-examples/kokoro-tts/*.wav ./tts
+cp -v dotnet-examples/offline-tts/*.wav ./tts
+popd
+
+cd ../offline-speaker-diarization
+./run.sh
+rm -rfv *.onnx
+rm -fv *.wav
+rm -rfv sherpa-onnx-pyannote-*
+
 cd ../keyword-spotting-from-files
 ./run.sh

@@ -115,5 +118,3 @@ rm -rf sherpa-onnx-*
 cd ../spoken-language-identification
 ./run.sh
 rm -rf sherpa-onnx-*
-
-
--- a/.github/scripts/test-nodejs-addon-npm.sh
+++ b/.github/scripts/test-nodejs-addon-npm.sh
@@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()")
 platform=$(node -p "require('os').platform()")
 node_version=$(node -p "process.versions.node.split('.')[0]")

+echo "----------non-streaming ASR Zipformer CTC----------"
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+
+tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+
+node ./test_asr_non_streaming_zipformer_ctc.js
+rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
+
 echo "----------non-streaming ASR NeMo parakeet tdt----------"
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
 tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
--- a/.github/scripts/test-nodejs-npm.sh
+++ b/.github/scripts/test-nodejs-npm.sh
@@ -9,6 +9,15 @@ git status
 ls -lh
 ls -lh node_modules

+# asr with offline zipformer ctc
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+
+tar xvf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+rm sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03.tar.bz2
+
+node ./test-offline-zipformer-ctc.js
+rm -rf sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
+
 # asr with offline dolphin ctc
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
 tar xvf sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2
--- a/.github/scripts/test-swift.sh
+++ b/.github/scripts/test-swift.sh
@@ -9,6 +9,9 @@ ls -lh

 ./run-test-version.sh

+./run-zipformer-ctc-asr.sh
+rm -rf sherpa-onnx-zipformer-*
+
 ./run-decode-file-sense-voice-with-hr.sh
 rm -rf sherpa-onnx-sense-voice-*
 rm -rf dict lexicon.txt replace.fst test-hr.wav
--- a/.github/workflows/aarch64-linux-gnu-shared.yaml
+++ b/.github/workflows/aarch64-linux-gnu-shared.yaml
@@ -89,6 +89,7 @@ jobs:
          make -j4 install

          cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
+          cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

          rm -rf install/lib/pkgconfig
          rm -fv install/lib/cargs.h
@@ -135,6 +136,7 @@ jobs:
              make -j4 install

              cp -v bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
+              cp -v bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

              rm -rf install/lib/pkgconfig
              rm -fv install/lib/cargs.h
--- a/.github/workflows/aarch64-linux-gnu-static.yaml
+++ b/.github/workflows/aarch64-linux-gnu-static.yaml
@@ -90,6 +90,7 @@ jobs:
              make install

              cp bin/sense-voice-simulate-streaming-alsa-cxx-api install/bin
+              cp bin/zipformer-ctc-simulate-streaming-alsa-cxx-api install/bin

              ls -lh install/lib

--- a/.github/workflows/pascal.yaml
+++ b/.github/workflows/pascal.yaml
@@ -37,7 +37,7 @@ jobs:
    strategy:
      fail-fast: false
      matrix:
-        os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
+        os: [ubuntu-latest, macos-latest, macos-13, windows-latest, ubuntu-22.04-arm]

    steps:
      - uses: actions/checkout@v4
@@ -56,7 +56,7 @@ jobs:
          key: ${{ matrix.os }}

      - name: Install Free pascal compiler (ubuntu)
-        if: matrix.os == 'ubuntu-latest'
+        if: matrix.os == 'ubuntu-latest' || matrix.os == 'ubuntu-22.04-arm'
        shell: bash
        run: |
          sudo apt-get update
@@ -156,6 +156,10 @@ jobs:

          pushd non-streaming-asr

+          ./run-zipformer-ctc.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
          ./run-dolphin-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"
@@ -264,9 +268,12 @@ jobs:

          cd ./pascal-api-examples

-
          pushd vad-with-non-streaming-asr

+          time ./run-vad-with-zipformer-ctc.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
          time ./run-vad-with-dolphin-ctc.sh
          rm -rf sherpa-onnx-*
          echo "---"
--- a/.github/workflows/run-java-test.yaml
+++ b/.github/workflows/run-java-test.yaml
@@ -165,6 +165,9 @@ jobs:
        run: |
          cd ./java-api-examples

+          ./run-non-streaming-decode-file-zipformer-ctc.sh
+          rm -rf sherpa-onnx-zipformer-ctc-*
+
          ./run-non-streaming-decode-file-dolphin-ctc.sh
          rm -rf sherpa-onnx-dolphin-*

--- a/.github/workflows/test-go.yaml
+++ b/.github/workflows/test-go.yaml
@@ -184,6 +184,10 @@ jobs:
          go build
          ls -lh

+          echo "Test Zipformer CTC"
+          ./run-zipformer-ctc.sh
+          rm -rf sherpa-onnx-zipformer-*
+
          echo "Test SenseVoice ctc"
          ./run-sense-voice-small-with-hr.sh
          ./run-sense-voice-small.sh
--- a/.github/workflows/upload-models.yaml
+++ b/.github/workflows/upload-models.yaml
@@ -19,12 +19,36 @@ jobs:
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
-        python-version: ["3.8"]
+        python-version: ["3.10"]

    steps:
      - uses: actions/checkout@v4

+      - name: Zipformer CTC (non-streaming)
+        shell: bash
+        run: |
+          git lfs install
+          names=(
+            sherpa-onnx-zipformer-ctc-zh-int8-2025-07-03
+            sherpa-onnx-zipformer-ctc-zh-2025-07-03
+            sherpa-onnx-zipformer-ctc-zh-fp16-2025-07-03
+          )
+          for name in ${names[@]}; do
+            git clone https://huggingface.co/csukuangfj/$name 
+            pushd $name
+            git lfs pull
+            rm -rf .git
+            rm -rfv .gitattributes
+            ls -lh
+            popd
+
+            tar cjfv $name.tar.bz2 $name
+            rm -rf $name
+            ls -lh *.tar.bz2
+          done
+
      - name: Vietnamese (zipformer)
+        if: false
        shell: bash
        run: |
          rm -rf models
@@ -76,6 +100,7 @@ jobs:
          mv models/* .

      - name: Publish to huggingface (Vietnamese zipformer)
+        if: false
        env:
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        uses: nick-fields/retry@v3