Add Python APIs for WeNet CTC models (#428)

2023-11-16 14:20:41 +08:00
parent fac4f6bc7c
commit 049fb9f451
13 changed files with 538 additions and 11 deletions
--- a/.github/scripts/test-python.sh
+++ b/.github/scripts/test-python.sh
@@ -8,6 +8,51 @@ log() {
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }

+# Hugging Face repos (under csukuangfj/) whose WeNet CTC models are smoke-tested.
+wenet_models=(
+sherpa-onnx-zh-wenet-aishell
+sherpa-onnx-zh-wenet-aishell2
+sherpa-onnx-zh-wenet-wenetspeech
+sherpa-onnx-zh-wenet-multi-cn
+sherpa-onnx-en-wenet-librispeech
+sherpa-onnx-en-wenet-gigaspeech
+)
+
+dir=/tmp/icefall-models
+mkdir -p "$dir"
+
+for name in "${wenet_models[@]}"; do
+  repo_url=https://huggingface.co/csukuangfj/$name
+  repo=$dir/$name
+  log "Start testing ${repo_url}"
+  log "Download pretrained model and test-data from $repo_url"
+  # Clone without LFS blobs first, then pull only the ONNX model files.
+  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url" "$repo"
+  pushd "$repo" || exit 1
+  git lfs pull --include "*.onnx"
+  ls -lh *.onnx
+  popd || exit 1
+
+  python3 ./python-api-examples/offline-decode-files.py \
+    --tokens="$repo/tokens.txt" \
+    --wenet-ctc="$repo/model.onnx" \
+    "$repo/test_wavs/0.wav" \
+    "$repo/test_wavs/1.wav" \
+    "$repo/test_wavs/8k.wav"
+
+  python3 ./python-api-examples/online-decode-files.py \
+    --tokens="$repo/tokens.txt" \
+    --wenet-ctc="$repo/model-streaming.onnx" \
+    "$repo/test_wavs/0.wav" \
+    "$repo/test_wavs/1.wav" \
+    "$repo/test_wavs/8k.wav"
+
+  python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose
+  python3 sherpa-onnx/python/tests/test_online_recognizer.py --verbose
+
+  rm -rf "$repo"
+done
+
 log "Offline TTS test"
 # test waves are saved in ./tts
 mkdir ./tts
--- a/.github/workflows/mfc.yaml
+++ b/.github/workflows/mfc.yaml
@@ -85,10 +85,19 @@ jobs:
          arch=${{ matrix.arch }}

          cd mfc-examples/$arch/Release
-          cp StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
-          cp NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
          ls -lh

+          cp -v StreamingSpeechRecognition.exe sherpa-onnx-streaming-${SHERPA_ONNX_VERSION}.exe
+          cp -v NonStreamingSpeechRecognition.exe sherpa-onnx-non-streaming-${SHERPA_ONNX_VERSION}.exe
+          cp -v NonStreamingTextToSpeech.exe ../sherpa-onnx-non-streaming-tts-${SHERPA_ONNX_VERSION}.exe
+          ls -lh
+
+      - name: Upload artifact tts  # publishes the TTS executable built for this arch
+        uses: actions/upload-artifact@v3
+        with:
+          path: ./mfc-examples/${{ matrix.arch }}/Release/NonStreamingTextToSpeech.exe
+          name: non-streaming-tts-${{ matrix.arch }}  # one artifact per matrix arch
+
      - name: Upload artifact
        uses: actions/upload-artifact@v3
        with:
@@ -116,3 +125,11 @@ jobs:
          file_glob: true
          overwrite: true
          file: ./mfc-examples/${{ matrix.arch }}/Release/sherpa-onnx-non-streaming-*.exe
+
+      - name: Release pre-compiled binaries and libs for Windows ${{ matrix.arch }}
+        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')  # owner check grouped because && binds tighter than ||
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: ./mfc-examples/${{ matrix.arch }}/sherpa-onnx-non-streaming-*.exe
--- a/.github/workflows/run-python-test.yaml
+++ b/.github/workflows/run-python-test.yaml
@@ -10,6 +10,7 @@ on:
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
+      - 'python-api-examples/**'
  pull_request:
    branches:
      - master
@@ -19,6 +20,7 @@ on:
      - 'CMakeLists.txt'
      - 'cmake/**'
      - 'sherpa-onnx/csrc/*'
+      - 'python-api-examples/**'
  workflow_dispatch:

 concurrency: