From f9db33c9268b20e7bc50c24637fb04bf3fa83e65 Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Fri, 1 Mar 2024 15:20:56 +0800
Subject: [PATCH] Add WebAssembly demo for streaming trilingual Paraformer (Chinese+Cantonese+English) (#618)

---
 .../workflows/wasm-simd-hf-space-de-tts.yaml  |   1 +
 .../wasm-simd-hf-space-en-asr-zipformer.yaml  |   1 +
 .../workflows/wasm-simd-hf-space-en-tts.yaml  |   1 +
 ...-space-zh-cantonese-en-asr-paraformer.yaml | 152 ++++++++++++++++++
 ...sm-simd-hf-space-zh-en-asr-paraformer.yaml |   1 +
 ...asm-simd-hf-space-zh-en-asr-zipformer.yaml |   1 +
 6 files changed, 157 insertions(+)
 create mode 100644 .github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml

diff --git a/.github/workflows/wasm-simd-hf-space-de-tts.yaml b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
index 29d87883..5ca241bb 100644
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
@@ -72,6 +72,7 @@ jobs:
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
       - name: Publish to ModelScope
+        if: false
         env:
           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
         uses: nick-fields/retry@v2
diff --git a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
index 8bfcdf08..deab7f45 100644
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
@@ -74,6 +74,7 @@ jobs:
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
       - name: Publish to ModelScope
+        if: false
         env:
           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
         uses: nick-fields/retry@v2
diff --git a/.github/workflows/wasm-simd-hf-space-en-tts.yaml b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
index 3d5265f6..d6ffd20d 100644
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
@@ -70,6 +70,7 @@ jobs:
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
       - name: Publish to ModelScope
+        if: false
         env:
           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
         uses: nick-fields/retry@v2
diff --git a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
new file mode 100644
index 00000000..1b8194bc
--- /dev/null
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -0,0 +1,152 @@
+name: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer
+
+on:
+  release:
+    types:
+      - published
+
+  workflow_dispatch:
+
+concurrency:
+  group: wasm-simd-hf-space-zh-cantonese-en-asr-paraformer-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  wasm-simd-hf-space-zh-cantonese-en-asr-paraformer:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest]
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+      - name: Install emsdk
+        uses: mymindstorm/setup-emsdk@v14
+
+      - name: View emsdk version
+        shell: bash
+        run: |
+          emcc -v
+          echo "--------------------"
+          emcc --check
+
+      - name: Download model files
+        shell: bash
+        run: |
+          cd wasm/asr/assets
+          ls -lh
+          echo "----------"
+
+          wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
+          tar xvf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
+          rm sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en.tar.bz2
+
+          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/encoder.int8.onnx encoder.onnx
+          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/decoder.int8.onnx decoder.onnx
+          mv sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en/tokens.txt ./
+
+          rm -rf sherpa-onnx-streaming-paraformer-trilingual-zh-cantonese-en
+
+          ls -lh
+
+          cd ../
+
+          sed -i.bak s/"type = 0"/"type = 1"/g ./sherpa-onnx.js
+          sed -i.bak s/Zipformer/Paraformer/g ./index.html
+
+          git diff
+
+      - name: Build sherpa-onnx for WebAssembly (ASR)
+        shell: bash
+        run: |
+          ./build-wasm-simd-asr.sh
+
+      - name: collect files
+        shell: bash
+        run: |
+          SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+
+          dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-zh-cantonese-en-asr-paraformer
+          mv build-wasm-simd-asr/install/bin/wasm/asr $dst
+          ls -lh $dst
+          tar cjfv ${dst}.tar.bz2 ./${dst}
+
+      - name: Upload wasm files
+        uses: actions/upload-artifact@v4
+        with:
+          name: sherpa-onnx-wasm-simd-zh-cantonese-en-asr-paraformer
+          path: ./sherpa-onnx-wasm-simd-*.tar.bz2
+
+      - name: Publish to huggingface
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        uses: nick-fields/retry@v2
+        with:
+          max_attempts: 20
+          timeout_seconds: 200
+          shell: bash
+          command: |
+            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf huggingface
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer huggingface
+            cd huggingface
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+
+            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+            git status
+            git lfs track "*.data"
+            git lfs track "*.wasm"
+            ls -lh
+
+            git add .
+            git commit -m "update model"
+            git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer main
+
+      - name: Publish to ModelScope
+        if: false
+        env:
+          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
+        uses: nick-fields/retry@v2
+        with:
+          max_attempts: 10
+          timeout_seconds: 600
+          shell: bash
+          command: |
+            SHERPA_ONNX_VERSION=v$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+
+            git config --global user.email "csukuangfj@gmail.com"
+            git config --global user.name "Fangjun Kuang"
+
+            rm -rf ms
+            export GIT_LFS_SKIP_SMUDGE=1
+
+            git clone https://www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer.git ms
+            cd ms
+            git config lfs.locksverify true
+            git fetch
+            git pull
+            git merge -m "merge remote" --ff origin main
+
+            cp -v ../sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-*/* .
+
+            git status
+            git lfs track "*.data"
+            git lfs track "*.wasm"
+            ls -lh
+
+            git add .
+            git commit -m "update model"
+            git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer.git
+
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
index dc6ce30d..af46d2a1 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
@@ -81,6 +81,7 @@ jobs:
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
       - name: Publish to ModelScope
+        if: false
         env:
           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
         uses: nick-fields/retry@v2
diff --git a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
index eb5262af..702865e6 100644
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
@@ -72,6 +72,7 @@ jobs:
           path: ./sherpa-onnx-wasm-simd-*.tar.bz2
 
       - name: Publish to ModelScope
+        if: false
         env:
           MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
         uses: nick-fields/retry@v2