WebAssembly example for VAD + Non-streaming ASR (#1284)

2024-08-24 13:24:52 +08:00
parent 1ef8a7a202
commit 537e163dd0
29 changed files with 1281 additions and 70 deletions
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
@@ -27,6 +27,9 @@ jobs:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
+++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
@@ -25,6 +25,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
--- a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
+++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
@@ -0,0 +1,93 @@
+name: wasm-simd-hf-space-vad-asr
+
+on:
+  push:
+    branches:
+      - wasm
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+*'  # version tags, e.g. v1.10.20
+
+  workflow_dispatch:  # allow manual runs
+
+concurrency:
+  group: wasm-simd-hf-space-vad-asr-${{ github.ref }}
+  cancel-in-progress: true  # a newer run on the same ref cancels the older one
+
+jobs:
+  wasm-simd-hf-space-vad-asr:
+    name: ${{ matrix.index }}/${{ matrix.total }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false  # keep the remaining matrix jobs running if one fails
+      matrix:
+        os: [ubuntu-latest]
+        total: ["8"]  # forwarded to generate-vad-asr.py as --total
+        index: ["0", "1", "2", "3", "4", "5", "6", "7"]  # forwarded as --index
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Install Python dependencies
+        shell: bash
+        run: |
+          python3 -m pip install --upgrade pip jinja2
+
+      - name: Install emsdk
+        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51  # same pinned emsdk version as the other wasm workflows
+          actions-cache-folder: 'emsdk-cache'
+
+      - name: View emsdk version
+        shell: bash
+        run: |
+          emcc -v
+          echo "--------------------"
+          emcc --check
+
+      - name: Generate build script
+        shell: bash
+        run: |
+          cd scripts/wasm
+
+          total=${{ matrix.total }}
+          index=${{ matrix.index }}
+
+          ./generate-vad-asr.py --total $total --index $index
+
+          chmod +x run-vad-asr.sh
+          mv -v ./run-vad-asr.sh ../..
+
+      - name: Show build scripts
+        shell: bash
+        run: |
+          cat ./run-vad-asr.sh
+
+      - uses: actions/upload-artifact@v4  # keep the generated script for debugging
+        with:
+          name: run-vad-asr-${{ matrix.index }}
+          path: ./run-vad-asr.sh
+
+      - name: Build sherpa-onnx for WebAssembly
+        shell: bash
+        env:
+          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          ./run-vad-asr.sh
+
+      - name: Release  # runs only for tag pushes in the csukuangfj / k2-fsa orgs
+        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
+        uses: svenstaro/upload-release-action@v2
+        with:
+          file_glob: true
+          overwrite: true
+          file: ./*.tar.bz2
+
+      - name: Upload wasm files
+        uses: actions/upload-artifact@v4
+        with:
+          name: sherpa-onnx-wasm-simd-vad-asr-${{ matrix.index }}
+          path: ./sherpa-onnx-wasm-simd-*.tar.bz2
--- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
+
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
+        with:
+          version: 3.1.51
+          actions-cache-folder: 'emsdk-cache'

      - name: View emsdk version
        shell: bash
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
+option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
@@ -137,6 +138,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
+message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
@@ -211,11 +213,22 @@ if(SHERPA_ONNX_ENABLE_WASM)
 endif()

 if(SHERPA_ONNX_ENABLE_WASM_KWS)
+  if(NOT SHERPA_ONNX_ENABLE_WASM)
+    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for KWS")
+  endif()
  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
 endif()

 if(SHERPA_ONNX_ENABLE_WASM_VAD)
-  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1)
+  if(NOT SHERPA_ONNX_ENABLE_WASM)
+    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD")
+  endif()
+endif()
+
+if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
+  if(NOT SHERPA_ONNX_ENABLE_WASM)
+    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD+ASR")
+  endif()
 endif()

 if(NOT CMAKE_CXX_STANDARD)
--- a/README.md
+++ b/README.md
@@ -14,13 +14,13 @@

 ### Supported platforms

-|Architecture| Android          | iOS           | Windows    | macOS | linux |
-|------------|------------------|---------------|------------|-------|-------|
-|   x64      |  ✔️               |               |   ✔️        | ✔️     |  ✔️    |
-|   x86      |  ✔️               |               |   ✔️        |       |       |
-|   arm64    |  ✔️               | ✔️             |   ✔️        | ✔️     |  ✔️    |
-|   arm32    |  ✔️               |               |            |       |  ✔️    |
-|   riscv64  |                  |               |            |       |  ✔️    |
+|Architecture| Android | iOS     | Windows    | macOS | linux |
+|------------|---------|---------|------------|-------|-------|
+|   x64      |  ✔️      |         |   ✔️        | ✔️     |  ✔️    |
+|   x86      |  ✔️      |         |   ✔️        |       |       |
+|   arm64    |  ✔️      | ✔️       |   ✔️        | ✔️     |  ✔️    |
+|   arm32    |  ✔️      |         |            |       |  ✔️    |
+|   riscv64  |         |         |            |       |  ✔️    |


 ### Supported programming languages
@@ -37,7 +37,7 @@
 |-------|----------|----------|------------|
 | ✔️     |  ✔️       |   ✔️      |    ✔️       |

-For Rust support, please see https://github.com/thewh1teagle/sherpa-rs
+For Rust support, please see [sherpa-rs][sherpa-rs]

 It also supports WebAssembly.

@@ -51,7 +51,7 @@ This repository supports running the following functions **locally**
  - Speaker verification
  - Spoken language identification
  - Audio tagging
-  - VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad))
+  - VAD (e.g., [silero-vad][silero-vad])
  - Keyword spotting

 on the following platforms and operating systems:
@@ -62,11 +62,12 @@ on the following platforms and operating systems:
  - iOS
  - NodeJS
  - WebAssembly
-  - [Raspberry Pi](https://www.raspberrypi.com/)
-  - [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf)
-  - [LicheePi4A](https://sipeed.com/licheepi4a)
-  - [VisionFive 2](https://www.starfivetech.com/en/site/boards)
-  - [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html)
+  - [Raspberry Pi][Raspberry Pi]
+  - [RV1126][RV1126]
+  - [LicheePi4A][LicheePi4A]
+  - [VisionFive 2][VisionFive 2]
+  - [旭日X3派][旭日X3派]
+  - [爱芯派][爱芯派]
  - etc

 with the following APIs
@@ -81,59 +82,68 @@ with the following APIs
 You can visit the following Huggingface spaces to try `sherpa-onnx` without
 installing anything. All you need is a browser.

-| Description | URL |
-|---|---|
-| Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)|
-| Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)|
-| Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)|
-| Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)|
-|Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)|
-|Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)|
+| Description                                           | URL                                |
+|-------------------------------------------------------|------------------------------------|
+| Speech recognition                                    | [Click me][hf-space-asr]           |
+| Speech recognition with [Whisper][Whisper]            | [Click me][hf-space-asr-whisper]   |
+| Speech synthesis                                      | [Click me][hf-space-tts]           |
+| Generate subtitles                                    | [Click me][hf-space-subtitle]      |
+| Audio tagging                                         | [Click me][hf-space-audio-tagging] |
+| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper]  |

 We also have spaces built using WebAssembly. The are listed below:

-| Description | URL| Chinese users|
-|---|---|---|
-|Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)|
-|Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|
-|Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)|
-|Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)|
-|Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)|
-|Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)|
-|Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)|
+| Description                                                                              | Huggingface space| ModelScope space|
+|------------------------------------------------------------------------------------------|------------------|-----------------|
+|Voice activity detection with [silero-vad][silero-vad]                                    | [Click me][wasm-hf-vad]|[地址][wasm-ms-vad]|
+|Real-time speech recognition (Chinese + English) with Zipformer                           | [Click me][wasm-hf-streaming-asr-zh-en-zipformer]|[地址][wasm-ms-streaming-asr-zh-en-zipformer]|
+|Real-time speech recognition (Chinese + English) with Paraformer                          |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]|
+|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]|
+|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer]    |[地址][wasm-ms-streaming-asr-en-zipformer]|
+|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
+|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
+|VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech]    |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
+|VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech]  |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
+|VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
+|VAD + speech recognition (Thai) with Zipformer trained with [GigaSpeech2][GigaSpeech2]      |[Click me][wasm-hf-vad-asr-th-zipformer-gigaspeech2]| [地址][wasm-ms-vad-asr-th-zipformer-gigaspeech2]|
+|VAD + speech recognition (Chinese 多种方言) with a [TeleSpeech-ASR][TeleSpeech-ASR] CTC model|[Click me][wasm-hf-vad-asr-zh-telespeech]| [地址][wasm-ms-vad-asr-zh-telespeech]|
+|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-large          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-large]| [地址][wasm-ms-vad-asr-zh-en-paraformer-large]|
+|VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
+|Speech synthesis (English)                                                                  |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
+|Speech synthesis (German)                                                                   |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|

 ### Links for pre-built Android APKs

-| Description                    | URL                                                                                     | 中国用户                                                                             |
-|--------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|
-| Streaming speech recognition             | [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk.html)                        | [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html)                        |
-| Text-to-speech | [Address](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html)                     | [点此](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html)                     |
-|Voice activity detection (VAD) | [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html)|
-|VAD + non-streaming speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html)|
-|Two-pass speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html)|
-| Audio tagging                  | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html)                  | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html)                  |
-| Audio tagging (WearOS)         | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html)           | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html)           |
-| Speaker identification         | [Address](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html)         | [点此](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html)         |
-| Spoken language identification | [Address](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html) |
-|Keyword spotting| [Address](https://k2-fsa.github.io/sherpa/onnx/kws/apk.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html)|
+| Description                            | URL                          | 中国用户                    |
+|----------------------------------------|------------------------------|-----------------------------|
+| Streaming speech recognition           | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]|
+| Text-to-speech                         | [Address][apk-tts]           | [点此][apk-tts-cn]          |
+| Voice activity detection (VAD)         | [Address][apk-vad]           | [点此][apk-vad-cn]          |
+| VAD + non-streaming speech recognition | [Address][apk-vad-asr]       | [点此][apk-vad-asr-cn]      |
+| Two-pass speech recognition            | [Address][apk-2pass]         | [点此][apk-2pass-cn]        |
+| Audio tagging                          | [Address][apk-at]            | [点此][apk-at-cn]           |
+| Audio tagging (WearOS)                 | [Address][apk-at-wearos]     | [点此][apk-at-wearos-cn]    |
+| Speaker identification                 | [Address][apk-sid]           | [点此][apk-sid-cn]          |
+| Spoken language identification         | [Address][apk-slid]          | [点此][apk-slid-cn]         |
+| Keyword spotting                       | [Address][apk-kws]           | [点此][apk-kws-cn]          |

 ### Links for pre-built Flutter APPs

 #### Real-time speech recognition

-| Description                    | URL                                                                 | 中国用户                                                            |
-|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|
-| Streaming speech recognition   | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html)|
+| Description                    | URL                                 | 中国用户                            |
+|--------------------------------|-------------------------------------|-------------------------------------|
+| Streaming speech recognition   | [Address][apk-flutter-streaming-asr]| [点此][apk-flutter-streaming-asr-cn]|

 #### Text-to-speech

-| Description                    | URL                                                          | 中国用户                                                                    |
-|--------------------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------|
-| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html)|
-| Linux (x64)    | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html)       | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html)      |
-| macOS (x64)    | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html)   | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html)  |
-| macOS (arm64)  | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html)|
-| Windows (x64)  | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html)         | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html)        |
+| Description                              | URL                                | 中国用户                           |
+|------------------------------------------|------------------------------------|------------------------------------|
+| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address][flutter-tts-android]     | [点此][flutter-tts-android-cn]     |
+| Linux (x64)                              | [Address][flutter-tts-linux]       | [点此][flutter-tts-linux-cn]       |
+| macOS (x64)                              | [Address][flutter-tts-macos-x64]   | [点此][flutter-tts-macos-x64-cn]   |
+| macOS (arm64)                            | [Address][flutter-tts-macos-arm64] | [点此][flutter-tts-macos-arm64-cn] |
+| Windows (x64)                            | [Address][flutter-tts-win-x64]     | [点此][flutter-tts-win-x64-cn]     |

 > Note: You need to build from source for iOS.

@@ -141,23 +151,23 @@ We also have spaces built using WebAssembly. The are listed below:

 #### Generating subtitles

-| Description                    | URL                                                                 | 中国用户                                                            |
-|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|
-| Generate subtitles (生成字幕)   | [Address](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html)|
+| Description                    | URL                        | 中国用户                   |
+|--------------------------------|----------------------------|----------------------------|
+| Generate subtitles (生成字幕)  | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|


 ### Links for pre-trained models

-| Description                    | URL                                                                                                                            |
-|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
-| Speech recognition (speech to text, ASR)             | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models)              |
-| Text-to-speech (TTS)                 | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models)                             |
-| VAD | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx)|
-| Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)|
-| Audio tagging                  | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)|
-| Speaker identification (Speaker ID)         | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)|
-| Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from  [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
-| Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)|
+| Description                                 | URL                                                                                   |
+|---------------------------------------------|---------------------------------------------------------------------------------------|
+| Speech recognition (speech to text, ASR)    | [Address][asr-models]                                                                 |
+| Text-to-speech (TTS)                        | [Address][tts-models]                                                                 |
+| VAD                                         | [Address][vad-models]                                                                 |
+| Keyword spotting                            | [Address][kws-models]                                                                 |
+| Audio tagging                               | [Address][at-models]                                                                  |
+| Speaker identification (Speaker ID)         | [Address][sid-models]                                                                 |
+| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from  [Speech recognition][asr-models]|
+| Punctuation                                 | [Address][punct-models]                                                               |

 ### Useful links

@@ -169,3 +179,100 @@ We also have spaces built using WebAssembly. The are listed below:
 Please see
 https://k2-fsa.github.io/sherpa/social-groups.html
 for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
+
+[sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs
+[silero-vad]: https://github.com/snakers4/silero-vad
+[Raspberry Pi]: https://www.raspberrypi.com/
+[RV1126]: https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf
+[LicheePi4A]: https://sipeed.com/licheepi4a
+[VisionFive 2]: https://www.starfivetech.com/en/site/boards
+[旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
+[爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
+[hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
+[Whisper]: https://github.com/openai/whisper
+[hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
+[hf-space-tts]: https://huggingface.co/spaces/k2-fsa/text-to-speech
+[hf-space-subtitle]: https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos
+[hf-space-audio-tagging]: https://huggingface.co/spaces/k2-fsa/audio-tagging
+[hf-space-slid-whisper]: https://huggingface.co/spaces/k2-fsa/spoken-language-identification
+[wasm-hf-vad]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx
+[wasm-ms-vad]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx
+[wasm-hf-streaming-asr-zh-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
+[wasm-ms-streaming-asr-zh-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
+[wasm-hf-streaming-asr-zh-en-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
+[wasm-ms-streaming-asr-zh-en-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
+[Paraformer-large]: https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary
+[wasm-hf-streaming-asr-zh-en-yue-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
+[wasm-ms-streaming-asr-zh-en-yue-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
+[wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en
+[wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en
+[SenseVoice]: https://github.com/FunAudioLLM/SenseVoice
+[wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice
+[wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
+[wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
+[wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
+[wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
+[wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
+[wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
+[wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
+[ReazonSpeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
+[wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
+[wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
+[GigaSpeech2]: https://github.com/SpeechColab/GigaSpeech2
+[wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer
+[wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer
+[TeleSpeech-ASR]: https://github.com/Tele-AI/TeleSpeech-ASR
+[wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
+[wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
+[wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
+[wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
+[wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
+[wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
+[wasm-hf-tts-piper-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en
+[wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
+[wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
+[wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
+[apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
+[apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
+[apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
+[apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html
+[apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html
+[apk-vad-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html
+[apk-vad-asr]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html
+[apk-vad-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html
+[apk-2pass]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html
+[apk-2pass-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html
+[apk-at]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html
+[apk-at-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html
+[apk-at-wearos]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html
+[apk-at-wearos-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html
+[apk-sid]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
+[apk-sid-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html
+[apk-slid]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html
+[apk-slid-cn]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html
+[apk-kws]: https://k2-fsa.github.io/sherpa/onnx/kws/apk.html
+[apk-kws-cn]: https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html
+[apk-flutter-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html
+[apk-flutter-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html
+[flutter-tts-android]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html
+[flutter-tts-android-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html
+[flutter-tts-linux]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html
+[flutter-tts-linux-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html
+[flutter-tts-macos-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html
+[flutter-tts-macos-arm64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html
+[flutter-tts-macos-arm64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html
+[flutter-tts-macos-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html
+[flutter-tts-win-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html
+[flutter-tts-win-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html
+[lazarus-subtitle]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html
+[lazarus-subtitle-cn]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html
+[asr-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+[tts-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
+[vad-models]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+[kws-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
+[at-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
+[sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+[slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
+[punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
+[GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
+[WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
--- a/build-wasm-simd-vad-asr.sh
+++ b/build-wasm-simd-vad-asr.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+# Copyright (c)  2024  Xiaomi Corporation
+#
+# This script is to build sherpa-onnx for WebAssembly (VAD+ASR)
+# Note: ASR here means non-streaming ASR
+#
+# It locates the emscripten toolchain (from $EMSCRIPTEN, or from the emcc found
+# in PATH), configures and builds inside ./build-wasm-simd-vad-asr, installs
+# into ./build-wasm-simd-vad-asr/install and finally copies the JavaScript
+# wrappers for VAD and ASR into install/bin/wasm/vad-asr.
+
+set -ex
+
+# Honor a caller-provided $EMSCRIPTEN; otherwise derive it from emcc.
+if [ -z "$EMSCRIPTEN" ]; then
+  if ! command -v emcc &> /dev/null; then
+    echo "Please install emscripten first"
+    echo ""
+    echo "You can use the following commands to install it:"
+    echo ""
+    echo "git clone https://github.com/emscripten-core/emsdk.git"
+    echo "cd emsdk"
+    echo "git pull"
+    echo "./emsdk install latest"
+    echo "./emsdk activate latest"
+    echo "source ./emsdk_env.sh"
+    exit 1
+  else
+    # Every expansion is quoted so that an install path with spaces survives.
+    EMSCRIPTEN="$(dirname "$(realpath "$(which emcc)")")"
+  fi
+fi
+
+export EMSCRIPTEN="$EMSCRIPTEN"
+echo "EMSCRIPTEN: $EMSCRIPTEN"
+if [ ! -f "$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" ]; then
+  echo "Cannot find $EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
+  echo "Please make sure you have installed emsdk correctly"
+  exit 1
+fi
+
+mkdir -p build-wasm-simd-vad-asr
+pushd build-wasm-simd-vad-asr
+
+# wasm/vad-asr/CMakeLists.txt refuses to configure unless this is set.
+export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
+
+cmake \
+  -DCMAKE_INSTALL_PREFIX=./install \
+  -DCMAKE_BUILD_TYPE=Release \
+  -DCMAKE_TOOLCHAIN_FILE="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake" \
+  \
+  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+  -DBUILD_SHARED_LIBS=OFF \
+  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+  -DSHERPA_ONNX_ENABLE_JNI=OFF \
+  -DSHERPA_ONNX_ENABLE_TTS=OFF \
+  -DSHERPA_ONNX_ENABLE_C_API=ON \
+  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
+  -DSHERPA_ONNX_ENABLE_GPU=OFF \
+  -DSHERPA_ONNX_ENABLE_WASM=ON \
+  -DSHERPA_ONNX_ENABLE_WASM_VAD_ASR=ON \
+  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
+  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
+  ..
+make -j2
+make install
+
+echo "pwd: $PWD"
+
+# The web app also needs the hand-written JS wrappers from wasm/vad and wasm/asr.
+cp -fv ../wasm/vad/sherpa-onnx-vad.js ./install/bin/wasm/vad-asr/
+cp -fv ../wasm/asr/sherpa-onnx-asr.js ./install/bin/wasm/vad-asr/
+
+ls -lh install/bin/wasm/vad-asr
--- a/scripts/wasm/generate-vad-asr.py
+++ b/scripts/wasm/generate-vad-asr.py
@@ -0,0 +1,229 @@
+#!/usr/bin/env python3
+
+import argparse
+from dataclasses import dataclass
+from typing import List, Optional
+
+import jinja2
+
+
+def get_args():
+    """Parse the command-line options that select this runner's share of work.
+
+    Returns:
+      The parsed argparse namespace with two int attributes: ``total``
+      (number of runners, default 1) and ``index`` (index of the current
+      runner, default 0).
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--total",
+        type=int,
+        default=1,
+        help="Number of runners",
+    )
+    parser.add_argument(
+        "--index",
+        type=int,
+        default=0,
+        help="Index of the current runner",
+    )
+    return parser.parse_args()
+
+
+@dataclass
+class Model:
+    """One model entry that is rendered into the run-vad-asr.sh template."""
+
+    # Name of the model; the template downloads ${model_name}.tar.bz2
+    model_name: str
+    hf: str  # huggingface space name
+    ms: str  # modelscope space name
+    # Suffix used by the template when naming the output directory/tarball
+    short_name: str
+    # Shell snippet inserted into the template right after the model archive
+    # has been extracted and removed
+    cmd: str = ""
+
+
+def get_models():
+    """Return the list of models for which a VAD+ASR WebAssembly app is built.
+
+    Each entry's ``cmd`` is a shell snippet that moves the model files out of
+    the extracted ``$model_name`` directory into its parent directory under
+    fixed file names, removes the extracted directory, and rewrites the word
+    ``Zipformer`` in ``../index.html`` with a model-specific description.
+
+    Returns:
+      A list of Model objects.
+    """
+    models = [
+        Model(
+            model_name="sherpa-onnx-whisper-tiny.en",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
+            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
+            short_name="vad-asr-en-whisper_tiny",
+            cmd="""
+            pushd $model_name
+            mv -v tiny.en-encoder.int8.onnx ../whisper-encoder.onnx
+            mv -v tiny.en-decoder.int8.onnx ../whisper-decoder.onnx
+            mv -v tiny.en-tokens.txt ../tokens.txt
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Whisper tiny.en supporting English 英文/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice",
+            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice",
+            short_name="vad-asr-zh_en_ja_ko_cantonese-sense_voice_small",
+            cmd="""
+            pushd $model_name
+            mv -v model.int8.onnx ../sense-voice.onnx
+            mv -v tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/SenseVoice Small supporting English, Chinese, Japanese, Korean, Cantonese 中英日韩粤/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
+            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
+            short_name="vad-asr-zh_en-paraformer_large",
+            cmd="""
+            pushd $model_name
+            mv -v model.int8.onnx ../paraformer.onnx
+            mv -v tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Paraformer supporting Chinese, English 中英/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
+            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
+            short_name="vad-asr-zh_en-paraformer_small",
+            cmd="""
+            pushd $model_name
+            mv -v model.int8.onnx ../paraformer.onnx
+            mv -v tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Paraformer-small supporting Chinese, English 中英文/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
+            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
+            short_name="vad-asr-en-zipformer_gigaspeech",
+            cmd="""
+            pushd $model_name
+            mv encoder-epoch-30-avg-1.int8.onnx ../transducer-encoder.onnx
+            mv decoder-epoch-30-avg-1.onnx ../transducer-decoder.onnx
+            mv joiner-epoch-30-avg-1.int8.onnx ../transducer-joiner.onnx
+            mv tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Zipformer supporting English 英语/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="icefall-asr-zipformer-wenetspeech-20230615",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
+            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
+            short_name="vad-asr-zh-zipformer_wenetspeech",
+            cmd="""
+            pushd $model_name
+            mv -v data/lang_char/tokens.txt ../
+            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ../transducer-encoder.onnx
+            mv -v exp/decoder-epoch-12-avg-4.onnx ../transducer-decoder.onnx
+            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ../transducer-joiner.onnx
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Zipformer supporting Chinese 中文/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
+            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
+            short_name="vad-asr-ja-zipformer_reazonspeech",
+            cmd="""
+            pushd $model_name
+            mv encoder-epoch-99-avg-1.int8.onnx ../transducer-encoder.onnx
+            mv decoder-epoch-99-avg-1.onnx ../transducer-decoder.onnx
+            mv joiner-epoch-99-avg-1.int8.onnx ../transducer-joiner.onnx
+            mv tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Zipformer supporting Japanese 日语/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
+            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
+            short_name="vad-asr-th-zipformer_gigaspeech2",
+            cmd="""
+            pushd $model_name
+            mv encoder-epoch-12-avg-5.int8.onnx ../transducer-encoder.onnx
+            mv decoder-epoch-12-avg-5.onnx ../transducer-decoder.onnx
+            mv joiner-epoch-12-avg-5.int8.onnx ../transducer-joiner.onnx
+            mv tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/Zipformer supporting Thai 泰语/g' ../index.html
+            git diff
+            """,
+        ),
+        Model(
+            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
+            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
+            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
+            short_name="vad-asr-zh-telespeech",
+            cmd="""
+            pushd $model_name
+            mv model.int8.onnx ../telespeech.onnx
+            mv tokens.txt ../
+            popd
+            rm -rf $model_name
+            sed -i.bak 's/Zipformer/TeleSpeech-ASR supporting Chinese 多种中文方言/g' ../index.html
+            git diff
+            """,
+        ),
+    ]
+    return models
+
+
+def main():
+    args = get_args()
+    index = args.index
+    total = args.total
+    assert 0 <= index < total, (index, total)
+
+    all_model_list = get_models()
+
+    num_models = len(all_model_list)
+
+    num_per_runner = num_models // total
+    if num_per_runner <= 0:
+        raise ValueError(f"num_models: {num_models}, num_runners: {total}")
+
+    start = index * num_per_runner
+    end = start + num_per_runner
+
+    remaining = num_models - args.total * num_per_runner
+
+    print(f"{index}/{total}: {start}-{end}/{num_models}")
+
+    d = dict()
+    d["model_list"] = all_model_list[start:end]
+    if index < remaining:
+        s = args.total * num_per_runner + index
+        d["model_list"].append(all_model_list[s])
+        print(f"{s}/{num_models}")
+
+    filename_list = [
+        "./run-vad-asr.sh",
+    ]
+    for filename in filename_list:
+        environment = jinja2.Environment()
+        with open(f"{filename}.in") as f:
+            s = f.read()
+        template = environment.from_string(s)
+
+        s = template.render(**d)
+        with open(filename, "w") as f:
+            print(s, file=f)
+
+
+if __name__ == "__main__":
+    main()
--- a/scripts/wasm/run-vad-asr.sh.in
+++ b/scripts/wasm/run-vad-asr.sh.in
@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+#
+# Build WebAssembly APPs for huggingface spaces and modelscope spaces
+#
+# This file is a Jinja2 template. The section between the "for" and "endfor"
+# tags below is emitted once per entry of model_list.
+#
+# NOTE(review): "set -ex" traces every command, so the expanded "git push" URLs
+# below (which embed MS_TOKEN / HF_TOKEN) are echoed to the log -- confirm that
+# the CI environment masks them.
+
+set -ex
+
+# Print a timestamped message prefixed with the caller's file name, line number
+# and function name.
+log() {
+  # This function is from espnet
+  local fname=${BASH_SOURCE[1]##*/}
+  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
+}
+
+# Take the line(s) of ./CMakeLists.txt containing SHERPA_ONNX_VERSION, keep the
+# second space-separated field and then the text between the double quotes.
+SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
+
+
+{% for model in model_list %}
+model_name={{ model.model_name }}
+short_name={{ model.short_name }}
+hf_name={{ model.hf }}
+ms_name={{ model.ms }}
+
+# Start from a clean wasm/vad-asr and download the VAD model and the ASR model
+# into wasm/vad-asr/assets.
+pushd wasm/vad-asr
+git checkout .
+rm -rf assets
+mkdir assets
+cd assets
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
+tar xvf ${model_name}.tar.bz2
+rm ${model_name}.tar.bz2
+
+# Model-specific commands; they run inside wasm/vad-asr/assets.
+{{ model.cmd }}
+
+popd
+
+ls -lh wasm/vad-asr/assets
+
+# Remove the output of a previous iteration before building again.
+rm -rf build-wasm-simd-vad-asr/install
+rm -rf build-wasm-simd-vad-asr/wasm
+
+./build-wasm-simd-vad-asr.sh
+
+# Package the installed files as a tarball named after the version and model.
+dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${short_name}
+mv build-wasm-simd-vad-asr/install/bin/wasm/vad-asr $dst
+ls -lh $dst
+tar cjfv $dst.tar.bz2 ./$dst
+ls -lh *.tar.bz2
+
+git config --global user.email "csukuangfj@gmail.com"
+git config --global user.name "Fangjun Kuang"
+
+export GIT_LFS_SKIP_SMUDGE=1
+export GIT_CLONE_PROTECTION_ACTIVE=false
+
+# Publish the built files to the modelscope studio.
+rm -rf ms
+git clone https://www.modelscope.cn/studios/$ms_name.git ms
+
+cd ms
+cp -v ../$dst/* .
+
+git status
+git lfs track "*.data"
+git lfs track "*.wasm"
+ls -lh
+
+git add .
+git commit -m "update model"
+git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git
+cd ..
+rm -rf ms
+
+# Publish the same files to the huggingface space.
+rm -rf huggingface
+
+git clone https://huggingface.co/spaces/$hf_name huggingface
+cd huggingface
+cp -v ../$dst/* .
+
+git status
+git lfs track "*.data"
+git lfs track "*.wasm"
+ls -lh
+
+git add .
+git commit -m "update model"
+git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/$hf_name main
+cd ..
+rm -rf huggingface
+rm -rf $dst
+
+ls -lh *.tar.bz2
+
+{% endfor %}
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -13,6 +13,7 @@
 #include "sherpa-onnx/csrc/audio-tagging.h"
 #include "sherpa-onnx/csrc/circular-buffer.h"
 #include "sherpa-onnx/csrc/display.h"
+#include "sherpa-onnx/csrc/file-utils.h"
 #include "sherpa-onnx/csrc/keyword-spotter.h"
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/offline-punctuation.h"
@@ -1638,3 +1639,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
 void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) {
  p->impl->Reset();
 }
+
+// Return 1 if the file exists; return 0 if it does not exist or if
+// filename is NULL.
+//
+// The NULL check keeps a null pointer coming from a C or JavaScript caller
+// from being forwarded to sherpa_onnx::FileExists().
+int32_t SherpaOnnxFileExists(const char *filename) {
+  if (filename == nullptr) {
+    return 0;
+  }
+
+  return sherpa_onnx::FileExists(filename) ? 1 : 0;
+}
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -1361,6 +1361,9 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
 SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
    const SherpaOnnxLinearResampler *p);

+// Return 1 if the file exists; return 0 if the file does not exist.
+SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
+
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD)
  add_subdirectory(vad)
 endif()

+if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
+  add_subdirectory(vad-asr)
+endif()
+
 if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  add_subdirectory(nodejs)
 endif()
--- a/wasm/asr/assets/README.md
+++ b/wasm/asr/assets/README.md
@@ -80,3 +80,10 @@ assets fangjun$ tree -L 1

 0 directories, 4 files
 ```
+
+You can find example build scripts at:
+
+  - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+  - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+  - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+  - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
--- a/wasm/asr/index.html
+++ b/wasm/asr/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for ASR</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/tts/assets/README.md
+++ b/wasm/tts/assets/README.md
@@ -30,3 +30,8 @@ assets fangjun$ tree -L 1

 1 directory, 3 files
 ```
+
+You can find example build scripts at:
+
+  - English TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+  - German TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-de-tts.yaml
--- a/wasm/vad-asr/CMakeLists.txt
+++ b/wasm/vad-asr/CMakeLists.txt
@@ -0,0 +1,83 @@
+# This directory must be configured through the wrapper script, which exports
+# SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON before invoking cmake.
+if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
+  message(FATAL_ERROR "Please use ./build-wasm-simd-vad-asr.sh to build for wasm VAD+ASR")
+endif()
+
+# Stop early if the VAD model or the tokens file has not been placed in
+# the assets directory of this folder.
+if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt")
+  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
+endif()
+
+# C API functions that are listed in -sEXPORTED_FUNCTIONS further below.
+set(exported_functions
+  # VAD
+  SherpaOnnxCreateCircularBuffer
+  SherpaOnnxDestroyCircularBuffer
+  SherpaOnnxCircularBufferPush
+  SherpaOnnxCircularBufferGet
+  SherpaOnnxCircularBufferFree
+  SherpaOnnxCircularBufferPop
+  SherpaOnnxCircularBufferSize
+  SherpaOnnxCircularBufferHead
+  SherpaOnnxCircularBufferReset
+  SherpaOnnxCreateVoiceActivityDetector
+  SherpaOnnxDestroyVoiceActivityDetector
+  SherpaOnnxVoiceActivityDetectorAcceptWaveform
+  SherpaOnnxVoiceActivityDetectorEmpty
+  SherpaOnnxVoiceActivityDetectorDetected
+  SherpaOnnxVoiceActivityDetectorPop
+  SherpaOnnxVoiceActivityDetectorClear
+  SherpaOnnxVoiceActivityDetectorFront
+  SherpaOnnxDestroySpeechSegment
+  SherpaOnnxVoiceActivityDetectorReset
+  SherpaOnnxVoiceActivityDetectorFlush
+  # non-streaming ASR
+  SherpaOnnxAcceptWaveformOffline
+  SherpaOnnxCreateOfflineRecognizer
+  SherpaOnnxCreateOfflineStream
+  SherpaOnnxDecodeMultipleOfflineStreams
+  SherpaOnnxDecodeOfflineStream
+  SherpaOnnxDestroyOfflineRecognizer
+  SherpaOnnxDestroyOfflineRecognizerResult
+  SherpaOnnxDestroyOfflineStream
+  SherpaOnnxDestroyOfflineStreamResultJson
+  SherpaOnnxGetOfflineStreamResult
+  SherpaOnnxGetOfflineStreamResultAsJson
+  #
+  SherpaOnnxFileExists
+)
+# Prefix every name with an underscore and join the names with commas.
+set(mangled_exported_functions)
+foreach(x IN LISTS exported_functions)
+  list(APPEND mangled_exported_functions "_${x}")
+endforeach()
+list(JOIN mangled_exported_functions "," all_exported_functions)
+
+include_directories(${CMAKE_SOURCE_DIR})
+# Emscripten flags: force filesystem support, start with 512MB of memory and
+# allow the memory to grow.
+set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
+string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
+# Besides the C API functions collected above, export _CopyHeap, _malloc and _free.
+string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
+# Package the assets directory; "@." maps it to "." in the virtual filesystem.
+string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
+string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
+
+message(STATUS "MY_FLAGS: ${MY_FLAGS}")
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
+# NOTE(review): CMAKE_EXECUTBLE_LINKER_FLAGS looks like a misspelling of
+# CMAKE_EXE_LINKER_FLAGS; as written, this presumably sets a variable that
+# CMake does not consult -- confirm before changing, since fixing the name
+# would add MY_FLAGS to the link command a second time.
+set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
+
+# The install rules below refer to the generated file by its .js name.
+if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
+  message(FATAL_ERROR "The default suffix for building executables should be .js!")
+endif()
+# set(CMAKE_EXECUTABLE_SUFFIX ".html")
+
+# Build the wasm entry point and link it against the C API library.
+add_executable(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-wasm-main-vad-asr.cc)
+target_link_libraries(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-c-api)
+install(TARGETS sherpa-onnx-wasm-main-vad-asr DESTINATION bin/wasm/vad-asr)
+
+# Install the generated .js/.wasm/.data files together with the web page and
+# its application script.
+install(
+  FILES
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.js"
+    "index.html"
+    "app-vad-asr.js"
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.wasm"
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.data"
+  DESTINATION
+    bin/wasm/vad-asr
+)
--- a/wasm/vad-asr/app-vad-asr.js
+++ b/wasm/vad-asr/app-vad-asr.js
@@ -0,0 +1,389 @@
+// This file copies and modifies code
+// from https://mdn.github.io/web-dictaphone/scripts/app.js
+// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
+
+const startBtn = document.getElementById('startBtn');
+const stopBtn = document.getElementById('stopBtn');
+const clearBtn = document.getElementById('clearBtn');
+const hint = document.getElementById('hint');
+const soundClips = document.getElementById('sound-clips');
+
+let textArea = document.getElementById('results');
+
+let lastResult = '';
+let resultList = [];
+
+clearBtn.onclick = function() {
+  resultList = [];
+  textArea.value = getDisplayResult();
+  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+};
+
+function getDisplayResult() {
+  let i = 0;
+  let ans = '';
+  for (let s in resultList) {
+    if (resultList[s] == '') {
+      continue;
+    }
+
+    if (resultList[s] == 'Speech detected') {
+      ans += '' + i + ': ' + resultList[s];
+      i += 1;
+    } else {
+      ans += ', ' + resultList[s] + '\n';
+    }
+  }
+
+  if (lastResult.length > 0) {
+    ans += '' + i + ': ' + lastResult + '\n';
+  }
+  return ans;
+}
+
+
+
+Module = {};
+
+let audioCtx;
+let mediaStream;
+
+let expectedSampleRate = 16000;
+let recordSampleRate;  // the sampleRate of the microphone
+let recorder = null;   // the microphone
+let leftchannel = [];  // TODO: Use a single channel
+
+let recordingLength = 0;  // number of samples so far
+
+let vad = null;
+let buffer = null;
+let recognizer = null;
+let printed = false;
+
+function fileExists(filename) {
+  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
+  const buffer = Module._malloc(filenameLen);
+  Module.stringToUTF8(filename, buffer, filenameLen);
+
+  let exists = Module._SherpaOnnxFileExists(buffer);
+
+  Module._free(buffer);
+
+  return exists;
+}
+
+function createOfflineRecognizerSenseVoice() {}
+
+function initOfflineRecognizer() {
+  let config = {
+    modelConfig: {
+      debug: 1,
+      tokens: './tokens.txt',
+    },
+  };
+  if (fileExists('sense-voice.onnx') == 1) {
+    config.modelConfig.senseVoice = {
+      model: './sense-voice.onnx',
+      useInverseTextNormalization: 1,
+    };
+  } else if (fileExists('whisper-encoder.onnx')) {
+    config.modelConfig.whisper = {
+      encoder: './whisper-encoder.onnx',
+      decoder: './whisper-decoder.onnx',
+    };
+  } else if (fileExists('transducer-encoder.onnx')) {
+    config.modelConfig.transducer = {
+      encoder: './transducer-encoder.onnx',
+      decoder: './transducer-decoder.onnx',
+      joiner: './transducer-joiner.onnx',
+    };
+    config.modelConfig.modelType = 'transducer';
+  } else if (fileExists('nemo-transducer-encoder.onnx')) {
+    config.modelConfig.transducer = {
+      encoder: './nemo-transducer-encoder.onnx',
+      decoder: './nemo-transducer-decoder.onnx',
+      joiner: './nemo-transducer-joiner.onnx',
+    };
+    config.modelConfig.modelType = 'nemo_transducer';
+  } else if (fileExists('paraformer.onnx')) {
+    config.modelConfig.paraformer = {
+      model: './paraformer.onnx',
+    };
+  } else if (fileExists('telespeech.onnx')) {
+    config.modelConfig.telespeechCtc = './telespeech.onnx';
+  } else {
+    console.log('Please specify a model.');
+    alert('Please specify a model.');
+  }
+
+  recognizer = new OfflineRecognizer(config, Module);
+}
+
+Module.onRuntimeInitialized = function() {
+  console.log('inited!');
+  hint.innerText = 'Model loaded! Please click start';
+
+  startBtn.disabled = false;
+
+  vad = createVad(Module);
+  console.log('vad is created!', vad);
+
+  buffer = new CircularBuffer(30 * 16000, Module);
+  console.log('CircularBuffer is created!', buffer);
+
+  initOfflineRecognizer();
+};
+
+
+
+if (navigator.mediaDevices.getUserMedia) {
+  console.log('getUserMedia supported.');
+
+  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
+  const constraints = {audio: true};
+
+  let onSuccess = function(stream) {
+    if (!audioCtx) {
+      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
+    }
+    console.log(audioCtx);
+    recordSampleRate = audioCtx.sampleRate;
+    console.log('sample rate ' + recordSampleRate);
+
+    // creates an audio node from the microphone incoming stream
+    mediaStream = audioCtx.createMediaStreamSource(stream);
+    console.log('media stream', mediaStream);
+
+    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
+    // bufferSize: the onaudioprocess event is called when the buffer is full
+    var bufferSize = 4096;
+    var numberOfInputChannels = 1;
+    var numberOfOutputChannels = 2;
+    if (audioCtx.createScriptProcessor) {
+      recorder = audioCtx.createScriptProcessor(
+          bufferSize, numberOfInputChannels, numberOfOutputChannels);
+    } else {
+      recorder = audioCtx.createJavaScriptNode(
+          bufferSize, numberOfInputChannels, numberOfOutputChannels);
+    }
+    console.log('recorder', recorder);
+
+    recorder.onaudioprocess = function(e) {
+      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
+      samples = downsampleBuffer(samples, expectedSampleRate);
+      buffer.push(samples);
+      while (buffer.size() > vad.config.sileroVad.windowSize) {
+        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
+        vad.acceptWaveform(s);
+        buffer.pop(vad.config.sileroVad.windowSize);
+
+        if (vad.isDetected() && !printed) {
+          printed = true;
+          lastResult = 'Speech detected';
+        }
+
+        if (!vad.isDetected()) {
+          printed = false;
+          if (lastResult != '') {
+            resultList.push(lastResult);
+          }
+          lastResult = '';
+        }
+
+        while (!vad.isEmpty()) {
+          const segment = vad.front();
+          const duration = segment.samples.length / expectedSampleRate;
+          let durationStr = `Duration: ${duration.toFixed(3)} seconds`;
+          vad.pop();
+
+          // non-streaming asr
+          const stream = recognizer.createStream();
+          stream.acceptWaveform(expectedSampleRate, segment.samples);
+          recognizer.decode(stream);
+          let recognitionResult = recognizer.getResult(stream);
+          console.log(recognitionResult);
+          let text = recognitionResult.text;
+          stream.free();
+          console.log(text);
+
+          if (text != '') {
+            durationStr += `. Result: ${text}`;
+          }
+
+          resultList.push(durationStr);
+
+
+          // now save the segment to a wav file
+          let buf = new Int16Array(segment.samples.length);
+          for (var i = 0; i < segment.samples.length; ++i) {
+            let s = segment.samples[i];
+            if (s >= 1)
+              s = 1;
+            else if (s <= -1)
+              s = -1;
+
+            buf[i] = s * 32767;
+          }
+
+          let clipName = new Date().toISOString() + '--' + durationStr;
+
+          const clipContainer = document.createElement('article');
+          const clipLabel = document.createElement('p');
+          const audio = document.createElement('audio');
+          const deleteButton = document.createElement('button');
+
+          clipContainer.classList.add('clip');
+          audio.setAttribute('controls', '');
+          deleteButton.textContent = 'Delete';
+          deleteButton.className = 'delete';
+
+          clipLabel.textContent = clipName;
+
+          clipContainer.appendChild(audio);
+
+          clipContainer.appendChild(clipLabel);
+          clipContainer.appendChild(deleteButton);
+          soundClips.appendChild(clipContainer);
+
+          audio.controls = true;
+          const blob = toWav(buf);
+
+          leftchannel = [];
+          const audioURL = window.URL.createObjectURL(blob);
+          audio.src = audioURL;
+
+          deleteButton.onclick = function(e) {
+            let evtTgt = e.target;
+            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
+          };
+
+          clipLabel.onclick = function() {
+            const existingName = clipLabel.textContent;
+            const newClipName = prompt('Enter a new name for your sound clip?');
+            if (newClipName === null) {
+              clipLabel.textContent = existingName;
+            } else {
+              clipLabel.textContent = newClipName;
+            }
+          };
+        }
+      }
+
+      textArea.value = getDisplayResult();
+      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+    };
+
+    startBtn.onclick = function() {
+      mediaStream.connect(recorder);
+      recorder.connect(audioCtx.destination);
+
+      console.log('recorder started');
+
+      stopBtn.disabled = false;
+      startBtn.disabled = true;
+    };
+
+    stopBtn.onclick = function() {
+      vad.reset();
+      buffer.reset();
+      console.log('recorder stopped');
+
+      // stopBtn recording
+      recorder.disconnect(audioCtx.destination);
+      mediaStream.disconnect(recorder);
+
+      startBtn.style.background = '';
+      startBtn.style.color = '';
+      // mediaRecorder.requestData();
+
+      stopBtn.disabled = true;
+      startBtn.disabled = false;
+    };
+  };
+
+  let onError = function(err) {
+    console.log('The following error occured: ' + err);
+  };
+
+  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
+} else {
+  console.log('getUserMedia not supported on your browser!');
+  alert('getUserMedia not supported on your browser!');
+}
+
+
+// this function is copied/modified from
+// https://gist.github.com/meziantou/edb7217fddfbb70e899e
+function flatten(listOfSamples) {
+  let n = 0;
+  for (let i = 0; i < listOfSamples.length; ++i) {
+    n += listOfSamples[i].length;
+  }
+  let ans = new Int16Array(n);
+
+  let offset = 0;
+  for (let i = 0; i < listOfSamples.length; ++i) {
+    ans.set(listOfSamples[i], offset);
+    offset += listOfSamples[i].length;
+  }
+  return ans;
+}
+
+// this function is copied/modified from
+// https://gist.github.com/meziantou/edb7217fddfbb70e899e
+function toWav(samples) {
+  let buf = new ArrayBuffer(44 + samples.length * 2);
+  var view = new DataView(buf);
+
+  // http://soundfile.sapp.org/doc/WaveFormat/
+  //                   F F I R
+  view.setUint32(0, 0x46464952, true);               // chunkID
+  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
+  //                   E V A W
+  view.setUint32(8, 0x45564157, true);  // format
+                                        //
+  //                      t m f
+  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
+  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
+  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
+  view.setUint16(22, 1, true);                   // numChannels: 1 channel
+  view.setUint32(24, expectedSampleRate, true);  // sampleRate
+  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
+  view.setUint16(32, 2, true);                       // blockAlign
+  view.setUint16(34, 16, true);                      // bitsPerSample
+  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
+  view.setUint32(40, samples.length * 2, true);      // subchunk2Size
+
+  let offset = 44;
+  for (let i = 0; i < samples.length; ++i) {
+    view.setInt16(offset, samples[i], true);
+    offset += 2;
+  }
+
+  return new Blob([view], {type: 'audio/wav'});
+}
+
+// this function is copied from
+// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
+function downsampleBuffer(buffer, exportSampleRate) {
+  if (exportSampleRate === recordSampleRate) {
+    return buffer;
+  }
+  var sampleRateRatio = recordSampleRate / exportSampleRate;
+  var newLength = Math.round(buffer.length / sampleRateRatio);
+  var result = new Float32Array(newLength);
+  var offsetResult = 0;
+  var offsetBuffer = 0;
+  while (offsetResult < result.length) {
+    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
+    var accum = 0, count = 0;
+    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
+      accum += buffer[i];
+      count++;
+    }
+    result[offsetResult] = accum / count;
+    offsetResult++;
+    offsetBuffer = nextOffsetBuffer;
+  }
+  return result;
+};
--- a/wasm/vad-asr/assets/README.md
+++ b/wasm/vad-asr/assets/README.md
@@ -0,0 +1,23 @@
+# Introduction
+
+## Download VAD models
+
+Please download
+https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad-asr/assets`.
+
+## Download non-streaming ASR models
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+to download a non-streaming ASR model, i.e., an offline ASR model.
+
+After downloading, you should rename the model files.
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-a-speech-recognition-model
+for how to rename.
+
+You can find example build scripts at the following address:
+
+  https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
--- a/wasm/vad-asr/index.html
+++ b/wasm/vad-asr/index.html
@@ -0,0 +1,43 @@
+<html lang="en">
+
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width" />
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + ASR</title>
+  <style>
+    h1,div {
+      text-align: center;
+    }
+    textarea {
+      width:100%;
+    }
+  </style>
+</head>
+
+<body>
+  <h1>
+    Next-gen Kaldi + WebAssembly<br/>
+    VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
+    (with Zipformer)
+  </h1>
+
+  <div>
+    <span id="hint">Loading model ... ...</span>
+    <br/>
+    <br/>
+    <button id="startBtn" disabled>Start</button>
+    <button id="stopBtn" disabled>Stop</button>
+    <button id="clearBtn">Clear</button>
+    <br/>
+    <br/>
+    <textarea id="results" rows="10" readonly></textarea>
+  </div>
+
+  <section flex="1" overflow="auto" id="sound-clips">
+  </section>
+
+  <script src="sherpa-onnx-asr.js"></script>
+  <script src="sherpa-onnx-vad.js"></script>
+  <script src="app-vad-asr.js"></script>
+  <script src="sherpa-onnx-wasm-main-vad-asr.js"></script>
+</body>
--- a/wasm/vad-asr/sherpa-onnx-asr.js
+++ b/wasm/vad-asr/sherpa-onnx-asr.js
@@ -0,0 +1 @@
+../asr/sherpa-onnx-asr.js
--- a/wasm/vad-asr/sherpa-onnx-vad.js
+++ b/wasm/vad-asr/sherpa-onnx-vad.js
@@ -0,0 +1 @@
+../vad/sherpa-onnx-vad.js
--- a/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
+++ b/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
@@ -0,0 +1,19 @@
+// wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
+//
+// Copyright (c)  2024  Xiaomi Corporation
+#include <stdio.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// see also
+// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
+
+extern "C" {
+
+// Copies `num_bytes` bytes starting at `src` to the buffer starting at `dst`.
+// `dst` must have room for at least `num_bytes` bytes.
+void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
+  std::copy_n(src, num_bytes, dst);
+}
+
+}  // extern "C"
--- a/wasm/vad/assets/README.md
+++ b/wasm/vad/assets/README.md
@@ -3,3 +3,6 @@
 Please download
 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
+
+You can find an example build script at
+https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
--- a/wasm/vad/index.html
+++ b/wasm/vad/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/vad/sherpa-onnx-vad.js
+++ b/wasm/vad/sherpa-onnx-vad.js
@@ -172,7 +172,6 @@ class Vad {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxVadModelConfig(configObj, Module);
-    Module._MyPrint(config.ptr);
    const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
        config.ptr, configObj.bufferSizeInSeconds || 30);
    freeConfig(config, Module);