WebAssembly example for VAD + Non-streaming ASR (#1284)

2024-08-24 13:24:52 +08:00
parent 1ef8a7a202
commit 537e163dd0
29 changed files with 1281 additions and 70 deletions
--- a/.github/workflows/wasm-simd-hf-space-de-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-de-tts.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
@@ -27,6 +27,9 @@ jobs:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+++ b/.github/workflows/wasm-simd-hf-space-en-tts.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
+++ b/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
@@ -25,6 +25,7 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
--- a/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
+++ b/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
@@ -0,0 +1,93 @@
 # Workflow that generates a VAD + non-streaming ASR WebAssembly build script
 # for each matrix shard, runs it, and uploads the resulting archives.
 name: wasm-simd-hf-space-vad-asr
 on:
  # Triggered by pushes to the `wasm` branch, by pushes of tags matching the
  # version-like pattern below, or manually via workflow_dispatch.
  push:
    branches:
      - wasm
    tags:
      - 'v[0-9]+.[0-9]+.[0-9]+*'
  workflow_dispatch:
 # Runs are grouped per ref; a newer run cancels one that is still in progress.
 concurrency:
  group: wasm-simd-hf-space-vad-asr${{ github.ref }}
  cancel-in-progress: true
 jobs:
  wasm-simd-hf-space-vad-asr:
    # The job is displayed as "<index>/<total>".
    name: ${{ matrix.index }}/${{ matrix.total }}
    runs-on: ${{ matrix.os }}
    strategy:
      # A failure in one shard does not cancel the remaining shards.
      fail-fast: false
      matrix:
        os: [ubuntu-latest]
        # The work is split into `total` shards; each job receives one `index`.
        # Both values are forwarded to generate-vad-asr.py below.
        total: ["8"]
        index: ["0", "1", "2", "3", "4", "5", "6", "7"]
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      # jinja2 is presumably required by scripts/wasm/generate-vad-asr.py
      # (the script is not visible here) - TODO confirm.
      - name: Install Python dependencies
        shell: bash
        run: |
          python3 -m pip install --upgrade pip jinja2
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
        run: |
          emcc -v
          echo "--------------------"
          emcc --check
      # Produces run-vad-asr.sh for this shard and moves it to the repository
      # root, where the later steps expect to find it.
      - name: Generate build script
        shell: bash
        run: |
          cd scripts/wasm
          total=${{ matrix.total }}
          index=${{ matrix.index }}
          ./generate-vad-asr.py --total $total --index $index
          chmod +x run-vad-asr.sh
          mv -v ./run-vad-asr.sh ../..
      - name: Show build scripts
        shell: bash
        run: |
          cat ./run-vad-asr.sh
      # Keep the generated script as an artifact, one per shard.
      - uses: actions/upload-artifact@v4
        with:
          name: run-vad-asr-${{ matrix.index }}
          path: ./run-vad-asr.sh
      # MS_TOKEN and HF_TOKEN are exposed to the generated script as
      # environment variables; how the script uses them is not visible here.
      - name: Build sherpa-onnx for WebAssembly
        shell: bash
        env:
          MS_TOKEN: ${{ secrets.MODEL_SCOPE_GIT_TOKEN }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
        run: |
          ./run-vad-asr.sh
      # Only runs for tag pushes in repositories owned by csukuangfj or k2-fsa.
      # NOTE(review): the step is named "Release jar" but it uploads
      # ./*.tar.bz2 archives; the name looks copied from another workflow -
      # consider renaming it.
      - name: Release jar
        if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
        uses: svenstaro/upload-release-action@v2
        with:
          file_glob: true
          overwrite: true
          file: ./*.tar.bz2
      # This step has no `if`, so the archives are also kept as a per-shard
      # workflow artifact when the release step above is skipped.
      - name: Upload wasm files
        uses: actions/upload-artifact@v4
        with:
          name: sherpa-onnx-wasm-simd-vad-asr-${{ matrix.index }}
          path: ./sherpa-onnx-wasm-simd-*.tar.bz2
--- a/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+++ b/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
@@ -25,8 +25,12 @@ jobs:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Install emsdk
        uses: mymindstorm/setup-emsdk@v14
        with:
          version: 3.1.51
          actions-cache-folder: 'emsdk-cache'
      - name: View emsdk version
        shell: bash
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -36,6 +36,7 @@ option(SHERPA_ONNX_ENABLE_WASM_TTS "Whether to enable WASM for TTS" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_ASR "Whether to enable WASM for ASR" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_KWS "Whether to enable WASM for KWS" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_VAD "Whether to enable WASM for VAD" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_VAD_ASR "Whether to enable WASM for VAD+ASR" OFF)
 option(SHERPA_ONNX_ENABLE_WASM_NODEJS "Whether to enable WASM for NodeJS" OFF)
 option(SHERPA_ONNX_ENABLE_BINARY "Whether to build binaries" ON)
 option(SHERPA_ONNX_ENABLE_TTS "Whether to build TTS related code" ON)
@@ -137,6 +138,7 @@ message(STATUS "SHERPA_ONNX_ENABLE_WASM_TTS ${SHERPA_ONNX_ENABLE_WASM_TTS}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_ASR ${SHERPA_ONNX_ENABLE_WASM_ASR}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_KWS ${SHERPA_ONNX_ENABLE_WASM_KWS}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD ${SHERPA_ONNX_ENABLE_WASM_VAD}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_VAD_ASR ${SHERPA_ONNX_ENABLE_WASM_VAD_ASR}")
 message(STATUS "SHERPA_ONNX_ENABLE_WASM_NODEJS ${SHERPA_ONNX_ENABLE_WASM_NODEJS}")
 message(STATUS "SHERPA_ONNX_ENABLE_BINARY ${SHERPA_ONNX_ENABLE_BINARY}")
 message(STATUS "SHERPA_ONNX_ENABLE_TTS ${SHERPA_ONNX_ENABLE_TTS}")
@@ -211,11 +213,22 @@ if(SHERPA_ONNX_ENABLE_WASM)
 endif()
 if(SHERPA_ONNX_ENABLE_WASM_KWS)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for KWS")
  endif()
  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_KWS=1)
 endif()
 if(SHERPA_ONNX_ENABLE_WASM_VAD)
-  add_definitions(-DSHERPA_ONNX_ENABLE_WASM_VAD=1)
+  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD")
  endif()
 endif()
 if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  if(NOT SHERPA_ONNX_ENABLE_WASM)
    message(FATAL_ERROR "Please set SHERPA_ONNX_ENABLE_WASM to ON if you enable WASM for VAD+ASR")
  endif()
 endif()
 if(NOT CMAKE_CXX_STANDARD)
--- a/README.md
+++ b/README.md
@@ -14,13 +14,13 @@
 ### Supported platforms
-|Architecture| Android          | iOS           | Windows    | macOS | linux |
+|Architecture| Android | iOS     | Windows    | macOS | linux |
-|------------|------------------|---------------|------------|-------|-------|
+|------------|---------|---------|------------|-------|-------|
-|   x64      |  ✔️               |               |   ✔️        | ✔️     |  ✔️    |
+|   x64      |  ✔️      |         |   ✔️        | ✔️     |  ✔️    |
-|   x86      |  ✔️               |               |   ✔️        |       |       |
+|   x86      |  ✔️      |         |   ✔️        |       |       |
-|   arm64    |  ✔️               | ✔️             |   ✔️        | ✔️     |  ✔️    |
+|   arm64    |  ✔️      | ✔️       |   ✔️        | ✔️     |  ✔️    |
-|   arm32    |  ✔️               |               |            |       |  ✔️    |
+|   arm32    |  ✔️      |         |            |       |  ✔️    |
-|   riscv64  |                  |               |            |       |  ✔️    |
+|   riscv64  |         |         |            |       |  ✔️    |
 ### Supported programming languages
@@ -37,7 +37,7 @@
 |-------|----------|----------|------------|
 | ✔️     |  ✔️       |   ✔️      |    ✔️       |
-For Rust support, please see https://github.com/thewh1teagle/sherpa-rs
+For Rust support, please see [sherpa-rs][sherpa-rs]
 It also supports WebAssembly.
@@ -51,7 +51,7 @@ This repository supports running the following functions **locally**
  - Speaker verification
  - Spoken language identification
  - Audio tagging
-  - VAD (e.g., [silero-vad](https://github.com/snakers4/silero-vad))
+  - VAD (e.g., [silero-vad][silero-vad])
  - Keyword spotting
 on the following platforms and operating systems:
@@ -62,11 +62,12 @@ on the following platforms and operating systems:
  - iOS
  - NodeJS
  - WebAssembly
-  - [Raspberry Pi](https://www.raspberrypi.com/)
+  - [Raspberry Pi][Raspberry Pi]
-  - [RV1126](https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf)
+  - [RV1126][RV1126]
-  - [LicheePi4A](https://sipeed.com/licheepi4a)
+  - [LicheePi4A][LicheePi4A]
-  - [VisionFive 2](https://www.starfivetech.com/en/site/boards)
+  - [VisionFive 2][VisionFive 2]
-  - [旭日X3派](https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html)
+  - [旭日X3派][旭日X3派]
  - [爱芯派][爱芯派]
  - etc
 with the following APIs
@@ -81,59 +82,68 @@ with the following APIs
 You can visit the following Huggingface spaces to try `sherpa-onnx` without
 installing anything. All you need is a browser.
-| Description | URL |
+| Description                                           | URL                                |
-|---|---|
+|-------------------------------------------------------|------------------------------------|
-| Speech recognition | [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition)|
+| Speech recognition                                    | [Click me][hf-space-asr]           |
-| Speech recognition with [Whisper](https://github.com/openai/whisper)| [Click me](https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper)|
+| Speech recognition with [Whisper][Whisper]            | [Click me][hf-space-asr-whisper]   |
-| Speech synthesis | [Click me](https://huggingface.co/spaces/k2-fsa/text-to-speech)|
+| Speech synthesis                                      | [Click me][hf-space-tts]           |
-| Generate subtitles| [Click me](https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos)|
+| Generate subtitles                                    | [Click me][hf-space-subtitle]      |
-|Audio tagging| [Click me](https://huggingface.co/spaces/k2-fsa/audio-tagging)|
+| Audio tagging                                         | [Click me][hf-space-audio-tagging] |
-|Spoken language identification with [Whisper](https://github.com/openai/whisper)|[Click me](https://huggingface.co/spaces/k2-fsa/spoken-language-identification)|
+| Spoken language identification with [Whisper][Whisper]| [Click me][hf-space-slid-whisper]  |
 We also have spaces built using WebAssembly. They are listed below:
-| Description | URL| Chinese users|
+| Description                                                                              | Huggingface space| ModelScope space|
-|---|---|---|
+|------------------------------------------------------------------------------------------|------------------|-----------------|
-|Voice activity detection with [silero-vad](https://github.com/snakers4/silero-vad)| [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx)|[地址](https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx)|
+|Voice activity detection with [silero-vad][silero-vad]                                    | [Click me][wasm-hf-vad]|[地址][wasm-ms-vad]|
-|Real-time speech recognition (Chinese + English) with Zipformer | [Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en)|
+|Real-time speech recognition (Chinese + English) with Zipformer                           | [Click me][wasm-hf-streaming-asr-zh-en-zipformer]|[地址][wasm-ms-streaming-asr-zh-en-zipformer]|
-|Real-time speech recognition (Chinese + English) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer)|
+|Real-time speech recognition (Chinese + English) with Paraformer                          |[Click me][wasm-hf-streaming-asr-zh-en-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-paraformer]|
-|Real-time speech recognition (Chinese + English + Cantonese) with Paraformer|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer)|
+|Real-time speech recognition (Chinese + English + Cantonese) with [Paraformer-large][Paraformer-large]|[Click me][wasm-hf-streaming-asr-zh-en-yue-paraformer]| [地址][wasm-ms-streaming-asr-zh-en-yue-paraformer]|
-|Real-time speech recognition (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en)|[地址](https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en)|
+|Real-time speech recognition (English) |[Click me][wasm-hf-streaming-asr-en-zipformer]    |[地址][wasm-ms-streaming-asr-en-zipformer]|
-|Speech synthesis (English) |[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en)|
+|VAD + speech recognition (Chinese + English + Korean + Japanese + Cantonese) with [SenseVoice][SenseVoice]|[Click me][wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]| [地址][wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]|
-|Speech synthesis (German)|[Click me](https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de)| [地址](https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de)|
+|VAD + speech recognition (English) with [Whisper][Whisper] tiny.en|[Click me][wasm-hf-vad-asr-en-whisper-tiny-en]| [地址][wasm-ms-vad-asr-en-whisper-tiny-en]|
 |VAD + speech recognition (English) with Zipformer trained with [GigaSpeech][GigaSpeech]    |[Click me][wasm-hf-vad-asr-en-zipformer-gigaspeech]| [地址][wasm-ms-vad-asr-en-zipformer-gigaspeech]|
 |VAD + speech recognition (Chinese) with Zipformer trained with [WenetSpeech][WenetSpeech]  |[Click me][wasm-hf-vad-asr-zh-zipformer-wenetspeech]| [地址][wasm-ms-vad-asr-zh-zipformer-wenetspeech]|
 |VAD + speech recognition (Japanese) with Zipformer trained with [ReazonSpeech][ReazonSpeech]|[Click me][wasm-hf-vad-asr-ja-zipformer-reazonspeech]| [地址][wasm-ms-vad-asr-ja-zipformer-reazonspeech]|
 |VAD + speech recognition (Thai) with Zipformer trained with [GigaSpeech2][GigaSpeech2]      |[Click me][wasm-hf-vad-asr-th-zipformer-gigaspeech2]| [地址][wasm-ms-vad-asr-th-zipformer-gigaspeech2]|
 |VAD + speech recognition (Chinese 多种方言) with a [TeleSpeech-ASR][TeleSpeech-ASR] CTC model|[Click me][wasm-hf-vad-asr-zh-telespeech]| [地址][wasm-ms-vad-asr-zh-telespeech]|
 |VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-large          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-large]| [地址][wasm-ms-vad-asr-zh-en-paraformer-large]|
 |VAD + speech recognition (English + Chinese, 及多种中文方言) with Paraformer-small          |[Click me][wasm-hf-vad-asr-zh-en-paraformer-small]| [地址][wasm-ms-vad-asr-zh-en-paraformer-small]|
 |Speech synthesis (English)                                                                  |[Click me][wasm-hf-tts-piper-en]| [地址][wasm-ms-tts-piper-en]|
 |Speech synthesis (German)                                                                   |[Click me][wasm-hf-tts-piper-de]| [地址][wasm-ms-tts-piper-de]|
 ### Links for pre-built Android APKs
-| Description                    | URL                                                                                     | 中国用户                                                                             |
+| Description                            | URL                          | 中国用户                    |
-|--------------------------------|-----------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------|
+|----------------------------------------|------------------------------|-----------------------------|
-| Streaming speech recognition             | [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk.html)                        | [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html)                        |
+| Streaming speech recognition           | [Address][apk-streaming-asr] | [点此][apk-streaming-asr-cn]|
-| Text-to-speech | [Address](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html)                     | [点此](https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html)                     |
+| Text-to-speech                         | [Address][apk-tts]           | [点此][apk-tts-cn]          |
-|Voice activity detection (VAD) | [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html)|
+| Voice activity detection (VAD)         | [Address][apk-vad]           | [点此][apk-vad-cn]          |
-|VAD + non-streaming speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html)|
+| VAD + non-streaming speech recognition | [Address][apk-vad-asr]       | [点此][apk-vad-asr-cn]      |
-|Two-pass speech recognition| [Address](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html)|
+| Two-pass speech recognition            | [Address][apk-2pass]         | [点此][apk-2pass-cn]        |
-| Audio tagging                  | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html)                  | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html)                  |
+| Audio tagging                          | [Address][apk-at]            | [点此][apk-at-cn]           |
-| Audio tagging (WearOS)         | [Address](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html)           | [点此](https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html)           |
+| Audio tagging (WearOS)                 | [Address][apk-at-wearos]     | [点此][apk-at-wearos-cn]    |
-| Speaker identification         | [Address](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html)         | [点此](https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html)         |
+| Speaker identification                 | [Address][apk-sid]           | [点此][apk-sid-cn]          |
-| Spoken language identification | [Address](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html) |
+| Spoken language identification         | [Address][apk-slid]          | [点此][apk-slid-cn]         |
-|Keyword spotting| [Address](https://k2-fsa.github.io/sherpa/onnx/kws/apk.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html)|
+| Keyword spotting                       | [Address][apk-kws]           | [点此][apk-kws-cn]          |
 ### Links for pre-built Flutter APPs
 #### Real-time speech recognition
-| Description                    | URL                                                                 | 中国用户                                                            |
+| Description                    | URL                                 | 中国用户                            |
-|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|
+|--------------------------------|-------------------------------------|-------------------------------------|
-| Streaming speech recognition   | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html)|
+| Streaming speech recognition   | [Address][apk-flutter-streaming-asr]| [点此][apk-flutter-streaming-asr-cn]|
 #### Text-to-speech
-| Description                    | URL                                                          | 中国用户                                                                    |
+| Description                              | URL                                | 中国用户                           |
-|--------------------------------|--------------------------------------------------------------|-----------------------------------------------------------------------------|
+|------------------------------------------|------------------------------------|------------------------------------|
-| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html)|
+| Android (arm64-v8a, armeabi-v7a, x86_64) | [Address][flutter-tts-android]     | [点此][flutter-tts-android-cn]     |
-| Linux (x64)    | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html)       | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html)      |
+| Linux (x64)                              | [Address][flutter-tts-linux]       | [点此][flutter-tts-linux-cn]       |
-| macOS (x64)    | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html)   | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html)  |
+| macOS (x64)                              | [Address][flutter-tts-macos-x64]   | [点此][flutter-tts-macos-x64-cn]   |
-| macOS (arm64)  | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html) | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html)|
+| macOS (arm64)                            | [Address][flutter-tts-macos-arm64] | [点此][flutter-tts-macos-arm64-cn] |
-| Windows (x64)  | [Address](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html)         | [点此](https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html)        |
+| Windows (x64)                            | [Address][flutter-tts-win-x64]     | [点此][flutter-tts-win-x64-cn]     |
 > Note: You need to build from source for iOS.
@@ -141,23 +151,23 @@ We also have spaces built using WebAssembly. The are listed below:
 #### Generating subtitles
-| Description                    | URL                                                                 | 中国用户                                                            |
+| Description                    | URL                        | 中国用户                   |
-|--------------------------------|---------------------------------------------------------------------|---------------------------------------------------------------------|
+|--------------------------------|----------------------------|----------------------------|
-| Generate subtitles (生成字幕)   | [Address](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html)| [点此](https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html)|
+| Generate subtitles (生成字幕)  | [Address][lazarus-subtitle]| [点此][lazarus-subtitle-cn]|
 ### Links for pre-trained models
-| Description                    | URL                                                                                                                            |
+| Description                                 | URL                                                                                   |
-|--------------------------------|--------------------------------------------------------------------------------------------------------------------------------|
+|---------------------------------------------|---------------------------------------------------------------------------------------|
-| Speech recognition (speech to text, ASR)             | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models)              |
+| Speech recognition (speech to text, ASR)    | [Address][asr-models]                                                                 |
-| Text-to-speech (TTS)                 | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models)                             |
+| Text-to-speech (TTS)                        | [Address][tts-models]                                                                 |
-| VAD | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx)|
+| VAD                                         | [Address][vad-models]                                                                 |
-| Keyword spotting |[Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models)|
+| Keyword spotting                            | [Address][kws-models]                                                                 |
-| Audio tagging                  | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models)|
+| Audio tagging                               | [Address][at-models]                                                                  |
-| Speaker identification (Speaker ID)         | [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models)|
+| Speaker identification (Speaker ID)         | [Address][sid-models]                                                                 |
-| Spoken language identification (Language ID) | See multi-lingual [Whisper](https://github.com/openai/whisper) ASR models from  [Speech recognition](https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models) |
+| Spoken language identification (Language ID)| See multi-lingual [Whisper][Whisper] ASR models from  [Speech recognition][asr-models]|
-| Punctuation| [Address](https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models)|
+| Punctuation                                 | [Address][punct-models]                                                               |
 ### Useful links
@@ -169,3 +179,100 @@ We also have spaces built using WebAssembly. The are listed below:
 Please see
 https://k2-fsa.github.io/sherpa/social-groups.html
 for 新一代 Kaldi **微信交流群** and **QQ 交流群**.
 [sherpa-rs]: https://github.com/thewh1teagle/sherpa-rs
 [silero-vad]: https://github.com/snakers4/silero-vad
 [Raspberry Pi]: https://www.raspberrypi.com/
 [RV1126]: https://www.rock-chips.com/uploads/pdf/2022.8.26/191/RV1126%20Brief%20Datasheet.pdf
 [LicheePi4A]: https://sipeed.com/licheepi4a
 [VisionFive 2]: https://www.starfivetech.com/en/site/boards
 [旭日X3派]: https://developer.horizon.ai/api/v1/fileData/documents_pi/index.html
 [爱芯派]: https://wiki.sipeed.com/hardware/zh/maixIII/ax-pi/axpi.html
 [hf-space-asr]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition
 [Whisper]: https://github.com/openai/whisper
 [hf-space-asr-whisper]: https://huggingface.co/spaces/k2-fsa/automatic-speech-recognition-with-whisper
 [hf-space-tts]: https://huggingface.co/spaces/k2-fsa/text-to-speech
 [hf-space-subtitle]: https://huggingface.co/spaces/k2-fsa/generate-subtitles-for-videos
 [hf-space-audio-tagging]: https://huggingface.co/spaces/k2-fsa/audio-tagging
 [hf-space-slid-whisper]: https://huggingface.co/spaces/k2-fsa/spoken-language-identification
 [wasm-hf-vad]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-sherpa-onnx
 [wasm-ms-vad]: https://modelscope.cn/studios/csukuangfj/web-assembly-vad-sherpa-onnx
 [wasm-hf-streaming-asr-zh-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
 [wasm-ms-streaming-asr-zh-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en
 [wasm-hf-streaming-asr-zh-en-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
 [wasm-ms-streaming-asr-zh-en-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-en-paraformer
 [Paraformer-large]: https://www.modelscope.cn/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-pytorch/summary
 [wasm-hf-streaming-asr-zh-en-yue-paraformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
 [wasm-ms-streaming-asr-zh-en-yue-paraformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-zh-cantonese-en-paraformer
 [wasm-hf-streaming-asr-en-zipformer]: https://huggingface.co/spaces/k2-fsa/web-assembly-asr-sherpa-onnx-en
 [wasm-ms-streaming-asr-en-zipformer]: https://modelscope.cn/studios/k2-fsa/web-assembly-asr-sherpa-onnx-en
 [SenseVoice]: https://github.com/FunAudioLLM/SenseVoice
 [wasm-hf-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice
 [wasm-ms-vad-asr-zh-en-ko-ja-yue-sense-voice]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice
 [wasm-hf-vad-asr-en-whisper-tiny-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
 [wasm-ms-vad-asr-en-whisper-tiny-en]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny
 [wasm-hf-vad-asr-en-zipformer-gigaspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
 [wasm-ms-vad-asr-en-zipformer-gigaspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech
 [wasm-hf-vad-asr-zh-zipformer-wenetspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
 [wasm-ms-vad-asr-zh-zipformer-wenetspeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech
 [ReazonSpeech]: https://research.reazon.jp/_static/reazonspeech_nlp2023.pdf
 [wasm-hf-vad-asr-ja-zipformer-reazonspeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
 [wasm-ms-vad-asr-ja-zipformer-reazonspeech]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer
 [GigaSpeech2]: https://github.com/SpeechColab/GigaSpeech2
 [wasm-hf-vad-asr-th-zipformer-gigaspeech2]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer
 [wasm-ms-vad-asr-th-zipformer-gigaspeech2]: https://www.modelscope.cn/studios/csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer
 [TeleSpeech-ASR]: https://github.com/Tele-AI/TeleSpeech-ASR
 [wasm-hf-vad-asr-zh-telespeech]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
 [wasm-ms-vad-asr-zh-telespeech]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech
 [wasm-hf-vad-asr-zh-en-paraformer-large]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
 [wasm-ms-vad-asr-zh-en-paraformer-large]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer
 [wasm-hf-vad-asr-zh-en-paraformer-small]: https://huggingface.co/spaces/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
 [wasm-ms-vad-asr-zh-en-paraformer-small]: https://www.modelscope.cn/studios/k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small
 [wasm-hf-tts-piper-en]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-en
 [wasm-ms-tts-piper-en]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-en
 [wasm-hf-tts-piper-de]: https://huggingface.co/spaces/k2-fsa/web-assembly-tts-sherpa-onnx-de
 [wasm-ms-tts-piper-de]: https://modelscope.cn/studios/k2-fsa/web-assembly-tts-sherpa-onnx-de
 [apk-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/android/apk.html
 [apk-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-cn.html
 [apk-tts]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine.html
 [apk-tts-cn]: https://k2-fsa.github.io/sherpa/onnx/tts/apk-engine-cn.html
 [apk-vad]: https://k2-fsa.github.io/sherpa/onnx/vad/apk.html
 [apk-vad-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-cn.html
 [apk-vad-asr]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr.html
 [apk-vad-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/vad/apk-asr-cn.html
 [apk-2pass]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass.html
 [apk-2pass-cn]: https://k2-fsa.github.io/sherpa/onnx/android/apk-2pass-cn.html
 [apk-at]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk.html
 [apk-at-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-cn.html
 [apk-at-wearos]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos.html
 [apk-at-wearos-cn]: https://k2-fsa.github.io/sherpa/onnx/audio-tagging/apk-wearos-cn.html
 [apk-sid]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk.html
 [apk-sid-cn]: https://k2-fsa.github.io/sherpa/onnx/speaker-identification/apk-cn.html
 [apk-slid]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk.html
 [apk-slid-cn]: https://k2-fsa.github.io/sherpa/onnx/spoken-language-identification/apk-cn.html
 [apk-kws]: https://k2-fsa.github.io/sherpa/onnx/kws/apk.html
 [apk-kws-cn]: https://k2-fsa.github.io/sherpa/onnx/kws/apk-cn.html
 [apk-flutter-streaming-asr]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app.html
 [apk-flutter-streaming-asr-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/asr/app-cn.html
 [flutter-tts-android]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android.html
 [flutter-tts-android-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-android-cn.html
 [flutter-tts-linux]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux.html
 [flutter-tts-linux-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-linux-cn.html
 [flutter-tts-macos-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64.html
 [flutter-tts-macos-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-x64-cn.html
 [flutter-tts-macos-arm64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64.html
 [flutter-tts-macos-arm64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-macos-arm64-cn.html
 [flutter-tts-win-x64]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win.html
 [flutter-tts-win-x64-cn]: https://k2-fsa.github.io/sherpa/onnx/flutter/tts-win-cn.html
 [lazarus-subtitle]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles.html
 [lazarus-subtitle-cn]: https://k2-fsa.github.io/sherpa/onnx/lazarus/download-generated-subtitles-cn.html
 [asr-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 [tts-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
 [vad-models]: https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 [kws-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/kws-models
 [at-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/audio-tagging-models
 [sid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
 [slid-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
 [punct-models]: https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models
 [GigaSpeech]: https://github.com/SpeechColab/GigaSpeech
 [WenetSpeech]: https://github.com/wenet-e2e/WenetSpeech
--- a/build-wasm-simd-vad-asr.sh
+++ b/build-wasm-simd-vad-asr.sh
@@ -0,0 +1,68 @@
 #!/usr/bin/env bash
 # Copyright (c)  2024  Xiaomi Corporation
 #
 # This script is to build sherpa-onnx for WebAssembly (VAD+ASR)
 # Note: ASR here means non-streaming ASR
 set -ex
 # Locate the Emscripten SDK. A caller-provided $EMSCRIPTEN wins; otherwise it
 # is derived from the directory of the emcc found on PATH.
 if [ x"$EMSCRIPTEN" == x"" ]; then
  if ! command -v emcc &> /dev/null; then
    echo "Please install emscripten first"
    echo ""
    echo "You can use the following commands to install it:"
    echo ""
    echo "git clone https://github.com/emscripten-core/emsdk.git"
    echo "cd emsdk"
    echo "git pull"
    echo "./emsdk install latest"
    echo "./emsdk activate latest"
    echo "source ./emsdk_env.sh"
    exit 1
  else
    # Every expansion is quoted so that an SDK installed under a path that
    # contains spaces still resolves. `command -v` matches the probe above.
    EMSCRIPTEN="$(dirname "$(realpath "$(command -v emcc)")")"
  fi
 fi
 export EMSCRIPTEN
 echo "EMSCRIPTEN: $EMSCRIPTEN"
 # The CMake toolchain file shipped with the SDK; without it we cannot cross compile.
 toolchain_file="$EMSCRIPTEN/cmake/Modules/Platform/Emscripten.cmake"
 if [ ! -f "$toolchain_file" ]; then
  echo "Cannot find $toolchain_file"
  echo "Please make sure you have installed emsdk correctly"
  exit 1
 fi
 mkdir -p build-wasm-simd-vad-asr
 pushd build-wasm-simd-vad-asr
 # wasm/vad-asr/CMakeLists.txt aborts unless this variable is set, so that the
 # build is always started through this script.
 export SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON
 # Static, C-API-only build with the VAD + non-streaming ASR wasm target enabled.
 cmake \
  -DCMAKE_INSTALL_PREFIX=./install \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_TOOLCHAIN_FILE="$toolchain_file" \
  \
  -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
  -DSHERPA_ONNX_ENABLE_TESTS=OFF \
  -DSHERPA_ONNX_ENABLE_CHECK=OFF \
  -DBUILD_SHARED_LIBS=OFF \
  -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
  -DSHERPA_ONNX_ENABLE_JNI=OFF \
  -DSHERPA_ONNX_ENABLE_TTS=OFF \
  -DSHERPA_ONNX_ENABLE_C_API=ON \
  -DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
  -DSHERPA_ONNX_ENABLE_GPU=OFF \
  -DSHERPA_ONNX_ENABLE_WASM=ON \
  -DSHERPA_ONNX_ENABLE_WASM_VAD_ASR=ON \
  -DSHERPA_ONNX_ENABLE_BINARY=OFF \
  -DSHERPA_ONNX_LINK_LIBSTDCPP_STATICALLY=OFF \
  ..
 make -j2
 make install
 echo "pwd: $PWD"
 # The page reuses the JavaScript wrappers of the standalone VAD and ASR demos.
 cp -fv ../wasm/vad/sherpa-onnx-vad.js ./install/bin/wasm/vad-asr/
 cp -fv ../wasm/asr/sherpa-onnx-asr.js ./install/bin/wasm/vad-asr/
 ls -lh install/bin/wasm/vad-asr
--- a/scripts/wasm/generate-vad-asr.py
+++ b/scripts/wasm/generate-vad-asr.py
@@ -0,0 +1,229 @@
 #!/usr/bin/env python3
 import argparse
 from dataclasses import dataclass
 from typing import List, Optional
 import jinja2
 def get_args():
    """Parse the command line.

    Returns:
      An argparse namespace with two integer attributes: ``total`` (number of
      runners, default 1) and ``index`` (index of the current runner,
      default 0).
    """
    parser = argparse.ArgumentParser()
    # (flag, default, help text) for each integer option.
    option_specs = (
        ("--total", 1, "Number of runners"),
        ("--index", 0, "Index of the current runner"),
    )
    for flag, default_value, description in option_specs:
        parser.add_argument(
            flag,
            type=int,
            default=default_value,
            help=description,
        )
    return parser.parse_args()
@dataclass
 class Model:
    """Description of one ASR model for which a VAD+ASR demo is generated.

    The fields are substituted into the run-vad-asr.sh.in template.
    """

    # Name of the model archive; the template downloads ${model_name}.tar.bz2.
    model_name: str
    hf: str  # huggingface space name
    ms: str  # modelscope space name
    # Suffix used in the name of the packaged output directory/archive.
    short_name: str
    # Shell snippet inserted into the template after the archive is extracted.
    cmd: str = ""
 def get_models():
    """Return the Model entries for which a VAD+ASR WebAssembly demo is built.

    Each entry's ``cmd`` is a shell snippet that the run-vad-asr.sh.in template
    inserts right after the model archive has been extracted. Every snippet
    moves the model files one directory up under fixed file names, removes
    the extracted directory, and replaces the word "Zipformer" in
    ../index.html with a model-specific description.
    """
    models = [
        Model(
            model_name="sherpa-onnx-whisper-tiny.en",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-en-whisper-tiny",
            short_name="vad-asr-en-whisper_tiny",
            cmd="""
            pushd $model_name
            mv -v tiny.en-encoder.int8.onnx ../whisper-encoder.onnx
            mv -v tiny.en-decoder.int8.onnx ../whisper-decoder.onnx
            mv -v tiny.en-tokens.txt ../tokens.txt
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Whisper tiny.en supporting English 英文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-ja-ko-cantonese-sense-voice",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-zh-en-jp-ko-cantonese-sense-voice",
            short_name="vad-asr-zh_en_ja_ko_cantonese-sense_voice_small",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../sense-voice.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/SenseVoice Small supporting English, Chinese, Japanese, Korean, Cantonese 中英日韩粤/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-2023-09-14",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer",
            short_name="vad-asr-zh_en-paraformer_large",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../paraformer.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Paraformer supporting Chinese, English 中英/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-paraformer-zh-small-2024-03-09",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-en-paraformer-small",
            short_name="vad-asr-zh_en-paraformer_small",
            cmd="""
            pushd $model_name
            mv -v model.int8.onnx ../paraformer.onnx
            mv -v tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Paraformer-small supporting Chinese, English 中英文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-gigaspeech-2023-12-12",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-en-zipformer-gigaspeech",
            short_name="vad-asr-en-zipformer_gigaspeech",
            cmd="""
            pushd $model_name
            mv encoder-epoch-30-avg-1.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-30-avg-1.onnx ../transducer-decoder.onnx
            mv joiner-epoch-30-avg-1.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting English 英语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="icefall-asr-zipformer-wenetspeech-20230615",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-zipformer-wenetspeech",
            short_name="vad-asr-zh-zipformer_wenetspeech",
            cmd="""
            pushd $model_name
            mv -v data/lang_char/tokens.txt ../
            mv -v exp/encoder-epoch-12-avg-4.int8.onnx ../transducer-encoder.onnx
            mv -v exp/decoder-epoch-12-avg-4.onnx ../transducer-decoder.onnx
            mv -v exp/joiner-epoch-12-avg-4.int8.onnx ../transducer-joiner.onnx
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Chinese 中文/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-ja-zipformer",
            short_name="vad-asr-ja-zipformer_reazonspeech",
            cmd="""
            pushd $model_name
            mv encoder-epoch-99-avg-1.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-99-avg-1.onnx ../transducer-decoder.onnx
            mv joiner-epoch-99-avg-1.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Japanese 日语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-zipformer-thai-2024-06-20",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            ms="csukuangfj/web-assembly-vad-asr-sherpa-onnx-th-zipformer",
            short_name="vad-asr-th-zipformer_gigaspeech2",
            cmd="""
            pushd $model_name
            mv encoder-epoch-12-avg-5.int8.onnx ../transducer-encoder.onnx
            mv decoder-epoch-12-avg-5.onnx ../transducer-decoder.onnx
            mv joiner-epoch-12-avg-5.int8.onnx ../transducer-joiner.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/Zipformer supporting Thai 泰语/g' ../index.html
            git diff
            """,
        ),
        Model(
            model_name="sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04",
            hf="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            ms="k2-fsa/web-assembly-vad-asr-sherpa-onnx-zh-telespeech",
            short_name="vad-asr-zh-telespeech",
            cmd="""
            pushd $model_name
            mv model.int8.onnx ../telespeech.onnx
            mv tokens.txt ../
            popd
            rm -rf $model_name
            sed -i.bak 's/Zipformer/TeleSpeech-ASR supporting Chinese 多种中文方言/g' ../index.html
            git diff
            """,
        ),
    ]
    return models
 def main():
    """Render run-vad-asr.sh for the models assigned to this runner.

    The model list is cut into ``total`` equal contiguous slices; runner
    ``index`` takes slice number ``index``. Models left over after the even
    split are handed out one each to the runners with the lowest indices.
    The selection is passed as ``model_list`` to the Jinja2 template
    ``./run-vad-asr.sh.in`` and the result is written to ``./run-vad-asr.sh``.

    Raises:
      ValueError: if there are more runners than models.
    """
    args = get_args()
    index, total = args.index, args.total
    assert 0 <= index < total, (index, total)

    every_model = get_models()
    num_models = len(every_model)

    slice_len = num_models // total
    if slice_len <= 0:
        raise ValueError(f"num_models: {num_models}, num_runners: {total}")

    evenly_assigned = total * slice_len
    start = index * slice_len
    end = start + slice_len
    print(f"{index}/{total}: {start}-{end}/{num_models}")

    chosen = every_model[start:end]
    # Leftover models sit at the tail of the list, past the even slices.
    if index < num_models - evenly_assigned:
        s = evenly_assigned + index
        chosen.append(every_model[s])
        print(f"{s}/{num_models}")

    context = {"model_list": chosen}
    for out_path in ("./run-vad-asr.sh",):
        env = jinja2.Environment()
        with open(f"{out_path}.in") as template_file:
            template_text = template_file.read()
        rendered = env.from_string(template_text).render(**context)
        with open(out_path, "w") as out_file:
            print(rendered, file=out_file)
--- a/scripts/wasm/run-vad-asr.sh.in
+++ b/scripts/wasm/run-vad-asr.sh.in
@@ -0,0 +1,92 @@
 #!/usr/bin/env bash
 #
 # Build WebAssembly APPs for huggingface spaces and modelscope spaces
 set -ex
 # Print a message prefixed with a timestamp (YYYY-MM-DD HH:MM:SS) and the
 # caller's file name, line number and function name. All arguments are
 # joined into the message; echo -e interprets backslash escapes in it.
 # NOTE(review): no call to log is visible in this template.
 log() {
  # This function is from espnet
  # Base name of the script that called log (directory part stripped).
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
 }
 # Version string taken from the top-level CMakeLists.txt; it becomes part of
 # the name of every packaged archive below.
 SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt  | cut -d " " -f 2  | cut -d '"' -f 2)
 # Everything up to the matching endfor is rendered once per selected model.
 {% for model in model_list %}
 model_name={{ model.model_name }}
 short_name={{ model.short_name }}
 hf_name={{ model.hf }}
 ms_name={{ model.ms }}
 # Step 1: start from a clean wasm/vad-asr tree (undoes the index.html edit of
 # a previous model) and download the VAD model plus the ASR model into assets/.
 pushd wasm/vad-asr
 git checkout .
 rm -rf assets
 mkdir assets
 cd assets
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/${model_name}.tar.bz2
 tar xvf ${model_name}.tar.bz2
 rm ${model_name}.tar.bz2
 # Model-specific snippet: renames the model files and patches ../index.html.
 {{ model.cmd }}
 popd
 ls -lh wasm/vad-asr/assets
 # Step 2: rebuild so that the new assets get packaged.
 rm -rf build-wasm-simd-vad-asr/install
 rm -rf build-wasm-simd-vad-asr/wasm
 ./build-wasm-simd-vad-asr.sh
 # Step 3: package the installed web app as a versioned tarball.
 dst=sherpa-onnx-wasm-simd-${SHERPA_ONNX_VERSION}-${short_name}
 mv build-wasm-simd-vad-asr/install/bin/wasm/vad-asr $dst
 ls -lh $dst
 tar cjfv $dst.tar.bz2 ./$dst
 ls -lh *.tar.bz2
 git config --global user.email "csukuangfj@gmail.com"
 git config --global user.name "Fangjun Kuang"
 export GIT_LFS_SKIP_SMUDGE=1
 export GIT_CLONE_PROTECTION_ACTIVE=false
 # Step 4: publish the app to the ModelScope studio.
 # NOTE(review): this script runs under set -x, so the push URLs below, which
 # embed MS_TOKEN / HF_TOKEN, are echoed to the log; confirm that the CI masks them.
 rm -rf ms
 git clone https://www.modelscope.cn/studios/$ms_name.git ms
 cd ms
 cp -v ../$dst/* .
 git status
 git lfs track "*.data"
 git lfs track "*.wasm"
 ls -lh
 git add .
 git commit -m "update model"
 git push https://oauth2:${MS_TOKEN}@www.modelscope.cn/studios/$ms_name.git
 cd ..
 rm -rf ms
 # Step 5: publish the same files to the Hugging Face space.
 rm -rf huggingface
 git clone https://huggingface.co/spaces/$hf_name huggingface
 cd huggingface
 cp -v ../$dst/* .
 git status
 git lfs track "*.data"
 git lfs track "*.wasm"
 ls -lh
 git add .
 git commit -m "update model"
 git push https://csukuangfj:$HF_TOKEN@huggingface.co/spaces/$hf_name main
 cd ..
 rm -rf huggingface
 # Only the tarball is kept; the unpacked directory is no longer needed.
 rm -rf $dst
 ls -lh *.tar.bz2
 {% endfor %}
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -13,6 +13,7 @@
 #include "sherpa-onnx/csrc/audio-tagging.h"
 #include "sherpa-onnx/csrc/circular-buffer.h"
 #include "sherpa-onnx/csrc/display.h"
 #include "sherpa-onnx/csrc/file-utils.h"
 #include "sherpa-onnx/csrc/keyword-spotter.h"
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/offline-punctuation.h"
@@ -1638,3 +1639,7 @@ int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
 // Resets the resampler `p` by forwarding to Reset() of its underlying
 // implementation object. `p` is dereferenced without a null check.
 void SherpaOnnxLinearResamplerReset(SherpaOnnxLinearResampler *p) {
  p->impl->Reset();
 }
 // Returns 1 if `filename` exists and 0 otherwise.
 //
 // A null `filename` is reported as "does not exist" instead of being
 // forwarded: converting a null pointer to a std::string argument is
 // undefined behavior, and this function sits on the C API boundary where
 // callers (e.g. JavaScript passing the result of a failed malloc) may
 // hand in a null pointer.
 int32_t SherpaOnnxFileExists(const char *filename) {
  if (filename == nullptr) {
    return 0;
  }
  return sherpa_onnx::FileExists(filename) ? 1 : 0;
 }
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -1361,6 +1361,9 @@ SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetInputSampleRate(
 SHERPA_ONNX_API int32_t SherpaOnnxLinearResamplerResampleGetOutputSampleRate(
    const SherpaOnnxLinearResampler *p);
 // Return 1 if the file exists; return 0 if the file does not exist.
 SHERPA_ONNX_API int32_t SherpaOnnxFileExists(const char *filename);
 #if defined(__GNUC__)
 #pragma GCC diagnostic pop
 #endif
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD)
  add_subdirectory(vad)
 endif()
 # WebAssembly demo that combines VAD with non-streaming ASR.
 if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
  add_subdirectory(vad-asr)
 endif()
 if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  add_subdirectory(nodejs)
 endif()
--- a/wasm/asr/assets/README.md
+++ b/wasm/asr/assets/README.md
@@ -80,3 +80,10 @@ assets fangjun$ tree -L 1
 0 directories, 4 files
 ```
 You can find example build scripts at:
  - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
  - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
  - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
  - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
--- a/wasm/asr/index.html
+++ b/wasm/asr/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for ASR</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/tts/assets/README.md
+++ b/wasm/tts/assets/README.md
@@ -30,3 +30,8 @@ assets fangjun$ tree -L 1
 1 directory, 3 files
 ```
 You can find example build scripts at:
  - English TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-tts.yaml
  - German TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-de-tts.yaml
--- a/wasm/vad-asr/CMakeLists.txt
+++ b/wasm/vad-asr/CMakeLists.txt
@@ -0,0 +1,83 @@
 # Refuse to configure unless the build was started through the wrapper script,
 # which exports SHERPA_ONNX_IS_USING_BUILD_WASM_SH=ON before invoking cmake.
 if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-vad-asr.sh to build for wasm VAD+ASR")
 endif()
 # The contents of assets/ are packaged into the build output, so the VAD
 # model and the token table of the ASR model must already be in place.
 if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt")
  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
 endif()
 # C API functions to export from the wasm module. The names are listed
 # without the leading underscore; it is added by the loop further down.
 set(exported_functions
  # VAD
  SherpaOnnxCreateCircularBuffer
  SherpaOnnxDestroyCircularBuffer
  SherpaOnnxCircularBufferPush
  SherpaOnnxCircularBufferGet
  SherpaOnnxCircularBufferFree
  SherpaOnnxCircularBufferPop
  SherpaOnnxCircularBufferSize
  SherpaOnnxCircularBufferHead
  SherpaOnnxCircularBufferReset
  SherpaOnnxCreateVoiceActivityDetector
  SherpaOnnxDestroyVoiceActivityDetector
  SherpaOnnxVoiceActivityDetectorAcceptWaveform
  SherpaOnnxVoiceActivityDetectorEmpty
  SherpaOnnxVoiceActivityDetectorDetected
  SherpaOnnxVoiceActivityDetectorPop
  SherpaOnnxVoiceActivityDetectorClear
  SherpaOnnxVoiceActivityDetectorFront
  SherpaOnnxDestroySpeechSegment
  SherpaOnnxVoiceActivityDetectorReset
  SherpaOnnxVoiceActivityDetectorFlush
  # non-streaming ASR
  SherpaOnnxAcceptWaveformOffline
  SherpaOnnxCreateOfflineRecognizer
  SherpaOnnxCreateOfflineStream
  SherpaOnnxDecodeMultipleOfflineStreams
  SherpaOnnxDecodeOfflineStream
  SherpaOnnxDestroyOfflineRecognizer
  SherpaOnnxDestroyOfflineRecognizerResult
  SherpaOnnxDestroyOfflineStream
  SherpaOnnxDestroyOfflineStreamResultJson
  SherpaOnnxGetOfflineStreamResult
  SherpaOnnxGetOfflineStreamResultAsJson
  #
  SherpaOnnxFileExists
 )
 # Prefix every name with "_" and join the result with commas, producing the
 # value that is later spliced into -sEXPORTED_FUNCTIONS.
 set(mangled_exported_functions)
 foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
 endforeach()
 list(JOIN mangled_exported_functions "," all_exported_functions)
 include_directories(${CMAKE_SOURCE_DIR})
 # Emscripten options, accumulated in MY_FLAGS and appended to both the C and
 # the C++ flags below.
 set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
 string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
 # Besides the C API functions, export _malloc/_free and _CopyHeap
 # (presumably defined in sherpa-onnx-wasm-main-vad-asr.cc -- TODO confirm).
 string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
 # Bundle the assets directory into the generated .data file.
 string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
 string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
 message(STATUS "MY_FLAGS: ${MY_FLAGS}")
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
 # NOTE(review): CMAKE_EXECUTBLE_LINKER_FLAGS looks like a misspelling of
 # CMAKE_EXE_LINKER_FLAGS, so this line probably has no effect. Confirm before
 # renaming: the flags may already reach the link step through the compiler
 # flags above, and passing them twice could change the build.
 set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
 # The install rules below name the .js/.wasm/.data outputs explicitly, so the
 # executable suffix must be .js.
 if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
  message(FATAL_ERROR "The default suffix for building executables should be .js!")
 endif()
 # set(CMAKE_EXECUTABLE_SUFFIX ".html")
 add_executable(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-wasm-main-vad-asr.cc)
 target_link_libraries(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-c-api)
 install(TARGETS sherpa-onnx-wasm-main-vad-asr DESTINATION bin/wasm/vad-asr)
 # Install the generated loader, wasm binary and preloaded data together with
 # the static page and its application script.
 install(
  FILES
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.js"
    "index.html"
    "app-vad-asr.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.wasm"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.data"
  DESTINATION
    bin/wasm/vad-asr
 )
--- a/wasm/vad-asr/app-vad-asr.js
+++ b/wasm/vad-asr/app-vad-asr.js
@@ -0,0 +1,389 @@
 // This file copies and modifies code
 // from https://mdn.github.io/web-dictaphone/scripts/app.js
 // and https://gist.github.com/meziantou/edb7217fddfbb70e899e
 // Handles to the page elements this script drives; the ids are looked up in
 // the hosting document.
 const startBtn = document.getElementById('startBtn');
 const stopBtn = document.getElementById('stopBtn');
 const clearBtn = document.getElementById('clearBtn');
 const hint = document.getElementById('hint');
 const soundClips = document.getElementById('sound-clips');
 let textArea = document.getElementById('results');
 // Text for the utterance currently in progress ('' when there is none).
 let lastResult = '';
 // Finished entries, in order; rendered by getDisplayResult().
 let resultList = [];
 // "Clear" button: drop every finished result and redraw the text area.
 // The in-progress entry (`lastResult`) is left untouched.
 clearBtn.onclick = () => {
  resultList = [];
  const shown = getDisplayResult();
  textArea.value = shown;
  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
 };
 /**
  * Render `resultList` (plus the in-progress `lastResult`) as the text shown
  * in the results text area.
  *
  * Every 'Speech detected' entry starts a new numbered item ("<n>: Speech
  * detected"); any other non-empty entry is appended to the current item as
  * ", <entry>" followed by a newline. Empty entries are skipped.
  *
  * @returns {string} The text to display.
  */
 function getDisplayResult() {
  let index = 0;
  let ans = '';
  // Iterate over values (for...of) instead of keys (for...in): for...in on
  // an array yields string keys and also visits inherited enumerable
  // properties.
  for (const entry of resultList) {
    if (entry === '') {
      continue;
    }
    if (entry === 'Speech detected') {
      ans += '' + index + ': ' + entry;
      index += 1;
    } else {
      ans += ', ' + entry + '\n';
    }
  }
  if (lastResult.length > 0) {
    ans += '' + index + ': ' + lastResult + '\n';
  }
  return ans;
 }
 // Assigned without a declaration on purpose-looking grounds: this makes
 // `Module` a global (in non-strict mode). The onRuntimeInitialized hook is
 // attached to it further down.
 // NOTE(review): presumably the Emscripten-generated loader script picks up
 // this global object - confirm against the generated .js file.
 Module = {};
 let audioCtx;
 let mediaStream;
 // Sample rate requested for the AudioContext and used for VAD/ASR/WAV output.
 let expectedSampleRate = 16000;
 let recordSampleRate;  // set from audioCtx.sampleRate once capture is set up
 let recorder = null;   // the script-processor node fed by the microphone
 let leftchannel = [];  // TODO: Use a single channel (only ever reset, never read in this file)
 let recordingLength = 0;  // number of samples so far (not updated anywhere in this file)
 let vad = null;         // created in Module.onRuntimeInitialized
 let buffer = null;      // CircularBuffer holding samples not yet fed to the VAD
 let recognizer = null;  // created by initOfflineRecognizer()
 let printed = false;    // true while 'Speech detected' has been shown for the current utterance
 /**
  * Ask the wasm module whether `filename` exists.
  *
  * The name is copied into wasm memory as a NUL-terminated UTF-8 string,
  * passed to `_SherpaOnnxFileExists`, and the temporary allocation is freed
  * again before returning.
  *
  * @param {string} filename Path to probe.
  * @returns {*} Whatever `_SherpaOnnxFileExists` returned (callers treat a
  *     truthy value / 1 as "exists").
  */
 function fileExists(filename) {
  // +1 leaves room for the terminating NUL byte.
  const numBytes = Module.lengthBytesUTF8(filename) + 1;
  const namePtr = Module._malloc(numBytes);
  Module.stringToUTF8(filename, namePtr, numBytes);
  const found = Module._SherpaOnnxFileExists(namePtr);
  Module._free(namePtr);
  return found;
 }
 // Empty placeholder: it does nothing and is not called anywhere in this file.
 function createOfflineRecognizerSenseVoice() {}
 /**
  * Create the global `recognizer` for whichever non-streaming model is present
  * in the preloaded file system.
  *
  * Model files are probed in a fixed order and the first one found decides the
  * configuration: sense-voice, whisper, transducer, nemo-transducer,
  * paraformer, telespeech. `./tokens.txt` is used as the token table in every
  * case.
  *
  * If no known model file exists, the user is told so and `recognizer` is left
  * unchanged; no recognizer is constructed from the incomplete configuration.
  */
 function initOfflineRecognizer() {
  let config = {
    modelConfig: {
      debug: 1,
      tokens: './tokens.txt',
    },
  };
  if (fileExists('sense-voice.onnx') == 1) {
    config.modelConfig.senseVoice = {
      model: './sense-voice.onnx',
      useInverseTextNormalization: 1,
    };
  } else if (fileExists('whisper-encoder.onnx')) {
    config.modelConfig.whisper = {
      encoder: './whisper-encoder.onnx',
      decoder: './whisper-decoder.onnx',
    };
  } else if (fileExists('transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './transducer-encoder.onnx',
      decoder: './transducer-decoder.onnx',
      joiner: './transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'transducer';
  } else if (fileExists('nemo-transducer-encoder.onnx')) {
    config.modelConfig.transducer = {
      encoder: './nemo-transducer-encoder.onnx',
      decoder: './nemo-transducer-decoder.onnx',
      joiner: './nemo-transducer-joiner.onnx',
    };
    config.modelConfig.modelType = 'nemo_transducer';
  } else if (fileExists('paraformer.onnx')) {
    config.modelConfig.paraformer = {
      model: './paraformer.onnx',
    };
  } else if (fileExists('telespeech.onnx')) {
    config.modelConfig.telespeechCtc = './telespeech.onnx';
  } else {
    console.log('Please specify a model.');
    alert('Please specify a model.');
    // The config names no model at all, so there is nothing a recognizer
    // could be built from.
    return;
  }
  recognizer = new OfflineRecognizer(config, Module);
 }
 // Hook run once the wasm runtime is ready: update the hint, enable the Start
 // button, then create the VAD, a circular sample buffer and the offline
 // recognizer that the audio callback relies on.
 Module.onRuntimeInitialized = function() {
  console.log('inited!');
  hint.innerText = 'Model loaded! Please click start';
  startBtn.disabled = false;
  vad = createVad(Module);
  console.log('vad is created!', vad);
  // Capacity in samples: 30 * 16000 (30 seconds at 16 kHz).
  buffer = new CircularBuffer(30 * 16000, Module);
  console.log('CircularBuffer is created!', buffer);
  initOfflineRecognizer();
 };
 // Microphone capture pipeline: microphone -> script processor -> VAD ->
 // non-streaming ASR. Every speech segment reported by the VAD is decoded and
 // also offered on the page as a playable WAV clip.
 //
 // `navigator.mediaDevices` itself can be undefined (it is only exposed in
 // secure contexts), so it has to be checked before `getUserMedia` is read;
 // otherwise the "not supported" branch below can never run in that case.
 if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
  console.log('getUserMedia supported.');
  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
  const constraints = {audio: true};
  // Called with the microphone MediaStream once the user grants access.
  let onSuccess = function(stream) {
    if (!audioCtx) {
      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
    }
    console.log(audioCtx);
    recordSampleRate = audioCtx.sampleRate;
    console.log('sample rate ' + recordSampleRate);
    // creates an audio node from the microphone incoming stream
    mediaStream = audioCtx.createMediaStreamSource(stream);
    console.log('media stream', mediaStream);
    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
    // bufferSize: the onaudioprocess event is called when the buffer is full
    var bufferSize = 4096;
    var numberOfInputChannels = 1;
    var numberOfOutputChannels = 2;
    if (audioCtx.createScriptProcessor) {
      recorder = audioCtx.createScriptProcessor(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    } else {
      recorder = audioCtx.createJavaScriptNode(
          bufferSize, numberOfInputChannels, numberOfOutputChannels);
    }
    console.log('recorder', recorder);
    // Runs for every captured audio block.
    recorder.onaudioprocess = function(e) {
      let samples = new Float32Array(e.inputBuffer.getChannelData(0));
      samples = downsampleBuffer(samples, expectedSampleRate);
      buffer.push(samples);
      // Feed the VAD one fixed-size window at a time.
      const windowSize = vad.config.sileroVad.windowSize;
      while (buffer.size() > windowSize) {
        const s = buffer.get(buffer.head(), windowSize);
        vad.acceptWaveform(s);
        buffer.pop(windowSize);
        // Show 'Speech detected' once per utterance while speech is ongoing.
        if (vad.isDetected() && !printed) {
          printed = true;
          lastResult = 'Speech detected';
        }
        // Speech ended: move the in-progress entry to the finished list.
        if (!vad.isDetected()) {
          printed = false;
          if (lastResult != '') {
            resultList.push(lastResult);
          }
          lastResult = '';
        }
        // Drain every completed speech segment.
        while (!vad.isEmpty()) {
          const segment = vad.front();
          const duration = segment.samples.length / expectedSampleRate;
          let durationStr = `Duration: ${duration.toFixed(3)} seconds`;
          vad.pop();
          // non-streaming asr
          const stream = recognizer.createStream();
          stream.acceptWaveform(expectedSampleRate, segment.samples);
          recognizer.decode(stream);
          let recognitionResult = recognizer.getResult(stream);
          console.log(recognitionResult);
          let text = recognitionResult.text;
          stream.free();
          console.log(text);
          if (text != '') {
            durationStr += `. Result: ${text}`;
          }
          resultList.push(durationStr);
          // now save the segment to a wav file
          // Clamp each float sample to [-1, 1] and scale to 16-bit PCM.
          let buf = new Int16Array(segment.samples.length);
          for (let i = 0; i < segment.samples.length; ++i) {
            let s = segment.samples[i];
            if (s >= 1)
              s = 1;
            else if (s <= -1)
              s = -1;
            buf[i] = s * 32767;
          }
          let clipName = new Date().toISOString() + '--' + durationStr;
          const clipContainer = document.createElement('article');
          const clipLabel = document.createElement('p');
          const audio = document.createElement('audio');
          const deleteButton = document.createElement('button');
          clipContainer.classList.add('clip');
          audio.setAttribute('controls', '');
          deleteButton.textContent = 'Delete';
          deleteButton.className = 'delete';
          clipLabel.textContent = clipName;
          clipContainer.appendChild(audio);
          clipContainer.appendChild(clipLabel);
          clipContainer.appendChild(deleteButton);
          soundClips.appendChild(clipContainer);
          audio.controls = true;
          const blob = toWav(buf);
          leftchannel = [];
          const audioURL = window.URL.createObjectURL(blob);
          audio.src = audioURL;
          deleteButton.onclick = function(e) {
            let evtTgt = e.target;
            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
            // Release the object URL so the WAV blob behind the removed clip
            // can be reclaimed instead of living until the page is closed.
            window.URL.revokeObjectURL(audioURL);
          };
          // Clicking the label lets the user rename the clip; cancelling the
          // prompt keeps the current name.
          clipLabel.onclick = function() {
            const existingName = clipLabel.textContent;
            const newClipName = prompt('Enter a new name for your sound clip?');
            if (newClipName === null) {
              clipLabel.textContent = existingName;
            } else {
              clipLabel.textContent = newClipName;
            }
          };
        }
      }
      textArea.value = getDisplayResult();
      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
    };
    // Start: wire microphone -> recorder -> destination so that
    // onaudioprocess starts firing.
    startBtn.onclick = function() {
      mediaStream.connect(recorder);
      recorder.connect(audioCtx.destination);
      console.log('recorder started');
      stopBtn.disabled = false;
      startBtn.disabled = true;
    };
    // Stop: discard pending VAD/buffer state and unwire the audio graph.
    stopBtn.onclick = function() {
      vad.reset();
      buffer.reset();
      console.log('recorder stopped');
      // stopBtn recording
      recorder.disconnect(audioCtx.destination);
      mediaStream.disconnect(recorder);
      startBtn.style.background = '';
      startBtn.style.color = '';
      // mediaRecorder.requestData();
      stopBtn.disabled = true;
      startBtn.disabled = false;
    };
  };
  let onError = function(err) {
    console.log('The following error occured: ' + err);
  };
  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
 } else {
  console.log('getUserMedia not supported on your browser!');
  alert('getUserMedia not supported on your browser!');
 }
 // this function is copied/modified from
 // https://gist.github.com/meziantou/edb7217fddfbb70e899e
 /**
  * Concatenate a list of sample chunks into one Int16Array.
  *
  * @param {Array} listOfSamples Chunks to join, in order; each chunk must be
  *     accepted by `Int16Array.prototype.set` and expose `length`.
  * @returns {Int16Array} All chunks back to back.
  */
 function flatten(listOfSamples) {
  const total = listOfSamples.reduce((sum, chunk) => sum + chunk.length, 0);
  const merged = new Int16Array(total);
  let writePos = 0;
  for (const chunk of listOfSamples) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }
  return merged;
 }
 // this function is copied/modified from
 // https://gist.github.com/meziantou/edb7217fddfbb70e899e
 /**
  * Wrap 16-bit PCM samples in a mono WAV container.
  *
  * The 44-byte header describes one channel, 16 bits per sample, at
  * `expectedSampleRate`; all multi-byte fields are little-endian.
  *
  * @param {Int16Array} samples PCM samples.
  * @returns {Blob} A Blob of type 'audio/wav' (44 + 2 * samples.length bytes).
  */
 function toWav(samples) {
  let buf = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buf);
  // http://soundfile.sapp.org/doc/WaveFormat/
  //                   F F I R
  view.setUint32(0, 0x46464952, true);               // chunkID
  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
  //                   E V A W
  view.setUint32(8, 0x45564157, true);  // format
                                        //
  //                      t m f
  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
  // audioFormat is a 2-byte field at offset 20. Writing it as a 32-bit value
  // also touched numChannels (offset 22) and was only correct because the
  // next statement overwrote those bytes.
  view.setUint16(20, 1, true);                   // audioFormat, 1 for PCM
  view.setUint16(22, 1, true);                   // numChannels: 1 channel
  view.setUint32(24, expectedSampleRate, true);  // sampleRate
  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
  view.setUint16(32, 2, true);                       // blockAlign
  view.setUint16(34, 16, true);                      // bitsPerSample
  //                      a t a d
  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
  view.setUint32(40, samples.length * 2, true);      // subchunk2Size
  let offset = 44;
  for (let i = 0; i < samples.length; ++i) {
    view.setInt16(offset, samples[i], true);
    offset += 2;
  }
  return new Blob([view], {type: 'audio/wav'});
 }
 // this function is copied from
 // https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
 /**
  * Resample `buffer` from `recordSampleRate` to `exportSampleRate` by
  * averaging the source samples that fall into each output sample's window.
  *
  * @param {Float32Array} buffer Samples recorded at `recordSampleRate`.
  * @param {number} exportSampleRate Target sample rate.
  * @returns {Float32Array} `buffer` itself when the two rates are equal,
  *     otherwise a new array of about
  *     `buffer.length * exportSampleRate / recordSampleRate` samples.
  */
 function downsampleBuffer(buffer, exportSampleRate) {
  if (exportSampleRate === recordSampleRate) {
    return buffer;
  }
  const sampleRateRatio = recordSampleRate / exportSampleRate;
  const newLength = Math.round(buffer.length / sampleRateRatio);
  const result = new Float32Array(newLength);
  let offsetBuffer = 0;
  for (let offsetResult = 0; offsetResult < result.length; ++offsetResult) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
    let accum = 0;
    let count = 0;
    for (let i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
      accum += buffer[i];
      count++;
    }
    if (count > 0) {
      result[offsetResult] = accum / count;
    } else {
      // The window is empty (this happens when the target rate is higher
      // than the recording rate). Averaging would give 0 / 0 = NaN, so
      // reuse the nearest source sample instead.
      result[offsetResult] = buffer[Math.min(offsetBuffer, buffer.length - 1)];
    }
    offsetBuffer = nextOffsetBuffer;
  }
  return result;
 }
--- a/wasm/vad-asr/assets/README.md
+++ b/wasm/vad-asr/assets/README.md
@@ -0,0 +1,23 @@
 # Introduction
 ## Download VAD models
 Please download
 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad-asr/assets`.
 ## Download non-streaming ASR models
 Please refer to
 https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
 to download a non-streaming ASR model, i.e., an offline ASR model.
 After downloading, you should rename the model files.
 Please refer to
 https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-a-speech-recognition-model
 for how to rename.
 You can find example build scripts at the following address:
  https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
--- a/wasm/vad-asr/index.html
+++ b/wasm/vad-asr/index.html
@@ -0,0 +1,43 @@
 <html lang="en">
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + ASR</title>
  <style>
    h1,div {
      text-align: center;
    }
    textarea {
      width:100%;
    }
  </style>
 </head>
 <body>
  <h1>
    Next-gen Kaldi + WebAssembly<br/>
    VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
    (with Zipformer)
  </h1>
  <!-- Controls and output. app-vad-asr.js looks these elements up by id
       (hint, startBtn, stopBtn, clearBtn, results). -->
  <div>
    <span id="hint">Loading model ... ...</span>
    <br/>
    <br/>
    <button id="startBtn" disabled>Start</button>
    <button id="stopBtn" disabled>Stop</button>
    <button id="clearBtn">Clear</button>
    <br/>
    <br/>
    <textarea id="results" rows="10" readonly></textarea>
  </div>
  <!-- app-vad-asr.js appends one playable clip per detected speech segment here. -->
  <section flex="1" overflow="auto" id="sound-clips">
  </section>
  <!-- Script order: app-vad-asr.js uses OfflineRecognizer, createVad and
       CircularBuffer, which it does not define itself (presumably provided by
       the two sherpa-onnx-*.js helpers - confirm), and it creates the global
       Module object with its onRuntimeInitialized hook. The generated
       sherpa-onnx-wasm-main-vad-asr.js is loaded last. -->
  <script src="sherpa-onnx-asr.js"></script>
  <script src="sherpa-onnx-vad.js"></script>
  <script src="app-vad-asr.js"></script>
  <script src="sherpa-onnx-wasm-main-vad-asr.js"></script>
 </body>
--- a/wasm/vad-asr/sherpa-onnx-asr.js
+++ b/wasm/vad-asr/sherpa-onnx-asr.js
@@ -0,0 +1 @@
 ../asr/sherpa-onnx-asr.js
--- a/wasm/vad-asr/sherpa-onnx-vad.js
+++ b/wasm/vad-asr/sherpa-onnx-vad.js
@@ -0,0 +1 @@
 ../vad/sherpa-onnx-vad.js
--- a/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
+++ b/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
@@ -0,0 +1,19 @@
 // wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
 //
 // Copyright (c)  2024  Xiaomi Corporation
 #include <stdio.h>
 #include <algorithm>
 #include <memory>
 #include "sherpa-onnx/c-api/c-api.h"
 // see also
 // https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
 extern "C" {
 /// Copy `num_bytes` bytes from `src` to `dst`.
 ///
 /// Declared with C linkage so the symbol keeps its plain name `CopyHeap`.
 ///
 /// @param src        First byte to copy from.
 /// @param num_bytes  Number of bytes to copy. Values <= 0 copy nothing.
 /// @param dst        First byte to copy to; must have room for `num_bytes`
 ///                   bytes.
 ///
 /// The call is a no-op when either pointer is null or `num_bytes` is not
 /// positive; without that check `src + num_bytes` would form an invalid
 /// range for std::copy.
 void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  if (src == nullptr || dst == nullptr || num_bytes <= 0) {
    return;
  }
  std::copy(src, src + num_bytes, dst);
 }
 }
--- a/wasm/vad/assets/README.md
+++ b/wasm/vad/assets/README.md
@@ -3,3 +3,6 @@
 Please download
 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
 You can find an example build script at
 https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
--- a/wasm/vad/index.html
+++ b/wasm/vad/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/vad/sherpa-onnx-vad.js
+++ b/wasm/vad/sherpa-onnx-vad.js
@@ -172,7 +172,6 @@ class Vad {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxVadModelConfig(configObj, Module);
    Module._MyPrint(config.ptr);
    const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
        config.ptr, configObj.bufferSizeInSeconds || 30);
    freeConfig(config, Module);