WebAssembly example for VAD + Non-streaming ASR (#1284)

2024-08-24 13:24:52 +08:00
parent 1ef8a7a202
commit 537e163dd0
29 changed files with 1281 additions and 70 deletions
--- a/wasm/CMakeLists.txt
+++ b/wasm/CMakeLists.txt
@@ -14,6 +14,10 @@ if(SHERPA_ONNX_ENABLE_WASM_VAD)
  add_subdirectory(vad)
 endif()

+# Add the VAD + non-streaming ASR WebAssembly example only when it is enabled.
+if(SHERPA_ONNX_ENABLE_WASM_VAD_ASR)
+  add_subdirectory(vad-asr)
+endif()
+
 if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  add_subdirectory(nodejs)
 endif()
--- a/wasm/asr/assets/README.md
+++ b/wasm/asr/assets/README.md
@@ -80,3 +80,10 @@ assets fangjun$ tree -L 1

 0 directories, 4 files
 ```
+
+You can find example build scripts at:
+
+  - Streaming Zipformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-zipformer.yaml
+  - Streaming Zipformer (English): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-asr-zipformer.yaml
+  - Streaming Paraformer (English + Chinese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-en-asr-paraformer.yaml
+  - Streaming Paraformer (English + Chinese + Cantonese): https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-zh-cantonese-en-asr-paraformer.yaml
--- a/wasm/asr/index.html
+++ b/wasm/asr/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for ASR</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/tts/assets/README.md
+++ b/wasm/tts/assets/README.md
@@ -30,3 +30,8 @@ assets fangjun$ tree -L 1

 1 directory, 3 files
 ```
+
+You can find example build scripts at:
+
+  - English TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-en-tts.yaml
+  - German TTS: https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-de-tts.yaml
--- a/wasm/vad-asr/CMakeLists.txt
+++ b/wasm/vad-asr/CMakeLists.txt
@@ -0,0 +1,83 @@
+# Refuse to configure unless SHERPA_ONNX_IS_USING_BUILD_WASM_SH is set to a true
+# value in the environment (read at configure time). The message points at the
+# build script for this VAD+ASR example rather than the VAD-only one.
+if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
+  message(FATAL_ERROR "Please use ./build-wasm-simd-vad-asr.sh to build for wasm VAD+ASR")
+endif()
+
+# Stop early when the VAD model or the token table is missing from assets/;
+# the error message sends the user to the README that explains what to download.
+if(NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/silero_vad.onnx" OR NOT EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/assets/tokens.txt")
+  message(FATAL_ERROR "Please read ${CMAKE_CURRENT_SOURCE_DIR}/assets/README.md before you continue")
+endif()
+
+# Names of the functions to export from the wasm module. Each entry is later
+# prefixed with "_" and passed to -sEXPORTED_FUNCTIONS.
+set(exported_functions
+  # VAD
+  SherpaOnnxCreateCircularBuffer
+  SherpaOnnxDestroyCircularBuffer
+  SherpaOnnxCircularBufferPush
+  SherpaOnnxCircularBufferGet
+  SherpaOnnxCircularBufferFree
+  SherpaOnnxCircularBufferPop
+  SherpaOnnxCircularBufferSize
+  SherpaOnnxCircularBufferHead
+  SherpaOnnxCircularBufferReset
+  SherpaOnnxCreateVoiceActivityDetector
+  SherpaOnnxDestroyVoiceActivityDetector
+  SherpaOnnxVoiceActivityDetectorAcceptWaveform
+  SherpaOnnxVoiceActivityDetectorEmpty
+  SherpaOnnxVoiceActivityDetectorDetected
+  SherpaOnnxVoiceActivityDetectorPop
+  SherpaOnnxVoiceActivityDetectorClear
+  SherpaOnnxVoiceActivityDetectorFront
+  SherpaOnnxDestroySpeechSegment
+  SherpaOnnxVoiceActivityDetectorReset
+  SherpaOnnxVoiceActivityDetectorFlush
+  # non-streaming ASR
+  SherpaOnnxAcceptWaveformOffline
+  SherpaOnnxCreateOfflineRecognizer
+  SherpaOnnxCreateOfflineStream
+  SherpaOnnxDecodeMultipleOfflineStreams
+  SherpaOnnxDecodeOfflineStream
+  SherpaOnnxDestroyOfflineRecognizer
+  SherpaOnnxDestroyOfflineRecognizerResult
+  SherpaOnnxDestroyOfflineStream
+  SherpaOnnxDestroyOfflineStreamResultJson
+  SherpaOnnxGetOfflineStreamResult
+  SherpaOnnxGetOfflineStreamResultAsJson
+  # helper used by app-vad-asr.js to probe which model files are present
+  SherpaOnnxFileExists
+)
+# Prefix every exported name with "_" and collapse the result into a single
+# comma-separated string for use in -sEXPORTED_FUNCTIONS.
+list(TRANSFORM exported_functions PREPEND "_"
+     OUTPUT_VARIABLE mangled_exported_functions)
+list(JOIN mangled_exported_functions "," all_exported_functions)
+
+# Headers are included relative to the top of the source tree (the .cc file in
+# this directory includes "sherpa-onnx/c-api/c-api.h").
+include_directories(${CMAKE_SOURCE_DIR})
+# Emscripten options for this example, accumulated in a single string.
+set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
+string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
+# CopyHeap comes from sherpa-onnx-wasm-main-vad-asr.cc; the rest is the list built above.
+string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
+# Package everything under assets/ (models, tokens.txt) into the preloaded data file.
+string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
+string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
+
+message(STATUS "MY_FLAGS: ${MY_FLAGS}")
+
+# The same flag string is appended to both the C and the C++ flags.
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
+# NOTE(review): CMAKE_EXECUTBLE_LINKER_FLAGS looks like a misspelling of
+# CMAKE_EXE_LINKER_FLAGS. As written it only sets an ordinary variable that
+# nothing else in this file reads; presumably the flags reach the link step
+# through CMAKE_C_FLAGS/CMAKE_CXX_FLAGS instead. Confirm before renaming, since
+# a corrected name could pass every flag to the linker twice.
+set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")
+
+# The install rules below refer to the built program as a .js file, so abort
+# when executables would be produced with any other suffix.
+if (NOT CMAKE_EXECUTABLE_SUFFIX STREQUAL ".js")
+  message(FATAL_ERROR "The default suffix for building executables should be .js!")
+endif()
+# set(CMAKE_EXECUTABLE_SUFFIX ".html")
+
+# The wasm entry point: one translation unit linked against the sherpa-onnx C API.
+add_executable(sherpa-onnx-wasm-main-vad-asr sherpa-onnx-wasm-main-vad-asr.cc)
+target_link_libraries(sherpa-onnx-wasm-main-vad-asr PRIVATE sherpa-onnx-c-api)
+install(TARGETS sherpa-onnx-wasm-main-vad-asr DESTINATION bin/wasm/vad-asr)
+
+# Install everything the web page needs side by side: the generated
+# .js/.wasm/.data files plus the static page and its scripts. index.html loads
+# sherpa-onnx-asr.js and sherpa-onnx-vad.js, so they must be installed as well.
+install(
+  FILES
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.js"
+    "index.html"
+    "sherpa-onnx-asr.js"
+    "sherpa-onnx-vad.js"
+    "app-vad-asr.js"
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.wasm"
+    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-main-vad-asr>/sherpa-onnx-wasm-main-vad-asr.data"
+  DESTINATION
+    bin/wasm/vad-asr
+)
--- a/wasm/vad-asr/app-vad-asr.js
+++ b/wasm/vad-asr/app-vad-asr.js
@@ -0,0 +1,389 @@
+// This file copies and modifies code
+// from https://mdn.github.io/web-dictaphone/scripts/app.js
+// and https://gist.github.com/meziantou/edb7217fddfbb70e899e
+
+// Handles to the page elements this script drives (ids come from index.html).
+const startBtn = document.getElementById('startBtn');
+const stopBtn = document.getElementById('stopBtn');
+const clearBtn = document.getElementById('clearBtn');
+const hint = document.getElementById('hint');
+const soundClips = document.getElementById('sound-clips');
+
+let textArea = document.getElementById('results');
+
+// Pending entry: set to 'Speech detected' while the VAD reports speech and
+// moved into resultList once it no longer does.
+let lastResult = '';
+// Finished entries, rendered by getDisplayResult().
+let resultList = [];
+
+// Forget all finished entries and redraw the text area. lastResult is not
+// touched here.
+clearBtn.onclick = function() {
+  resultList = [];
+  textArea.value = getDisplayResult();
+  textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+};
+
+function getDisplayResult() {
+  let i = 0;
+  let ans = '';
+  for (let s in resultList) {
+    if (resultList[s] == '') {
+      continue;
+    }
+
+    if (resultList[s] == 'Speech detected') {
+      ans += '' + i + ': ' + resultList[s];
+      i += 1;
+    } else {
+      ans += ', ' + resultList[s] + '\n';
+    }
+  }
+
+  if (lastResult.length > 0) {
+    ans += '' + i + ': ' + lastResult + '\n';
+  }
+  return ans;
+}
+
+
+
+// Assigned without a declaration, so this creates (or overwrites) a global
+// named Module. NOTE(review): presumably this is the object that
+// sherpa-onnx-wasm-main-vad-asr.js, loaded after this file by index.html,
+// fills in -- confirm.
+Module = {};
+
+let audioCtx;
+let mediaStream;
+
+// Sample rate requested for the AudioContext and written into saved WAV files.
+let expectedSampleRate = 16000;
+let recordSampleRate;  // the sampleRate of the microphone
+let recorder = null;   // the microphone
+let leftchannel = [];  // TODO: Use a single channel
+
+let recordingLength = 0;  // number of samples so far
+
+// Created in Module.onRuntimeInitialized.
+let vad = null;
+let buffer = null;
+let recognizer = null;
+// True once 'Speech detected' has been recorded for the current speech run.
+let printed = false;
+
+function fileExists(filename) {
+  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
+  const buffer = Module._malloc(filenameLen);
+  Module.stringToUTF8(filename, buffer, filenameLen);
+
+  let exists = Module._SherpaOnnxFileExists(buffer);
+
+  Module._free(buffer);
+
+  return exists;
+}
+
+function createOfflineRecognizerSenseVoice() {}
+
+function initOfflineRecognizer() {
+  let config = {
+    modelConfig: {
+      debug: 1,
+      tokens: './tokens.txt',
+    },
+  };
+  if (fileExists('sense-voice.onnx') == 1) {
+    config.modelConfig.senseVoice = {
+      model: './sense-voice.onnx',
+      useInverseTextNormalization: 1,
+    };
+  } else if (fileExists('whisper-encoder.onnx')) {
+    config.modelConfig.whisper = {
+      encoder: './whisper-encoder.onnx',
+      decoder: './whisper-decoder.onnx',
+    };
+  } else if (fileExists('transducer-encoder.onnx')) {
+    config.modelConfig.transducer = {
+      encoder: './transducer-encoder.onnx',
+      decoder: './transducer-decoder.onnx',
+      joiner: './transducer-joiner.onnx',
+    };
+    config.modelConfig.modelType = 'transducer';
+  } else if (fileExists('nemo-transducer-encoder.onnx')) {
+    config.modelConfig.transducer = {
+      encoder: './nemo-transducer-encoder.onnx',
+      decoder: './nemo-transducer-decoder.onnx',
+      joiner: './nemo-transducer-joiner.onnx',
+    };
+    config.modelConfig.modelType = 'nemo_transducer';
+  } else if (fileExists('paraformer.onnx')) {
+    config.modelConfig.paraformer = {
+      model: './paraformer.onnx',
+    };
+  } else if (fileExists('telespeech.onnx')) {
+    config.modelConfig.telespeechCtc = './telespeech.onnx';
+  } else {
+    console.log('Please specify a model.');
+    alert('Please specify a model.');
+  }
+
+  recognizer = new OfflineRecognizer(config, Module);
+}
+
+// Set as Module.onRuntimeInitialized. It updates the hint text, enables the
+// Start button, and creates the VAD, the sample buffer and the recognizer.
+Module.onRuntimeInitialized = function() {
+  console.log('inited!');
+  hint.innerText = 'Model loaded! Please click start';
+
+  startBtn.disabled = false;
+
+  vad = createVad(Module);
+  console.log('vad is created!', vad);
+
+  // The first argument is 30 * 16000; NOTE(review): presumably a capacity of
+  // 30 seconds of 16 kHz samples -- confirm against CircularBuffer.
+  buffer = new CircularBuffer(30 * 16000, Module);
+  console.log('CircularBuffer is created!', buffer);
+
+  initOfflineRecognizer();
+};
+
+
+
+if (navigator.mediaDevices.getUserMedia) {
+  console.log('getUserMedia supported.');
+
+  // see https://w3c.github.io/mediacapture-main/#dom-mediadevices-getusermedia
+  const constraints = {audio: true};
+
+  let onSuccess = function(stream) {
+    if (!audioCtx) {
+      audioCtx = new AudioContext({sampleRate: expectedSampleRate});
+    }
+    console.log(audioCtx);
+    recordSampleRate = audioCtx.sampleRate;
+    console.log('sample rate ' + recordSampleRate);
+
+    // creates an audio node from the microphone incoming stream
+    mediaStream = audioCtx.createMediaStreamSource(stream);
+    console.log('media stream', mediaStream);
+
+    // https://developer.mozilla.org/en-US/docs/Web/API/AudioContext/createScriptProcessor
+    // bufferSize: the onaudioprocess event is called when the buffer is full
+    var bufferSize = 4096;
+    var numberOfInputChannels = 1;
+    var numberOfOutputChannels = 2;
+    if (audioCtx.createScriptProcessor) {
+      recorder = audioCtx.createScriptProcessor(
+          bufferSize, numberOfInputChannels, numberOfOutputChannels);
+    } else {
+      recorder = audioCtx.createJavaScriptNode(
+          bufferSize, numberOfInputChannels, numberOfOutputChannels);
+    }
+    console.log('recorder', recorder);
+
+    recorder.onaudioprocess = function(e) {
+      let samples = new Float32Array(e.inputBuffer.getChannelData(0))
+      samples = downsampleBuffer(samples, expectedSampleRate);
+      buffer.push(samples);
+      while (buffer.size() > vad.config.sileroVad.windowSize) {
+        const s = buffer.get(buffer.head(), vad.config.sileroVad.windowSize);
+        vad.acceptWaveform(s);
+        buffer.pop(vad.config.sileroVad.windowSize);
+
+        if (vad.isDetected() && !printed) {
+          printed = true;
+          lastResult = 'Speech detected';
+        }
+
+        if (!vad.isDetected()) {
+          printed = false;
+          if (lastResult != '') {
+            resultList.push(lastResult);
+          }
+          lastResult = '';
+        }
+
+        while (!vad.isEmpty()) {
+          const segment = vad.front();
+          const duration = segment.samples.length / expectedSampleRate;
+          let durationStr = `Duration: ${duration.toFixed(3)} seconds`;
+          vad.pop();
+
+          // non-streaming asr
+          const stream = recognizer.createStream();
+          stream.acceptWaveform(expectedSampleRate, segment.samples);
+          recognizer.decode(stream);
+          let recognitionResult = recognizer.getResult(stream);
+          console.log(recognitionResult);
+          let text = recognitionResult.text;
+          stream.free();
+          console.log(text);
+
+          if (text != '') {
+            durationStr += `. Result: ${text}`;
+          }
+
+          resultList.push(durationStr);
+
+
+          // now save the segment to a wav file
+          let buf = new Int16Array(segment.samples.length);
+          for (var i = 0; i < segment.samples.length; ++i) {
+            let s = segment.samples[i];
+            if (s >= 1)
+              s = 1;
+            else if (s <= -1)
+              s = -1;
+
+            buf[i] = s * 32767;
+          }
+
+          let clipName = new Date().toISOString() + '--' + durationStr;
+
+          const clipContainer = document.createElement('article');
+          const clipLabel = document.createElement('p');
+          const audio = document.createElement('audio');
+          const deleteButton = document.createElement('button');
+
+          clipContainer.classList.add('clip');
+          audio.setAttribute('controls', '');
+          deleteButton.textContent = 'Delete';
+          deleteButton.className = 'delete';
+
+          clipLabel.textContent = clipName;
+
+          clipContainer.appendChild(audio);
+
+          clipContainer.appendChild(clipLabel);
+          clipContainer.appendChild(deleteButton);
+          soundClips.appendChild(clipContainer);
+
+          audio.controls = true;
+          const blob = toWav(buf);
+
+          leftchannel = [];
+          const audioURL = window.URL.createObjectURL(blob);
+          audio.src = audioURL;
+
+          deleteButton.onclick = function(e) {
+            let evtTgt = e.target;
+            evtTgt.parentNode.parentNode.removeChild(evtTgt.parentNode);
+          };
+
+          clipLabel.onclick = function() {
+            const existingName = clipLabel.textContent;
+            const newClipName = prompt('Enter a new name for your sound clip?');
+            if (newClipName === null) {
+              clipLabel.textContent = existingName;
+            } else {
+              clipLabel.textContent = newClipName;
+            }
+          };
+        }
+      }
+
+      textArea.value = getDisplayResult();
+      textArea.scrollTop = textArea.scrollHeight;  // auto scroll
+    };
+
+    startBtn.onclick = function() {
+      mediaStream.connect(recorder);
+      recorder.connect(audioCtx.destination);
+
+      console.log('recorder started');
+
+      stopBtn.disabled = false;
+      startBtn.disabled = true;
+    };
+
+    stopBtn.onclick = function() {
+      vad.reset();
+      buffer.reset();
+      console.log('recorder stopped');
+
+      // stopBtn recording
+      recorder.disconnect(audioCtx.destination);
+      mediaStream.disconnect(recorder);
+
+      startBtn.style.background = '';
+      startBtn.style.color = '';
+      // mediaRecorder.requestData();
+
+      stopBtn.disabled = true;
+      startBtn.disabled = false;
+    };
+  };
+
+  let onError = function(err) {
+    console.log('The following error occured: ' + err);
+  };
+
+  navigator.mediaDevices.getUserMedia(constraints).then(onSuccess, onError);
+} else {
+  console.log('getUserMedia not supported on your browser!');
+  alert('getUserMedia not supported on your browser!');
+}
+
+
+// this function is copied/modified from
+// https://gist.github.com/meziantou/edb7217fddfbb70e899e
+function flatten(listOfSamples) {
+  let n = 0;
+  for (let i = 0; i < listOfSamples.length; ++i) {
+    n += listOfSamples[i].length;
+  }
+  let ans = new Int16Array(n);
+
+  let offset = 0;
+  for (let i = 0; i < listOfSamples.length; ++i) {
+    ans.set(listOfSamples[i], offset);
+    offset += listOfSamples[i].length;
+  }
+  return ans;
+}
+
+// this function is copied/modified from
+// https://gist.github.com/meziantou/edb7217fddfbb70e899e
+function toWav(samples) {
+  let buf = new ArrayBuffer(44 + samples.length * 2);
+  var view = new DataView(buf);
+
+  // http://soundfile.sapp.org/doc/WaveFormat/
+  //                   F F I R
+  view.setUint32(0, 0x46464952, true);               // chunkID
+  view.setUint32(4, 36 + samples.length * 2, true);  // chunkSize
+  //                   E V A W
+  view.setUint32(8, 0x45564157, true);  // format
+                                        //
+  //                      t m f
+  view.setUint32(12, 0x20746d66, true);          // subchunk1ID
+  view.setUint32(16, 16, true);                  // subchunk1Size, 16 for PCM
+  view.setUint32(20, 1, true);                   // audioFormat, 1 for PCM
+  view.setUint16(22, 1, true);                   // numChannels: 1 channel
+  view.setUint32(24, expectedSampleRate, true);  // sampleRate
+  view.setUint32(28, expectedSampleRate * 2, true);  // byteRate
+  view.setUint16(32, 2, true);                       // blockAlign
+  view.setUint16(34, 16, true);                      // bitsPerSample
+  view.setUint32(36, 0x61746164, true);              // Subchunk2ID
+  view.setUint32(40, samples.length * 2, true);      // subchunk2Size
+
+  let offset = 44;
+  for (let i = 0; i < samples.length; ++i) {
+    view.setInt16(offset, samples[i], true);
+    offset += 2;
+  }
+
+  return new Blob([view], {type: 'audio/wav'});
+}
+
+// this function is copied from
+// https://github.com/awslabs/aws-lex-browser-audio-capture/blob/master/lib/worker.js#L46
+function downsampleBuffer(buffer, exportSampleRate) {
+  if (exportSampleRate === recordSampleRate) {
+    return buffer;
+  }
+  var sampleRateRatio = recordSampleRate / exportSampleRate;
+  var newLength = Math.round(buffer.length / sampleRateRatio);
+  var result = new Float32Array(newLength);
+  var offsetResult = 0;
+  var offsetBuffer = 0;
+  while (offsetResult < result.length) {
+    var nextOffsetBuffer = Math.round((offsetResult + 1) * sampleRateRatio);
+    var accum = 0, count = 0;
+    for (var i = offsetBuffer; i < nextOffsetBuffer && i < buffer.length; i++) {
+      accum += buffer[i];
+      count++;
+    }
+    result[offsetResult] = accum / count;
+    offsetResult++;
+    offsetBuffer = nextOffsetBuffer;
+  }
+  return result;
+};
--- a/wasm/vad-asr/assets/README.md
+++ b/wasm/vad-asr/assets/README.md
@@ -0,0 +1,23 @@
+# Introduction
+
+## Download VAD models
+
+Please download
+https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad-asr/assets`.
+
+## Download non-streaming ASR models
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/pretrained_models/index.html
+to download a non-streaming ASR model, i.e., an offline ASR model.
+
+After downloading, you should rename the model files.
+
+Please refer to
+https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html#download-a-speech-recognition-model
+for how to rename.
+
+You can find example build scripts at the following address:
+
+  https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-vad-asr.yaml
--- a/wasm/vad-asr/index.html
+++ b/wasm/vad-asr/index.html
@@ -0,0 +1,43 @@
+<html lang="en">
+
+<head>
+  <meta charset="utf-8">
+  <meta name="viewport" content="width=device-width" />
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD + ASR</title>
+  <style>
+    h1,div {
+      text-align: center;
+    }
+    textarea {
+      width:100%;
+    }
+  </style>
+</head>
+
+<body>
+  <h1>
+    Next-gen Kaldi + WebAssembly<br/>
+    VAD+ASR Demo with <a href="https://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a><br/>
+    (with Zipformer)
+  </h1>
+
+  <div>
+    <span id="hint">Loading model ... ...</span>
+    <br/>
+    <br/>
+    <button id="startBtn" disabled>Start</button>
+    <button id="stopBtn" disabled>Stop</button>
+    <button id="clearBtn">Clear</button>
+    <br/>
+    <br/>
+    <textarea id="results" rows="10" readonly></textarea>
+  </div>
+
+  <section flex="1" overflow="auto" id="sound-clips">
+  </section>
+
+  <script src="sherpa-onnx-asr.js"></script>
+  <script src="sherpa-onnx-vad.js"></script>
+  <script src="app-vad-asr.js"></script>
+  <script src="sherpa-onnx-wasm-main-vad-asr.js"></script>
+</body>
--- a/wasm/vad-asr/sherpa-onnx-asr.js
+++ b/wasm/vad-asr/sherpa-onnx-asr.js
@@ -0,0 +1 @@
+../asr/sherpa-onnx-asr.js
--- a/wasm/vad-asr/sherpa-onnx-vad.js
+++ b/wasm/vad-asr/sherpa-onnx-vad.js
@@ -0,0 +1 @@
+../vad/sherpa-onnx-vad.js
--- a/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
+++ b/wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
@@ -0,0 +1,19 @@
+// wasm/vad-asr/sherpa-onnx-wasm-main-vad-asr.cc
+//
+// Copyright (c)  2024  Xiaomi Corporation
+#include <stdio.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+// see also
+// https://emscripten.org/docs/porting/connecting_cpp_and_javascript/Interacting-with-code.html
+
+extern "C" {
+
+// Copies `num_bytes` bytes starting at `src` into the memory starting at
+// `dst`. It has C linkage and is listed as _CopyHeap among the exported
+// functions in this example's CMakeLists.txt.
+void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
+  std::copy_n(src, num_bytes, dst);
+}
+}
--- a/wasm/vad/assets/README.md
+++ b/wasm/vad/assets/README.md
@@ -3,3 +3,6 @@
 Please download
 https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 and put `silero_vad.onnx` into the current directory, i.e., `wasm/vad/assets`.
+
+You can find an example build script at
+https://github.com/k2-fsa/sherpa-onnx/blob/master/.github/workflows/wasm-simd-hf-space-silero-vad.yaml
--- a/wasm/vad/index.html
+++ b/wasm/vad/index.html
@@ -3,7 +3,7 @@
 <head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width" />
-  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for Text-to-speech</title>
+  <title>Next-gen Kaldi WebAssembly with sherpa-onnx for VAD</title>
  <style>
    h1,div {
      text-align: center;
--- a/wasm/vad/sherpa-onnx-vad.js
+++ b/wasm/vad/sherpa-onnx-vad.js
@@ -172,7 +172,6 @@ class Vad {
  constructor(configObj, Module) {
    this.config = configObj;
    const config = initSherpaOnnxVadModelConfig(configObj, Module);
-    Module._MyPrint(config.ptr);
    const handle = Module._SherpaOnnxCreateVoiceActivityDetector(
        config.ptr, configObj.bufferSizeInSeconds || 30);
    freeConfig(config, Module);