From c972554ad1781974382c61b5dd20dd8aac6c8d3b Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Sat, 15 Mar 2025 17:41:23 +0800 Subject: [PATCH] Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007) --- .../scripts/node-addon/package-optional.json | 6 ++-- .github/scripts/node-addon/package.json | 6 ++-- .github/scripts/test-nodejs-npm.sh | 10 +++++++ ...imd-hf-space-speech-enhancement-gtcrn.yaml | 7 ++--- nodejs-examples/README.md | 10 +++++++ .../test-offline-speech-enhancement-gtcrn.js | 30 +++++++++++++++++++ scripts/nodejs/index.js | 15 ++++++++++ scripts/nodejs/package.json | 6 ++-- wasm/nodejs/CMakeLists.txt | 8 +++++ wasm/nodejs/sherpa-onnx-wave.js | 10 +++---- wasm/speech-enhancement/CMakeLists.txt | 6 ++-- 11 files changed, 96 insertions(+), 18 deletions(-) create mode 100644 nodejs-examples/test-offline-speech-enhancement-gtcrn.js diff --git a/.github/scripts/node-addon/package-optional.json b/.github/scripts/node-addon/package-optional.json index d2db2e19..4f42176a 100644 --- a/.github/scripts/node-addon/package-optional.json +++ b/.github/scripts/node-addon/package-optional.json @@ -1,7 +1,7 @@ { "name": "sherpa-onnx-PLATFORM2-ARCH", "version": "SHERPA_ONNX_VERSION", - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" @@ -46,7 +46,9 @@ "vad", "node-addon-api", "speaker id", - "language id" + "language id", + "speech enhancement", + "denoising" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/.github/scripts/node-addon/package.json b/.github/scripts/node-addon/package.json index bc2d89e8..924173fa 100644 --- a/.github/scripts/node-addon/package.json +++ b/.github/scripts/node-addon/package.json @@ -1,7 +1,7 @@ { "name": "sherpa-onnx-node", "version": "SHERPA_ONNX_VERSION", - "description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", "main": "sherpa-onnx.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" @@ -46,7 +46,9 @@ "vad", "node-addon-api", "speaker id", - "language id" + "language id", + "speech enhancement", + "denoising" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/.github/scripts/test-nodejs-npm.sh b/.github/scripts/test-nodejs-npm.sh index 536310af..89a9df21 100755 --- a/.github/scripts/test-nodejs-npm.sh +++ b/.github/scripts/test-nodejs-npm.sh @@ -9,6 +9,16 @@ git status ls -lh ls -lh node_modules +# speech enhancement +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav +node ./test-offline-speech-enhancement-gtcrn.js +ls -lh *.wav +rm gtcrn_simple.onnx +rm inp_16k.wav +rm enhanced-16k.wav + + # offline tts # curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 diff --git a/.github/workflows/wasm-simd-hf-space-speech-enhancement-gtcrn.yaml b/.github/workflows/wasm-simd-hf-space-speech-enhancement-gtcrn.yaml index 7b82055b..3de86fa9 100644 --- a/.github/workflows/wasm-simd-hf-space-speech-enhancement-gtcrn.yaml +++ b/.github/workflows/wasm-simd-hf-space-speech-enhancement-gtcrn.yaml @@ -4,7 +4,6 @@ on: push: branches: - wasm - - wasm-gtcrn tags: - 'v[0-9]+.[0-9]+.[0-9]+*' @@ -79,9 +78,9 @@ jobs: file_glob: true overwrite: true file: ./*.tar.bz2 - repo_name: k2-fsa/sherpa-onnx - repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} - tag: v1.10.46 + # repo_name: k2-fsa/sherpa-onnx + # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} + # tag: v1.10.46 - name: Release if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') diff --git a/nodejs-examples/README.md b/nodejs-examples/README.md index fa98a5b7..3e8b860b 100644 --- a/nodejs-examples/README.md +++ b/nodejs-examples/README.md @@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa for text-to-speech and speech-to-text. +# Speech enhancement + +In the following, we demonstrate how to run speech enhancement. + +```bash +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav +node ./test-offline-speech-enhancement-gtcrn.js +``` + # Speaker diarization In the following, we demonstrate how to run speaker diarization. diff --git a/nodejs-examples/test-offline-speech-enhancement-gtcrn.js b/nodejs-examples/test-offline-speech-enhancement-gtcrn.js new file mode 100644 index 00000000..5e060e3f --- /dev/null +++ b/nodejs-examples/test-offline-speech-enhancement-gtcrn.js @@ -0,0 +1,30 @@ +// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang) +// +// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file +// from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models +// +// This script shows how to use speech enhancement API from sherpa-onnx. +const sherpa_onnx = require('sherpa-onnx'); + +function createOfflineSpeechDenoiser() { + let config = { + model: { + gtcrn: {model: './gtcrn_simple.onnx'}, + debug: 1, + }, + }; + + return sherpa_onnx.createOfflineSpeechDenoiser(config); +} + +speech_denoiser = createOfflineSpeechDenoiser(); + +const waveFilename = './inp_16k.wav'; +const wave = sherpa_onnx.readWave(waveFilename); + +const denoised = speech_denoiser.run(wave.samples, wave.sampleRate); +sherpa_onnx.writeWave('./enhanced-16k.wav', denoised); +console.log('Saved to ./enhanced-16k.wav'); + +speech_denoiser.free(); diff --git a/scripts/nodejs/index.js b/scripts/nodejs/index.js index b1b77841..cbc88166 100644 --- a/scripts/nodejs/index.js +++ b/scripts/nodejs/index.js @@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js'); const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); const sherpa_onnx_speaker_diarization = require('./sherpa-onnx-speaker-diarization.js'); +const sherpa_onnx_speech_enhancement = + require('./sherpa-onnx-speech-enhancement.js'); + + function createOnlineRecognizer(config) { return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); @@ -47,6 +51,15 @@ function writeWave(filename, data) { sherpa_onnx_wave.writeWave(filename, data, wasmModule); } +function readWaveFromBinaryData(uint8Array) { + return sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule); +} + +function createOfflineSpeechDenoiser(config) { + return sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser( + wasmModule, config); +} + // Note: online means streaming and offline means non-streaming here. // Both of them don't require internet connection. module.exports = { @@ -55,8 +68,10 @@ module.exports = { createOfflineTts, createKws, readWave, + readWaveFromBinaryData, writeWave, createCircularBuffer, createVad, createOfflineSpeakerDiarization, + createOfflineSpeechDenoiser, }; diff --git a/scripts/nodejs/package.json b/scripts/nodejs/package.json index 49f47d29..4aa8e45a 100644 --- a/scripts/nodejs/package.json +++ b/scripts/nodejs/package.json @@ -1,7 +1,7 @@ { "name": "sherpa-onnx", "version": "SHERPA_ONNX_VERSION", - "description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", + "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection", "main": "index.js", "scripts": { "test": "echo \"Error: no test specified\" && exit 1" @@ -34,7 +34,9 @@ "speech", "recognition", "WebAssembly", - "wasm" + "wasm", + "speech enhancement", + "denoising" ], "author": "The next-gen Kaldi team", "license": "Apache-2.0", diff --git a/wasm/nodejs/CMakeLists.txt b/wasm/nodejs/CMakeLists.txt index 19312e96..999c5f67 100644 --- a/wasm/nodejs/CMakeLists.txt +++ b/wasm/nodejs/CMakeLists.txt @@ -49,6 +49,7 @@ set(exported_functions SherpaOnnxDestroyKeywordSpotter SherpaOnnxGetKeywordResult SherpaOnnxIsKeywordStreamReady + SherpaOnnxResetKeywordStream # VAD SherpaOnnxCreateCircularBuffer SherpaOnnxDestroyCircularBuffer @@ -87,6 +88,12 @@ set(exported_functions SherpaOnnxReadWaveFromBinaryData SherpaOnnxFreeWave SherpaOnnxWriteWave + # speech enhancement + SherpaOnnxCreateOfflineSpeechDenoiser + SherpaOnnxDestroyDenoisedAudio + SherpaOnnxDestroyOfflineSpeechDenoiser + SherpaOnnxOfflineSpeechDenoiserGetSampleRate + SherpaOnnxOfflineSpeechDenoiserRun ) @@ -122,6 +129,7 @@ install( ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js + ${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js "$/sherpa-onnx-wasm-nodejs.js" "$/sherpa-onnx-wasm-nodejs.wasm" diff --git a/wasm/nodejs/sherpa-onnx-wave.js b/wasm/nodejs/sherpa-onnx-wave.js index e1a6f959..9abd3b3e 100644 --- a/wasm/nodejs/sherpa-onnx-wave.js +++ b/wasm/nodejs/sherpa-onnx-wave.js @@ -26,21 +26,21 @@ function readWave(filename, Module) { return {samples: samples, sampleRate: sampleRate}; } -function readWaveFromBinaryData(uint8Array) { +function readWaveFromBinaryData(uint8Array, Module) { const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT; - const pointer = this.Module._malloc(numBytes); + const pointer = Module._malloc(numBytes); const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes); dataOnHeap.set(uint8Array); - const w = this.Module._SherpaOnnxReadWaveFromBinaryData( - dataOnHeap.byteOffset, numBytes); + const w = + Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes); if (w == 0) { console.log('Failed to read wave from binary data'); return null; } - this.Module._free(pointer); + Module._free(pointer); const samplesPtr = Module.HEAP32[w / 4] / 4; const sampleRate = Module.HEAP32[w / 4 + 1]; diff --git a/wasm/speech-enhancement/CMakeLists.txt b/wasm/speech-enhancement/CMakeLists.txt index 41dc7a6c..6dc9f848 100644 --- a/wasm/speech-enhancement/CMakeLists.txt +++ b/wasm/speech-enhancement/CMakeLists.txt @@ -9,14 +9,14 @@ endif() set(exported_functions MyPrint SherpaOnnxCreateOfflineSpeechDenoiser + SherpaOnnxDestroyDenoisedAudio SherpaOnnxDestroyOfflineSpeechDenoiser + SherpaOnnxFreeWave SherpaOnnxOfflineSpeechDenoiserGetSampleRate SherpaOnnxOfflineSpeechDenoiserRun - SherpaOnnxDestroyDenoisedAudio - SherpaOnnxWriteWave SherpaOnnxReadWave SherpaOnnxReadWaveFromBinaryData - SherpaOnnxFreeWave + SherpaOnnxWriteWave ) set(mangled_exported_functions) foreach(x IN LISTS exported_functions)