Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)

This commit is contained in:
Fangjun Kuang
2025-03-15 17:41:23 +08:00
committed by GitHub
parent d320fdf65e
commit c972554ad1
11 changed files with 96 additions and 18 deletions

View File

@@ -1,7 +1,7 @@
{ {
"name": "sherpa-onnx-PLATFORM2-ARCH", "name": "sherpa-onnx-PLATFORM2-ARCH",
"version": "SHERPA_ONNX_VERSION", "version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1"
@@ -46,7 +46,9 @@
"vad", "vad",
"node-addon-api", "node-addon-api",
"speaker id", "speaker id",
"language id" "language id",
"speech enhancement",
"denoising"
], ],
"author": "The next-gen Kaldi team", "author": "The next-gen Kaldi team",
"license": "Apache-2.0", "license": "Apache-2.0",

View File

@@ -1,7 +1,7 @@
{ {
"name": "sherpa-onnx-node", "name": "sherpa-onnx-node",
"version": "SHERPA_ONNX_VERSION", "version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection", "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "sherpa-onnx.js", "main": "sherpa-onnx.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1"
@@ -46,7 +46,9 @@
"vad", "vad",
"node-addon-api", "node-addon-api",
"speaker id", "speaker id",
"language id" "language id",
"speech enhancement",
"denoising"
], ],
"author": "The next-gen Kaldi team", "author": "The next-gen Kaldi team",
"license": "Apache-2.0", "license": "Apache-2.0",

View File

@@ -9,6 +9,16 @@ git status
ls -lh ls -lh
ls -lh node_modules ls -lh node_modules
# speech enhancement
#
# Download the GTCRN model and the sample input wave used by the example.
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
# Run the speech enhancement example with node.
node ./test-offline-speech-enhancement-gtcrn.js
# List the wave files in the current directory so the result shows up in the log.
ls -lh *.wav
# Clean up the downloaded files and enhanced-16k.wav (expected to have been
# written by the example above).
rm gtcrn_simple.onnx
rm inp_16k.wav
rm enhanced-16k.wav
# offline tts # offline tts
# #
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2

View File

@@ -4,7 +4,6 @@ on:
push: push:
branches: branches:
- wasm - wasm
- wasm-gtcrn
tags: tags:
- 'v[0-9]+.[0-9]+.[0-9]+*' - 'v[0-9]+.[0-9]+.[0-9]+*'
@@ -79,9 +78,9 @@ jobs:
file_glob: true file_glob: true
overwrite: true overwrite: true
file: ./*.tar.bz2 file: ./*.tar.bz2
repo_name: k2-fsa/sherpa-onnx # repo_name: k2-fsa/sherpa-onnx
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }} # repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
tag: v1.10.46 # tag: v1.10.46
- name: Release - name: Release
if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/') if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')

View File

@@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa
for text-to-speech and speech-to-text. for text-to-speech and speech-to-text.
# Speech enhancement
In the following, we demonstrate how to run speech enhancement.
```bash
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
node ./test-offline-speech-enhancement-gtcrn.js
```
# Speaker diarization # Speaker diarization
In the following, we demonstrate how to run speaker diarization. In the following, we demonstrate how to run speaker diarization.

View File

@@ -0,0 +1,30 @@
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
//
// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file
// from
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
//
// This script shows how to use speech enhancement API from sherpa-onnx.
const sherpa_onnx = require('sherpa-onnx');
/**
 * Create an offline speech denoiser that uses the GTCRN model stored at
 * ./gtcrn_simple.onnx.
 *
 * The configuration passed to sherpa-onnx has a single `model` entry holding
 * the GTCRN model path and `debug: 1`.
 *
 * @returns the object returned by sherpa_onnx.createOfflineSpeechDenoiser().
 */
function createOfflineSpeechDenoiser() {
  const gtcrn = {model: './gtcrn_simple.onnx'};
  const model = {gtcrn, debug: 1};

  return sherpa_onnx.createOfflineSpeechDenoiser({model});
}
// Declare the denoiser with `const`: a bare assignment would create an
// implicit global in sloppy mode and throw a ReferenceError in strict mode.
const speech_denoiser = createOfflineSpeechDenoiser();

try {
  // Read the input wave, run it through the denoiser, and save the result.
  const waveFilename = './inp_16k.wav';
  const wave = sherpa_onnx.readWave(waveFilename);
  const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);

  sherpa_onnx.writeWave('./enhanced-16k.wav', denoised);
  console.log('Saved to ./enhanced-16k.wav');
} finally {
  // Always call free() on the denoiser, even if reading, running, or writing
  // the wave throws.
  speech_denoiser.free();
}

View File

@@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js'); const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
const sherpa_onnx_speaker_diarization = const sherpa_onnx_speaker_diarization =
require('./sherpa-onnx-speaker-diarization.js'); require('./sherpa-onnx-speaker-diarization.js');
const sherpa_onnx_speech_enhancement =
require('./sherpa-onnx-speech-enhancement.js');
function createOnlineRecognizer(config) { function createOnlineRecognizer(config) {
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config); return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
@@ -47,6 +51,15 @@ function writeWave(filename, data) {
sherpa_onnx_wave.writeWave(filename, data, wasmModule); sherpa_onnx_wave.writeWave(filename, data, wasmModule);
} }
/**
 * Decode a wave from in-memory bytes.
 *
 * Forwards to sherpa_onnx_wave.readWaveFromBinaryData(), supplying the
 * module-level wasmModule so callers do not have to pass it themselves.
 *
 * @param uint8Array The bytes of the wave file to decode.
 * @returns whatever sherpa_onnx_wave.readWaveFromBinaryData() returns.
 */
function readWaveFromBinaryData(uint8Array) {
  const wave = sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
  return wave;
}
/**
 * Create an offline speech denoiser from the given configuration.
 *
 * Forwards to sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(),
 * supplying the module-level wasmModule as its first argument.
 *
 * @param config The denoiser configuration, passed through unchanged.
 * @returns whatever the speech enhancement module's factory returns.
 */
function createOfflineSpeechDenoiser(config) {
  const denoiser = sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
      wasmModule, config);
  return denoiser;
}
// Note: online means streaming and offline means non-streaming here. // Note: online means streaming and offline means non-streaming here.
// Both of them don't require internet connection. // Both of them don't require internet connection.
module.exports = { module.exports = {
@@ -55,8 +68,10 @@ module.exports = {
createOfflineTts, createOfflineTts,
createKws, createKws,
readWave, readWave,
readWaveFromBinaryData,
writeWave, writeWave,
createCircularBuffer, createCircularBuffer,
createVad, createVad,
createOfflineSpeakerDiarization, createOfflineSpeakerDiarization,
createOfflineSpeechDenoiser,
}; };

View File

@@ -1,7 +1,7 @@
{ {
"name": "sherpa-onnx", "name": "sherpa-onnx",
"version": "SHERPA_ONNX_VERSION", "version": "SHERPA_ONNX_VERSION",
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection", "description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1"
@@ -34,7 +34,9 @@
"speech", "speech",
"recognition", "recognition",
"WebAssembly", "WebAssembly",
"wasm" "wasm",
"speech enhancement",
"denoising"
], ],
"author": "The next-gen Kaldi team", "author": "The next-gen Kaldi team",
"license": "Apache-2.0", "license": "Apache-2.0",

View File

@@ -49,6 +49,7 @@ set(exported_functions
SherpaOnnxDestroyKeywordSpotter SherpaOnnxDestroyKeywordSpotter
SherpaOnnxGetKeywordResult SherpaOnnxGetKeywordResult
SherpaOnnxIsKeywordStreamReady SherpaOnnxIsKeywordStreamReady
SherpaOnnxResetKeywordStream
# VAD # VAD
SherpaOnnxCreateCircularBuffer SherpaOnnxCreateCircularBuffer
SherpaOnnxDestroyCircularBuffer SherpaOnnxDestroyCircularBuffer
@@ -87,6 +88,12 @@ set(exported_functions
SherpaOnnxReadWaveFromBinaryData SherpaOnnxReadWaveFromBinaryData
SherpaOnnxFreeWave SherpaOnnxFreeWave
SherpaOnnxWriteWave SherpaOnnxWriteWave
# speech enhancement
SherpaOnnxCreateOfflineSpeechDenoiser
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxDestroyOfflineSpeechDenoiser
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
SherpaOnnxOfflineSpeechDenoiserRun
) )
@@ -122,6 +129,7 @@ install(
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js ${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js ${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js ${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js ${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js" "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm" "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"

View File

@@ -26,21 +26,21 @@ function readWave(filename, Module) {
return {samples: samples, sampleRate: sampleRate}; return {samples: samples, sampleRate: sampleRate};
} }
function readWaveFromBinaryData(uint8Array) { function readWaveFromBinaryData(uint8Array, Module) {
const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT; const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
const pointer = this.Module._malloc(numBytes); const pointer = Module._malloc(numBytes);
const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes); const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
dataOnHeap.set(uint8Array); dataOnHeap.set(uint8Array);
const w = this.Module._SherpaOnnxReadWaveFromBinaryData( const w =
dataOnHeap.byteOffset, numBytes); Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
if (w == 0) { if (w == 0) {
console.log('Failed to read wave from binary data'); console.log('Failed to read wave from binary data');
return null; return null;
} }
this.Module._free(pointer); Module._free(pointer);
const samplesPtr = Module.HEAP32[w / 4] / 4; const samplesPtr = Module.HEAP32[w / 4] / 4;
const sampleRate = Module.HEAP32[w / 4 + 1]; const sampleRate = Module.HEAP32[w / 4 + 1];

View File

@@ -9,14 +9,14 @@ endif()
set(exported_functions set(exported_functions
MyPrint MyPrint
SherpaOnnxCreateOfflineSpeechDenoiser SherpaOnnxCreateOfflineSpeechDenoiser
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxDestroyOfflineSpeechDenoiser SherpaOnnxDestroyOfflineSpeechDenoiser
SherpaOnnxFreeWave
SherpaOnnxOfflineSpeechDenoiserGetSampleRate SherpaOnnxOfflineSpeechDenoiserGetSampleRate
SherpaOnnxOfflineSpeechDenoiserRun SherpaOnnxOfflineSpeechDenoiserRun
SherpaOnnxDestroyDenoisedAudio
SherpaOnnxWriteWave
SherpaOnnxReadWave SherpaOnnxReadWave
SherpaOnnxReadWaveFromBinaryData SherpaOnnxReadWaveFromBinaryData
SherpaOnnxFreeWave SherpaOnnxWriteWave
) )
set(mangled_exported_functions) set(mangled_exported_functions)
foreach(x IN LISTS exported_functions) foreach(x IN LISTS exported_functions)