Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "sherpa-onnx-PLATFORM2-ARCH",
|
"name": "sherpa-onnx-PLATFORM2-ARCH",
|
||||||
"version": "SHERPA_ONNX_VERSION",
|
"version": "SHERPA_ONNX_VERSION",
|
||||||
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
|
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
@@ -46,7 +46,9 @@
|
|||||||
"vad",
|
"vad",
|
||||||
"node-addon-api",
|
"node-addon-api",
|
||||||
"speaker id",
|
"speaker id",
|
||||||
"language id"
|
"language id",
|
||||||
|
"speech enhancement",
|
||||||
|
"denoising"
|
||||||
],
|
],
|
||||||
"author": "The next-gen Kaldi team",
|
"author": "The next-gen Kaldi team",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
|||||||
6
.github/scripts/node-addon/package.json
vendored
6
.github/scripts/node-addon/package.json
vendored
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "sherpa-onnx-node",
|
"name": "sherpa-onnx-node",
|
||||||
"version": "SHERPA_ONNX_VERSION",
|
"version": "SHERPA_ONNX_VERSION",
|
||||||
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
|
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||||
"main": "sherpa-onnx.js",
|
"main": "sherpa-onnx.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
@@ -46,7 +46,9 @@
|
|||||||
"vad",
|
"vad",
|
||||||
"node-addon-api",
|
"node-addon-api",
|
||||||
"speaker id",
|
"speaker id",
|
||||||
"language id"
|
"language id",
|
||||||
|
"speech enhancement",
|
||||||
|
"denoising"
|
||||||
],
|
],
|
||||||
"author": "The next-gen Kaldi team",
|
"author": "The next-gen Kaldi team",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
|||||||
10
.github/scripts/test-nodejs-npm.sh
vendored
10
.github/scripts/test-nodejs-npm.sh
vendored
@@ -9,6 +9,16 @@ git status
|
|||||||
ls -lh
|
ls -lh
|
||||||
ls -lh node_modules
|
ls -lh node_modules
|
||||||
|
|
||||||
|
# speech enhancement
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
node ./test-offline-speech-enhancement-gtcrn.js
|
||||||
|
ls -lh *.wav
|
||||||
|
rm gtcrn_simple.onnx
|
||||||
|
rm inp_16k.wav
|
||||||
|
rm enhanced-16k.wav
|
||||||
|
|
||||||
|
|
||||||
# offline tts
|
# offline tts
|
||||||
#
|
#
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ on:
|
|||||||
push:
|
push:
|
||||||
branches:
|
branches:
|
||||||
- wasm
|
- wasm
|
||||||
- wasm-gtcrn
|
|
||||||
tags:
|
tags:
|
||||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||||
|
|
||||||
@@ -79,9 +78,9 @@ jobs:
|
|||||||
file_glob: true
|
file_glob: true
|
||||||
overwrite: true
|
overwrite: true
|
||||||
file: ./*.tar.bz2
|
file: ./*.tar.bz2
|
||||||
repo_name: k2-fsa/sherpa-onnx
|
# repo_name: k2-fsa/sherpa-onnx
|
||||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||||
tag: v1.10.46
|
# tag: v1.10.46
|
||||||
|
|
||||||
- name: Release
|
- name: Release
|
||||||
if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
|
if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
|
||||||
|
|||||||
@@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa
|
|||||||
for text-to-speech and speech-to-text.
|
for text-to-speech and speech-to-text.
|
||||||
|
|
||||||
|
|
||||||
|
# Speech enhancement
|
||||||
|
|
||||||
|
In the following, we demonstrate how to run speech enhancement.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
node ./test-offline-speech-enhancement-gtcrn.js
|
||||||
|
```
|
||||||
|
|
||||||
# Speaker diarization
|
# Speaker diarization
|
||||||
|
|
||||||
In the following, we demonstrate how to run speaker diarization.
|
In the following, we demonstrate how to run speaker diarization.
|
||||||
|
|||||||
30
nodejs-examples/test-offline-speech-enhancement-gtcrn.js
Normal file
30
nodejs-examples/test-offline-speech-enhancement-gtcrn.js
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file
|
||||||
|
// from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
|
||||||
|
//
|
||||||
|
// This script shows how to use the speech enhancement API from sherpa-onnx.
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
/**
 * Builds the offline (non-streaming) speech denoiser used by this example.
 *
 * The configuration points the GTCRN model entry at ./gtcrn_simple.onnx and
 * sets `debug` to 1 on the model section.
 *
 * @returns the object returned by sherpa_onnx.createOfflineSpeechDenoiser();
 *     this script later calls run() and free() on it.
 */
function createOfflineSpeechDenoiser() {
  const gtcrn = {model: './gtcrn_simple.onnx'};
  const model = {gtcrn, debug: 1};

  return sherpa_onnx.createOfflineSpeechDenoiser({model});
}
|
||||||
|
|
||||||
|
// Declared with `const`: a bare assignment would create an implicit global
// and throws a ReferenceError when the script runs in strict mode.
const speech_denoiser = createOfflineSpeechDenoiser();

try {
  // Read the noisy input, run the denoiser on it and save the result.
  const waveFilename = './inp_16k.wav';
  const wave = sherpa_onnx.readWave(waveFilename);

  const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
  sherpa_onnx.writeWave('./enhanced-16k.wav', denoised);
  console.log('Saved to ./enhanced-16k.wav');
} finally {
  // Always call free() on the denoiser, even if reading, running or writing
  // the wave throws.
  speech_denoiser.free();
}
|
||||||
@@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
|
|||||||
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
|
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
|
||||||
const sherpa_onnx_speaker_diarization =
|
const sherpa_onnx_speaker_diarization =
|
||||||
require('./sherpa-onnx-speaker-diarization.js');
|
require('./sherpa-onnx-speaker-diarization.js');
|
||||||
|
const sherpa_onnx_speech_enhancement =
|
||||||
|
require('./sherpa-onnx-speech-enhancement.js');
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function createOnlineRecognizer(config) {
|
function createOnlineRecognizer(config) {
|
||||||
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
||||||
@@ -47,6 +51,15 @@ function writeWave(filename, data) {
|
|||||||
sherpa_onnx_wave.writeWave(filename, data, wasmModule);
|
sherpa_onnx_wave.writeWave(filename, data, wasmModule);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reads a wave from in-memory binary data.
 *
 * Thin wrapper that forwards to sherpa_onnx_wave.readWaveFromBinaryData(),
 * supplying the module-level wasmModule so callers do not have to pass it.
 *
 * @param uint8Array The bytes of the wave file to decode.
 * @returns whatever sherpa_onnx_wave.readWaveFromBinaryData() returns
 *     (presumably an object with samples and sampleRate, like readWave;
 *     verify against sherpa-onnx-wave.js).
 */
function readWaveFromBinaryData(uint8Array) {
  const wave = sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
  return wave;
}
|
||||||
|
|
||||||
|
/**
 * Creates an offline (non-streaming) speech denoiser.
 *
 * Thin wrapper that forwards to
 * sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(), supplying the
 * module-level wasmModule so callers only need to pass the configuration.
 *
 * @param config Denoiser configuration, passed through unchanged.
 * @returns the denoiser object produced by the speech-enhancement module.
 */
function createOfflineSpeechDenoiser(config) {
  const denoiser = sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
      wasmModule, config);
  return denoiser;
}
|
||||||
|
|
||||||
// Note: online means streaming and offline means non-streaming here.
|
// Note: online means streaming and offline means non-streaming here.
|
||||||
// Both of them don't require internet connection.
|
// Both of them don't require internet connection.
|
||||||
module.exports = {
|
module.exports = {
|
||||||
@@ -55,8 +68,10 @@ module.exports = {
|
|||||||
createOfflineTts,
|
createOfflineTts,
|
||||||
createKws,
|
createKws,
|
||||||
readWave,
|
readWave,
|
||||||
|
readWaveFromBinaryData,
|
||||||
writeWave,
|
writeWave,
|
||||||
createCircularBuffer,
|
createCircularBuffer,
|
||||||
createVad,
|
createVad,
|
||||||
createOfflineSpeakerDiarization,
|
createOfflineSpeakerDiarization,
|
||||||
|
createOfflineSpeechDenoiser,
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"name": "sherpa-onnx",
|
"name": "sherpa-onnx",
|
||||||
"version": "SHERPA_ONNX_VERSION",
|
"version": "SHERPA_ONNX_VERSION",
|
||||||
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
|
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||||
"main": "index.js",
|
"main": "index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
@@ -34,7 +34,9 @@
|
|||||||
"speech",
|
"speech",
|
||||||
"recognition",
|
"recognition",
|
||||||
"WebAssembly",
|
"WebAssembly",
|
||||||
"wasm"
|
"wasm",
|
||||||
|
"speech enhancement",
|
||||||
|
"denoising"
|
||||||
],
|
],
|
||||||
"author": "The next-gen Kaldi team",
|
"author": "The next-gen Kaldi team",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
|
|||||||
@@ -49,6 +49,7 @@ set(exported_functions
|
|||||||
SherpaOnnxDestroyKeywordSpotter
|
SherpaOnnxDestroyKeywordSpotter
|
||||||
SherpaOnnxGetKeywordResult
|
SherpaOnnxGetKeywordResult
|
||||||
SherpaOnnxIsKeywordStreamReady
|
SherpaOnnxIsKeywordStreamReady
|
||||||
|
SherpaOnnxResetKeywordStream
|
||||||
# VAD
|
# VAD
|
||||||
SherpaOnnxCreateCircularBuffer
|
SherpaOnnxCreateCircularBuffer
|
||||||
SherpaOnnxDestroyCircularBuffer
|
SherpaOnnxDestroyCircularBuffer
|
||||||
@@ -87,6 +88,12 @@ set(exported_functions
|
|||||||
SherpaOnnxReadWaveFromBinaryData
|
SherpaOnnxReadWaveFromBinaryData
|
||||||
SherpaOnnxFreeWave
|
SherpaOnnxFreeWave
|
||||||
SherpaOnnxWriteWave
|
SherpaOnnxWriteWave
|
||||||
|
# speech enhancement
|
||||||
|
SherpaOnnxCreateOfflineSpeechDenoiser
|
||||||
|
SherpaOnnxDestroyDenoisedAudio
|
||||||
|
SherpaOnnxDestroyOfflineSpeechDenoiser
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserRun
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@@ -122,6 +129,7 @@ install(
|
|||||||
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
|
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
|
||||||
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
|
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
|
||||||
${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
|
${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
|
||||||
|
${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
|
||||||
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
|
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
|
||||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
||||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
||||||
|
|||||||
@@ -26,21 +26,21 @@ function readWave(filename, Module) {
|
|||||||
return {samples: samples, sampleRate: sampleRate};
|
return {samples: samples, sampleRate: sampleRate};
|
||||||
}
|
}
|
||||||
|
|
||||||
function readWaveFromBinaryData(uint8Array) {
|
function readWaveFromBinaryData(uint8Array, Module) {
|
||||||
const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
|
const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
|
||||||
const pointer = this.Module._malloc(numBytes);
|
const pointer = Module._malloc(numBytes);
|
||||||
|
|
||||||
const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
|
const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
|
||||||
dataOnHeap.set(uint8Array);
|
dataOnHeap.set(uint8Array);
|
||||||
|
|
||||||
const w = this.Module._SherpaOnnxReadWaveFromBinaryData(
|
const w =
|
||||||
dataOnHeap.byteOffset, numBytes);
|
Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
|
||||||
if (w == 0) {
|
if (w == 0) {
|
||||||
console.log('Failed to read wave from binary data');
|
console.log('Failed to read wave from binary data');
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.Module._free(pointer);
|
Module._free(pointer);
|
||||||
|
|
||||||
const samplesPtr = Module.HEAP32[w / 4] / 4;
|
const samplesPtr = Module.HEAP32[w / 4] / 4;
|
||||||
const sampleRate = Module.HEAP32[w / 4 + 1];
|
const sampleRate = Module.HEAP32[w / 4 + 1];
|
||||||
|
|||||||
@@ -9,14 +9,14 @@ endif()
|
|||||||
set(exported_functions
|
set(exported_functions
|
||||||
MyPrint
|
MyPrint
|
||||||
SherpaOnnxCreateOfflineSpeechDenoiser
|
SherpaOnnxCreateOfflineSpeechDenoiser
|
||||||
|
SherpaOnnxDestroyDenoisedAudio
|
||||||
SherpaOnnxDestroyOfflineSpeechDenoiser
|
SherpaOnnxDestroyOfflineSpeechDenoiser
|
||||||
|
SherpaOnnxFreeWave
|
||||||
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
|
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
|
||||||
SherpaOnnxOfflineSpeechDenoiserRun
|
SherpaOnnxOfflineSpeechDenoiserRun
|
||||||
SherpaOnnxDestroyDenoisedAudio
|
|
||||||
SherpaOnnxWriteWave
|
|
||||||
SherpaOnnxReadWave
|
SherpaOnnxReadWave
|
||||||
SherpaOnnxReadWaveFromBinaryData
|
SherpaOnnxReadWaveFromBinaryData
|
||||||
SherpaOnnxFreeWave
|
SherpaOnnxWriteWave
|
||||||
)
|
)
|
||||||
set(mangled_exported_functions)
|
set(mangled_exported_functions)
|
||||||
foreach(x IN LISTS exported_functions)
|
foreach(x IN LISTS exported_functions)
|
||||||
|
|||||||
Reference in New Issue
Block a user