Add JavaScript API (wasm) for speech enhancement GTCRN models (#2007)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "sherpa-onnx-PLATFORM2-ARCH",
|
||||
"version": "SHERPA_ONNX_VERSION",
|
||||
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
|
||||
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
@@ -46,7 +46,9 @@
|
||||
"vad",
|
||||
"node-addon-api",
|
||||
"speaker id",
|
||||
"language id"
|
||||
"language id",
|
||||
"speech enhancement",
|
||||
"denoising"
|
||||
],
|
||||
"author": "The next-gen Kaldi team",
|
||||
"license": "Apache-2.0",
|
||||
|
||||
6
.github/scripts/node-addon/package.json
vendored
6
.github/scripts/node-addon/package.json
vendored
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "sherpa-onnx-node",
|
||||
"version": "SHERPA_ONNX_VERSION",
|
||||
"description": "Speech-to-text, text-to-speech, and speaker diarization using Next-gen Kaldi without internet connection",
|
||||
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||
"main": "sherpa-onnx.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
@@ -46,7 +46,9 @@
|
||||
"vad",
|
||||
"node-addon-api",
|
||||
"speaker id",
|
||||
"language id"
|
||||
"language id",
|
||||
"speech enhancement",
|
||||
"denoising"
|
||||
],
|
||||
"author": "The next-gen Kaldi team",
|
||||
"license": "Apache-2.0",
|
||||
|
||||
10
.github/scripts/test-nodejs-npm.sh
vendored
10
.github/scripts/test-nodejs-npm.sh
vendored
@@ -9,6 +9,16 @@ git status
|
||||
ls -lh
|
||||
ls -lh node_modules
|
||||
|
||||
# speech enhancement
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||
node ./test-offline-speech-enhancement-gtcrn.js
|
||||
ls -lh *.wav
|
||||
rm gtcrn_simple.onnx
|
||||
rm inp_16k.wav
|
||||
rm enhanced-16k.wav
|
||||
|
||||
|
||||
# offline tts
|
||||
#
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/kokoro-multi-lang-v1_0.tar.bz2
|
||||
|
||||
@@ -4,7 +4,6 @@ on:
|
||||
push:
|
||||
branches:
|
||||
- wasm
|
||||
- wasm-gtcrn
|
||||
tags:
|
||||
- 'v[0-9]+.[0-9]+.[0-9]+*'
|
||||
|
||||
@@ -79,9 +78,9 @@ jobs:
|
||||
file_glob: true
|
||||
overwrite: true
|
||||
file: ./*.tar.bz2
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: v1.10.46
|
||||
# repo_name: k2-fsa/sherpa-onnx
|
||||
# repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
# tag: v1.10.46
|
||||
|
||||
- name: Release
|
||||
if: github.repository_owner == 'k2-fsa' && github.event_name == 'push' && contains(github.ref, 'refs/tags/')
|
||||
|
||||
@@ -24,6 +24,16 @@ In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa
|
||||
for text-to-speech and speech-to-text.
|
||||
|
||||
|
||||
# Speech enhancement
|
||||
|
||||
In the following, we demonstrate how to run speech enhancement.
|
||||
|
||||
```bash
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||
node ./test-offline-speech-enhancement-gtcrn.js
|
||||
```
|
||||
|
||||
# Speaker diarization
|
||||
|
||||
In the following, we demonstrate how to run speaker diarization.
|
||||
|
||||
30
nodejs-examples/test-offline-speech-enhancement-gtcrn.js
Normal file
30
nodejs-examples/test-offline-speech-enhancement-gtcrn.js
Normal file
@@ -0,0 +1,30 @@
|
||||
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||
//
|
||||
// Please download ./gtcrn_simple.onnx and ./inp_16k.wav used in this file
|
||||
// from
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
|
||||
//
|
||||
// This script shows how to use the speech enhancement API of sherpa-onnx.
|
||||
const sherpa_onnx = require('sherpa-onnx');
|
||||
|
||||
// Builds the offline speech denoiser used by this example.
//
// The configuration selects the GTCRN model file ./gtcrn_simple.onnx and sets
// the model-level debug option to 1. The finished configuration is handed to
// sherpa_onnx.createOfflineSpeechDenoiser() and its result is returned as-is.
function createOfflineSpeechDenoiser() {
  const gtcrn = {model: './gtcrn_simple.onnx'};
  const model = {gtcrn, debug: 1};

  return sherpa_onnx.createOfflineSpeechDenoiser({model});
}
|
||||
|
||||
// Create the denoiser. It is declared with const: a bare assignment would
// create an implicit global and throws a ReferenceError in strict mode or
// when this script is loaded as an ES module.
const speech_denoiser = createOfflineSpeechDenoiser();

// Load the input wave that is to be enhanced.
const waveFilename = './inp_16k.wav';
const wave = sherpa_onnx.readWave(waveFilename);

// Run the denoiser on the samples at the sample rate reported by readWave()
// and write the result next to the input.
const denoised = speech_denoiser.run(wave.samples, wave.sampleRate);
sherpa_onnx.writeWave('./enhanced-16k.wav', denoised);
console.log('Saved to ./enhanced-16k.wav');

// The denoiser is not used after this point, so call its free() method.
speech_denoiser.free();
|
||||
@@ -9,6 +9,10 @@ const sherpa_onnx_wave = require('./sherpa-onnx-wave.js');
|
||||
const sherpa_onnx_vad = require('./sherpa-onnx-vad.js');
|
||||
const sherpa_onnx_speaker_diarization =
|
||||
require('./sherpa-onnx-speaker-diarization.js');
|
||||
const sherpa_onnx_speech_enhancement =
|
||||
require('./sherpa-onnx-speech-enhancement.js');
|
||||
|
||||
|
||||
|
||||
function createOnlineRecognizer(config) {
|
||||
return sherpa_onnx_asr.createOnlineRecognizer(wasmModule, config);
|
||||
@@ -47,6 +51,15 @@ function writeWave(filename, data) {
|
||||
sherpa_onnx_wave.writeWave(filename, data, wasmModule);
|
||||
}
|
||||
|
||||
// Reads a wave from in-memory binary data.
//
// Delegates to readWaveFromBinaryData() in sherpa-onnx-wave.js, passing the
// caller's uint8Array together with the wasm module held by this file, and
// returns whatever that call returns.
function readWaveFromBinaryData(uint8Array) {
  const wave = sherpa_onnx_wave.readWaveFromBinaryData(uint8Array, wasmModule);
  return wave;
}
|
||||
|
||||
// Creates an offline speech denoiser from the given config.
//
// Delegates to createOfflineSpeechDenoiser() in
// sherpa-onnx-speech-enhancement.js, passing the wasm module held by this
// file as the first argument, and returns whatever that call returns.
function createOfflineSpeechDenoiser(config) {
  const denoiser = sherpa_onnx_speech_enhancement.createOfflineSpeechDenoiser(
      wasmModule, config);
  return denoiser;
}
|
||||
|
||||
// Note: online means streaming and offline means non-streaming here.
|
||||
// Neither of them requires an internet connection.
|
||||
module.exports = {
|
||||
@@ -55,8 +68,10 @@ module.exports = {
|
||||
createOfflineTts,
|
||||
createKws,
|
||||
readWave,
|
||||
readWaveFromBinaryData,
|
||||
writeWave,
|
||||
createCircularBuffer,
|
||||
createVad,
|
||||
createOfflineSpeakerDiarization,
|
||||
createOfflineSpeechDenoiser,
|
||||
};
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
{
|
||||
"name": "sherpa-onnx",
|
||||
"version": "SHERPA_ONNX_VERSION",
|
||||
"description": "Speech-to-text and text-to-speech using Next-gen Kaldi without internet connection",
|
||||
"description": "Speech-to-text, text-to-speech, speaker diarization, and speech enhancement using Next-gen Kaldi without internet connection",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
@@ -34,7 +34,9 @@
|
||||
"speech",
|
||||
"recognition",
|
||||
"WebAssembly",
|
||||
"wasm"
|
||||
"wasm",
|
||||
"speech enhancement",
|
||||
"denoising"
|
||||
],
|
||||
"author": "The next-gen Kaldi team",
|
||||
"license": "Apache-2.0",
|
||||
|
||||
@@ -49,6 +49,7 @@ set(exported_functions
|
||||
SherpaOnnxDestroyKeywordSpotter
|
||||
SherpaOnnxGetKeywordResult
|
||||
SherpaOnnxIsKeywordStreamReady
|
||||
SherpaOnnxResetKeywordStream
|
||||
# VAD
|
||||
SherpaOnnxCreateCircularBuffer
|
||||
SherpaOnnxDestroyCircularBuffer
|
||||
@@ -87,6 +88,12 @@ set(exported_functions
|
||||
SherpaOnnxReadWaveFromBinaryData
|
||||
SherpaOnnxFreeWave
|
||||
SherpaOnnxWriteWave
|
||||
# speech enhancement
|
||||
SherpaOnnxCreateOfflineSpeechDenoiser
|
||||
SherpaOnnxDestroyDenoisedAudio
|
||||
SherpaOnnxDestroyOfflineSpeechDenoiser
|
||||
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
|
||||
SherpaOnnxOfflineSpeechDenoiserRun
|
||||
)
|
||||
|
||||
|
||||
@@ -122,6 +129,7 @@ install(
|
||||
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
|
||||
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
|
||||
${CMAKE_SOURCE_DIR}/wasm/speaker-diarization/sherpa-onnx-speaker-diarization.js
|
||||
${CMAKE_SOURCE_DIR}/wasm/speech-enhancement/sherpa-onnx-speech-enhancement.js
|
||||
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
|
||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
|
||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
|
||||
|
||||
@@ -26,21 +26,21 @@ function readWave(filename, Module) {
|
||||
return {samples: samples, sampleRate: sampleRate};
|
||||
}
|
||||
|
||||
function readWaveFromBinaryData(uint8Array) {
|
||||
function readWaveFromBinaryData(uint8Array, Module) {
|
||||
const numBytes = uint8Array.length * uint8Array.BYTES_PER_ELEMENT;
|
||||
const pointer = this.Module._malloc(numBytes);
|
||||
const pointer = Module._malloc(numBytes);
|
||||
|
||||
const dataOnHeap = new Uint8Array(Module.HEAPU8.buffer, pointer, numBytes);
|
||||
dataOnHeap.set(uint8Array);
|
||||
|
||||
const w = this.Module._SherpaOnnxReadWaveFromBinaryData(
|
||||
dataOnHeap.byteOffset, numBytes);
|
||||
const w =
|
||||
Module._SherpaOnnxReadWaveFromBinaryData(dataOnHeap.byteOffset, numBytes);
|
||||
if (w == 0) {
|
||||
console.log('Failed to read wave from binary data');
|
||||
return null;
|
||||
}
|
||||
|
||||
this.Module._free(pointer);
|
||||
Module._free(pointer);
|
||||
|
||||
const samplesPtr = Module.HEAP32[w / 4] / 4;
|
||||
const sampleRate = Module.HEAP32[w / 4 + 1];
|
||||
|
||||
@@ -9,14 +9,14 @@ endif()
|
||||
set(exported_functions
|
||||
MyPrint
|
||||
SherpaOnnxCreateOfflineSpeechDenoiser
|
||||
SherpaOnnxDestroyDenoisedAudio
|
||||
SherpaOnnxDestroyOfflineSpeechDenoiser
|
||||
SherpaOnnxFreeWave
|
||||
SherpaOnnxOfflineSpeechDenoiserGetSampleRate
|
||||
SherpaOnnxOfflineSpeechDenoiserRun
|
||||
SherpaOnnxDestroyDenoisedAudio
|
||||
SherpaOnnxWriteWave
|
||||
SherpaOnnxReadWave
|
||||
SherpaOnnxReadWaveFromBinaryData
|
||||
SherpaOnnxFreeWave
|
||||
SherpaOnnxWriteWave
|
||||
)
|
||||
set(mangled_exported_functions)
|
||||
foreach(x IN LISTS exported_functions)
|
||||
|
||||
Reference in New Issue
Block a user