Add VAD and keyword spotting for the Node package with WebAssembly (#1286)

This commit is contained in:
Fangjun Kuang
2024-08-24 23:05:54 +08:00
committed by GitHub
parent 537e163dd0
commit 5ed8e31868
40 changed files with 456 additions and 524 deletions

View File

@@ -546,7 +546,7 @@ function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
Module.setValue(ptr + 12, buffer + offset, 'i8*');
offset += taskLen;
Module.setValue(ptr + 16, config.tailPaddings || -1, 'i32');
Module.setValue(ptr + 16, config.tailPaddings || 2000, 'i32');
return {
buffer: buffer, ptr: ptr, len: len,

View File

@@ -69,13 +69,14 @@ function initModelConfig(config, Module) {
const len = transducer.len + paraformer_len + ctc_len + 7 * 4;
const ptr = Module._malloc(len);
Module.HEAPU8.fill(0, ptr, ptr + len);
let offset = 0;
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
const providerLen = Module.lengthBytesUTF8(config.provider || 'cpu') + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType || '') + 1;
const modelingUnitLen = Module.lengthBytesUTF8(config.modelingUnit || '') + 1;
const bpeVocabLen = Module.lengthBytesUTF8(config.bpeVocab || '') + 1;
const bufferLen =
@@ -86,10 +87,10 @@ function initModelConfig(config, Module) {
Module.stringToUTF8(config.tokens, buffer, tokensLen);
offset += tokensLen;
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
Module.stringToUTF8(config.provider || 'cpu', buffer + offset, providerLen);
offset += providerLen;
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
Module.stringToUTF8(config.modelType || '', buffer + offset, modelTypeLen);
offset += modelTypeLen;
Module.stringToUTF8(
@@ -103,7 +104,7 @@ function initModelConfig(config, Module) {
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
offset += 4;
Module.setValue(ptr + offset, config.numThreads, 'i32');
Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
offset += 4;
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
@@ -134,14 +135,21 @@ function initModelConfig(config, Module) {
/**
 * Allocates an 8-byte block with `Module._malloc` and stores the feature
 * extractor settings in it as two consecutive 32-bit integers.
 *
 * @param {object} config - Reads `samplingRate` and `featureDim`.
 * @param {object} Module - Object providing `_malloc` and `setValue`.
 * @returns {{ptr: number, len: number}} Address of the block and its size (8).
 *     Nothing in this function frees the block.
 */
function initFeatureExtractorConfig(config, Module) {
let ptr = Module._malloc(4 * 2);
// NOTE(review): these two writes are immediately overwritten by the two
// defaulted writes below (same addresses), so they have no lasting effect.
// They look like the pre-change lines of a diff — confirm and drop them.
Module.setValue(ptr, config.samplingRate, 'i32');
Module.setValue(ptr + 4, config.featureDim, 'i32');
// `||` substitutes the default for every falsy value, so an explicit 0 is
// also replaced by 16000 / 80.
Module.setValue(ptr, config.samplingRate || 16000, 'i32');
Module.setValue(ptr + 4, config.featureDim || 80, 'i32');
return {
ptr: ptr, len: 8,
}
}
function initKwsConfig(config, Module) {
if (!('featConfig' in config)) {
config.featConfig = {
sampleRate: 16000,
featureDim: 80,
};
}
let featConfig = initFeatureExtractorConfig(config.featConfig, Module);
let modelConfig = initModelConfig(config.modelConfig, Module);
@@ -155,16 +163,16 @@ function initKwsConfig(config, Module) {
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset)
offset += modelConfig.len;
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
Module.setValue(ptr + offset, config.maxActivePaths || 4, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.numTrailingBlanks, 'i32');
Module.setValue(ptr + offset, config.numTrailingBlanks || 1, 'i32');
offset += 4;
Module.setValue(ptr + offset, config.keywordsScore, 'float');
Module.setValue(ptr + offset, config.keywordsScore || 1.0, 'float');
offset += 4;
Module.setValue(ptr + offset, config.keywordsThreshold, 'float');
Module.setValue(ptr + offset, config.keywordsThreshold || 0.25, 'float');
offset += 4;
let keywordsLen = Module.lengthBytesUTF8(config.keywords) + 1;

View File

@@ -49,6 +49,32 @@ set(exported_functions
SherpaOnnxDestroyKeywordSpotter
SherpaOnnxGetKeywordResult
SherpaOnnxIsKeywordStreamReady
# VAD
SherpaOnnxCreateCircularBuffer
SherpaOnnxDestroyCircularBuffer
SherpaOnnxCircularBufferPush
SherpaOnnxCircularBufferGet
SherpaOnnxCircularBufferFree
SherpaOnnxCircularBufferPop
SherpaOnnxCircularBufferSize
SherpaOnnxCircularBufferHead
SherpaOnnxCircularBufferReset
SherpaOnnxCreateVoiceActivityDetector
SherpaOnnxDestroyVoiceActivityDetector
SherpaOnnxVoiceActivityDetectorAcceptWaveform
SherpaOnnxVoiceActivityDetectorEmpty
SherpaOnnxVoiceActivityDetectorDetected
SherpaOnnxVoiceActivityDetectorPop
SherpaOnnxVoiceActivityDetectorClear
SherpaOnnxVoiceActivityDetectorFront
SherpaOnnxDestroySpeechSegment
SherpaOnnxVoiceActivityDetectorReset
SherpaOnnxVoiceActivityDetectorFlush
#
SherpaOnnxFileExists
SherpaOnnxReadWave
SherpaOnnxFreeWave
SherpaOnnxWriteWave
)
@@ -82,6 +108,8 @@ install(
${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
${CMAKE_SOURCE_DIR}/wasm/kws/sherpa-onnx-kws.js
${CMAKE_SOURCE_DIR}/wasm/vad/sherpa-onnx-vad.js
${CMAKE_SOURCE_DIR}/wasm/nodejs/sherpa-onnx-wave.js
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
DESTINATION

View File

@@ -0,0 +1,57 @@
/**
 * Reads a wave file through the native `_SherpaOnnxReadWave` binding and
 * copies the decoded audio out of the module heap.
 *
 * @param {string} filename - Path of the wave file to read.
 * @param {object} Module - Object exposing `HEAP32`, `HEAPF32`, `_malloc`,
 *     `_free`, `lengthBytesUTF8`, `stringToUTF8`, `_SherpaOnnxReadWave` and
 *     `_SherpaOnnxFreeWave`.
 * @returns {{samples: Float32Array, sampleRate: number}} `samples` is a copy
 *     that does not alias the module heap.
 * @throws {Error} If the native reader returns a null (0) pointer.
 */
function readWave(filename, Module) {
  const filenameLen = Module.lengthBytesUTF8(filename) + 1;
  const pFilename = Module._malloc(filenameLen);

  let w;
  try {
    Module.stringToUTF8(filename, pFilename, filenameLen);
    w = Module._SherpaOnnxReadWave(pFilename);
  } finally {
    // The filename buffer is only needed for the duration of the native call.
    Module._free(pFilename);
  }

  // Without this check the fields below would be read from address 0 and the
  // caller would silently receive garbage.
  if (!w) {
    throw new Error(`Failed to read wave file: ${filename}`);
  }

  try {
    // The struct is read as three consecutive 32-bit fields:
    // samples pointer, sample rate, number of samples.
    const base = w / 4;
    // `>>> 0` reinterprets the pointer as unsigned so that addresses at or
    // above 2 GiB do not turn into a negative heap index.
    const samplesIndex = (Module.HEAP32[base] >>> 0) / 4;
    const sampleRate = Module.HEAP32[base + 1];
    const numSamples = Module.HEAP32[base + 2];

    // slice() copies, so the result stays valid after the wave is freed.
    const samples =
        Module.HEAPF32.slice(samplesIndex, samplesIndex + numSamples);

    return {samples: samples, sampleRate: sampleRate};
  } finally {
    Module._SherpaOnnxFreeWave(w);
  }
}
/**
 * Writes audio samples to a wave file through the native
 * `_SherpaOnnxWriteWave` binding.
 *
 * @param {string} filename - Path of the wave file to create.
 * @param {{samples: (Float32Array|ArrayLike<number>), sampleRate: number}} data
 *     Audio to write. `samples` that are not already a Float32Array are
 *     converted to one first.
 * @param {object} Module - Object exposing `HEAPF32`, `_malloc`, `_free`,
 *     `lengthBytesUTF8`, `stringToUTF8` and `_SherpaOnnxWriteWave`.
 * @returns {boolean} `true` when the native call returns a non-zero status.
 *     NOTE(review): assumes non-zero means success, as the sherpa-onnx C API
 *     describes for SherpaOnnxWriteWave — confirm.
 */
function writeWave(filename, data, Module) {
  // The heap index below is only correct for 4-byte elements, so normalise
  // other array-likes (Float64Array, plain arrays) to Float32Array up front.
  const samples = data.samples instanceof Float32Array ?
      data.samples :
      Float32Array.from(data.samples);

  const pSamples = Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
  let pFilename = 0;

  try {
    Module.HEAPF32.set(samples, pSamples / samples.BYTES_PER_ELEMENT);

    const filenameLen = Module.lengthBytesUTF8(filename) + 1;
    pFilename = Module._malloc(filenameLen);
    Module.stringToUTF8(filename, pFilename, filenameLen);

    const status = Module._SherpaOnnxWriteWave(
        pSamples, samples.length, data.sampleRate, pFilename);

    return Boolean(status);
  } finally {
    // Release both buffers even when one of the calls above throws.
    if (pFilename) {
      Module._free(pFilename);
    }
    Module._free(pSamples);
  }
}
if (typeof process == 'object' && typeof process.versions == 'object' &&
typeof process.versions.node == 'string') {
module.exports = {
readWave,
writeWave,
};
}