258 lines
6.0 KiB
JavaScript
258 lines
6.0 KiB
JavaScript
/**
 * Release the heap allocations referenced by a config descriptor.
 *
 * Frees `config.buffer` when the descriptor has a `buffer` key, recurses into
 * `config.sileroVad` when that key is present, and finally frees `config.ptr`.
 *
 * @param {object} config Descriptor holding `ptr` and, optionally, `buffer`
 *     and a nested `sileroVad` descriptor of the same shape.
 * @param {object} Module Object exposing `_free(pointer)`.
 */
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  // Nested descriptors are released before the struct that embeds them.
  if ('sileroVad' in config) {
    freeConfig(config.sileroVad, Module);
  }

  Module._free(config.ptr);
}
|
|
|
|
/**
 * Write a silero VAD model config into newly allocated heap memory.
 *
 * Two allocations are made: one holding the UTF-8 model path (including the
 * terminating byte) and one 24-byte record (6 fields x 4 bytes) laid out as:
 *
 *   +0   'i8*'   pointer to the model path string
 *   +4   'float' threshold           (default 0.5)
 *   +8   'float' minSilenceDuration  (default 0.5)
 *   +12  'float' minSpeechDuration   (default 0.25)
 *   +16  'i32'   windowSize          (default 512)
 *   +20  'float' maxSpeechDuration   (default 20)
 *
 * Defaults are applied with `||`, so any falsy value (including 0 or '')
 * falls back to the default.
 *
 * The user should free the returned pointers.
 *
 * @param {object} config Plain object with the optional fields listed above
 *     plus `model`.
 * @param {object} Module Object exposing `lengthBytesUTF8`, `stringToUTF8`,
 *     `setValue` and `_malloc`.
 * @returns {{buffer: number, ptr: number, len: number}} `buffer` is the string
 *     allocation, `ptr` the record allocation and `len` the record size.
 */
function initSherpaOnnxSileroVadModelConfig(config, Module) {
  const model = config.model || '';
  const modelLen = Module.lengthBytesUTF8(model) + 1;
  const buffer = Module._malloc(modelLen);

  const len = 6 * 4;
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, modelLen);

  // Declared locally: a bare `offset = 0` would create an implicit global and
  // throws a ReferenceError in strict-mode / ES-module code.
  let offset = 0;
  Module.setValue(ptr, buffer, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.threshold || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSilenceDuration || 0.5, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.minSpeechDuration || 0.25, 'float');
  offset += 4;

  Module.setValue(ptr + offset, config.windowSize || 512, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.maxSpeechDuration || 20, 'float');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
  };
}
|
|
|
|
/**
 * Write a complete VAD model config into newly allocated heap memory.
 *
 * The record starts with a copy of the serialized silero config (produced by
 * `initSherpaOnnxSileroVadModelConfig`) followed by four 4-byte fields:
 *
 *   'i32'  sampleRate  (default 16000)
 *   'i32'  numThreads  (default 1)
 *   'i8*'  pointer to the provider string (default 'cpu')
 *   'i32'  debug       (default 0)
 *
 * Defaults are applied with `||`, so falsy values fall back to the default.
 *
 * Note: when `config` has no `sileroVad` key, a default one is assigned onto
 * the caller's `config` object in place.
 *
 * The returned descriptor can be released with `freeConfig`.
 *
 * @param {object} config Plain object with the optional fields listed above
 *     and an optional `sileroVad` sub-config.
 * @param {object} Module Object exposing `lengthBytesUTF8`, `stringToUTF8`,
 *     `setValue`, `_malloc` and `_CopyHeap`.
 * @returns {{buffer: number, ptr: number, len: number, sileroVad: object}}
 *     `buffer` is the provider string allocation, `ptr` the record
 *     allocation, `len` the record size and `sileroVad` the nested descriptor.
 */
function initSherpaOnnxVadModelConfig(config, Module) {
  if (!('sileroVad' in config)) {
    config.sileroVad = {
      model: '',
      threshold: 0.50,
      minSilenceDuration: 0.50,
      minSpeechDuration: 0.25,
      windowSize: 512,
      maxSpeechDuration: 20,
    };
  }

  const sileroVad =
      initSherpaOnnxSileroVadModelConfig(config.sileroVad, Module);

  const len = sileroVad.len + 4 * 4;
  const ptr = Module._malloc(len);

  const provider = config.provider || 'cpu';
  const providerLen = Module.lengthBytesUTF8(provider) + 1;
  const buffer = Module._malloc(providerLen);
  Module.stringToUTF8(provider, buffer, providerLen);

  let offset = 0;

  // The silero record is embedded by value at the start of this record.
  Module._CopyHeap(sileroVad.ptr, sileroVad.len, ptr + offset);
  offset += sileroVad.len;

  Module.setValue(ptr + offset, config.sampleRate || 16000, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads || 1, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, buffer, 'i8*');  // provider
  offset += 4;

  Module.setValue(ptr + offset, config.debug || 0, 'i32');
  offset += 4;

  return {
    buffer: buffer,
    ptr: ptr,
    len: len,
    sileroVad: sileroVad,
  };
}
|
|
|
|
/**
 * Create a `Vad` instance.
 *
 * When `myConfig` is truthy it is passed to `Vad` as-is; otherwise a built-in
 * default configuration is used (silero model at './silero_vad.onnx', 16 kHz,
 * one thread, 'cpu' provider, debug enabled, 30 second buffer).
 *
 * @param {object} Module Module object forwarded to the `Vad` constructor.
 * @param {object} [myConfig] Optional configuration replacing the defaults
 *     entirely (it is not merged with them).
 * @returns {Vad} The newly constructed detector wrapper.
 */
function createVad(Module, myConfig) {
  if (myConfig) {
    return new Vad(myConfig, Module);
  }

  const sileroVad = {
    model: './silero_vad.onnx',
    threshold: 0.50,
    minSilenceDuration: 0.50,
    minSpeechDuration: 0.25,
    maxSpeechDuration: 20,
    windowSize: 512,
  };

  const config = {
    sileroVad: sileroVad,
    sampleRate: 16000,
    numThreads: 1,
    provider: 'cpu',
    debug: 1,
    bufferSizeInSeconds: 30,
  };

  return new Vad(config, Module);
}
|
|
|
|
|
|
/**
 * Thin wrapper around the native circular buffer exposed by `Module`.
 *
 * The instance stores the native handle returned by
 * `_SherpaOnnxCreateCircularBuffer`; call `free()` to destroy it.
 */
class CircularBuffer {
  /**
   * @param {number} capacity Value forwarded to the native constructor.
   * @param {object} Module Object exposing the `_SherpaOnnxCircularBuffer*`
   *     functions, `_malloc`, `_free` and `HEAPF32`.
   */
  constructor(capacity, Module) {
    this.handle = Module._SherpaOnnxCreateCircularBuffer(capacity);
    this.Module = Module;
  }

  /** Destroy the native buffer and reset the stored handle to 0. */
  free() {
    this.Module._SherpaOnnxDestroyCircularBuffer(this.handle);
    this.handle = 0;
  }

  /**
   * Append samples to the buffer.
   *
   * The samples are staged in a temporary heap allocation which is released
   * again even when the native call throws.
   *
   * @param samples {Float32Array}
   */
  push(samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    try {
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._SherpaOnnxCircularBufferPush(
          this.handle, pointer, samples.length);
    } finally {
      this.Module._free(pointer);
    }
  }

  /**
   * Copy `n` samples starting at `startIndex` out of the buffer.
   *
   * @param {number} startIndex Index forwarded to the native getter.
   * @param {number} n Number of samples to read.
   * @returns {Float32Array} A copy independent of the heap.
   */
  get(startIndex, n) {
    const p =
        this.Module._SherpaOnnxCircularBufferGet(this.handle, startIndex, n);

    // `p` is a byte address; HEAPF32 is indexed in 4-byte floats.
    const first = p / 4;
    const samples = this.Module.HEAPF32.slice(first, first + n);

    this.Module._SherpaOnnxCircularBufferFree(p);

    return samples;
  }

  /** Forward `n` to the native pop function. */
  pop(n) {
    this.Module._SherpaOnnxCircularBufferPop(this.handle, n);
  }

  /** @returns {number} Value reported by the native size function. */
  size() {
    return this.Module._SherpaOnnxCircularBufferSize(this.handle);
  }

  /** @returns {number} Value reported by the native head function. */
  head() {
    return this.Module._SherpaOnnxCircularBufferHead(this.handle);
  }

  /** Invoke the native reset function on this buffer. */
  reset() {
    this.Module._SherpaOnnxCircularBufferReset(this.handle);
  }
}
|
|
|
|
/**
 * Wrapper around the native voice activity detector exposed by `Module`.
 *
 * The instance keeps the configuration object it was built from, the native
 * handle and the module; call `free()` to destroy the native detector.
 */
class Vad {
  /**
   * @param {object} configObj Configuration passed to
   *     `initSherpaOnnxVadModelConfig`; `bufferSizeInSeconds` (default 30 when
   *     falsy) is forwarded to the native constructor.
   * @param {object} Module Object exposing the
   *     `_SherpaOnnxVoiceActivityDetector*` functions, `_malloc`, `_free`,
   *     `HEAP32` and `HEAPF32`.
   */
  constructor(configObj, Module) {
    this.config = configObj;

    const config = initSherpaOnnxVadModelConfig(configObj, Module);
    let handle;
    try {
      handle = Module._SherpaOnnxCreateVoiceActivityDetector(
          config.ptr, configObj.bufferSizeInSeconds || 30);
    } finally {
      // The serialized config is only needed for the call above; release it
      // even when detector creation throws.
      freeConfig(config, Module);
    }

    this.handle = handle;
    this.Module = Module;
  }

  /** Destroy the native detector and reset the stored handle to 0. */
  free() {
    this.Module._SherpaOnnxDestroyVoiceActivityDetector(this.handle);
    this.handle = 0;
  }

  /**
   * Feed samples to the detector.
   *
   * The samples are staged in a temporary heap allocation which is released
   * again even when the native call throws.
   *
   * @param samples {Float32Array}
   */
  acceptWaveform(samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
    try {
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._SherpaOnnxVoiceActivityDetectorAcceptWaveform(
          this.handle, pointer, samples.length);
    } finally {
      this.Module._free(pointer);
    }
  }

  /** @returns {boolean} True when the native "empty" query returns 1. */
  isEmpty() {
    return this.Module._SherpaOnnxVoiceActivityDetectorEmpty(this.handle) === 1;
  }

  /** @returns {boolean} True when the native "detected" query returns 1. */
  isDetected() {
    return this.Module._SherpaOnnxVoiceActivityDetectorDetected(
               this.handle) === 1;
  }

  /** Invoke the native pop function on this detector. */
  pop() {
    this.Module._SherpaOnnxVoiceActivityDetectorPop(this.handle);
  }

  /** Invoke the native clear function on this detector. */
  clear() {
    this.Module._SherpaOnnxVoiceActivityDetectorClear(this.handle);
  }

  /**
   * Read the segment returned by the native "front" function.
   *
   * The native record is read as three consecutive 32-bit words: the start
   * value, the byte address of the samples, and the number of samples. The
   * samples are copied out before the native record is destroyed.
   *
   * @returns {{samples: Float32Array, start: number}}
   *   {
   *     samples: a 1-d float32 array,
   *     start: an int32
   *   }
   */
  front() {
    const h = this.Module._SherpaOnnxVoiceActivityDetectorFront(this.handle);

    // `h` is a byte address; HEAP32 / HEAPF32 are indexed in 4-byte words.
    const word = h / 4;
    const start = this.Module.HEAP32[word];
    const first = this.Module.HEAP32[word + 1] / 4;
    const numSamples = this.Module.HEAP32[word + 2];

    const samples = this.Module.HEAPF32.slice(first, first + numSamples);

    this.Module._SherpaOnnxDestroySpeechSegment(h);
    return {samples: samples, start: start};
  }

  /** Invoke the native reset function on this detector. */
  reset() {
    this.Module._SherpaOnnxVoiceActivityDetectorReset(this.handle);
  }

  /** Invoke the native flush function on this detector. */
  flush() {
    this.Module._SherpaOnnxVoiceActivityDetectorFlush(this.handle);
  }
}
|
|
|
|
if (typeof process == 'object' && typeof process.versions == 'object' &&
|
|
typeof process.versions.node == 'string') {
|
|
module.exports = {
|
|
createVad,
|
|
CircularBuffer,
|
|
};
|
|
}
|