Add WebAssembly for NodeJS. (#628)

This commit is contained in:
Fangjun Kuang
2024-03-03 20:00:36 +08:00
committed by GitHub
parent ac6825ff11
commit ed06ced16f
39 changed files with 1450 additions and 1887 deletions

View File

@@ -5,3 +5,7 @@ endif()
if(SHERPA_ONNX_ENABLE_WASM_ASR)
add_subdirectory(asr)
endif()
if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
add_subdirectory(nodejs)
endif()

View File

@@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() {
startBtn.disabled = false;
recognizer = createRecognizer();
recognizer = createOnlineRecognizer(Module);
console.log('recognizer is created!', recognizer);
};

View File

@@ -1,161 +1,181 @@
function freeConfig(config) {
function freeConfig(config, Module) {
if ('buffer' in config) {
_free(config.buffer);
Module._free(config.buffer);
}
if ('config' in config) {
freeConfig(config.config)
freeConfig(config.config, Module)
}
if ('transducer' in config) {
freeConfig(config.transducer)
freeConfig(config.transducer, Module)
}
if ('paraformer' in config) {
freeConfig(config.paraformer)
freeConfig(config.paraformer, Module)
}
if ('ctc' in config) {
freeConfig(config.ctc)
freeConfig(config.ctc, Module)
}
if ('feat' in config) {
freeConfig(config.feat)
freeConfig(config.feat, Module)
}
if ('model' in config) {
freeConfig(config.model)
freeConfig(config.model, Module)
}
_free(config.ptr);
if ('nemoCtc' in config) {
freeConfig(config.nemoCtc, Module)
}
if ('whisper' in config) {
freeConfig(config.whisper, Module)
}
if ('tdnn' in config) {
freeConfig(config.tdnn, Module)
}
if ('lm' in config) {
freeConfig(config.lm, Module)
}
Module._free(config.ptr);
}
// The user should free the returned pointers
function initSherpaOnnxOnlineTransducerModelConfig(config) {
let encoderLen = lengthBytesUTF8(config.encoder) + 1;
let decoderLen = lengthBytesUTF8(config.decoder) + 1;
let joinerLen = lengthBytesUTF8(config.joiner) + 1;
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
const joinerLen = Module.lengthBytesUTF8(config.joiner) + 1;
let n = encoderLen + decoderLen + joinerLen;
const n = encoderLen + decoderLen + joinerLen;
let buffer = _malloc(n);
const buffer = Module._malloc(n);
let len = 3 * 4; // 3 pointers
let ptr = _malloc(len);
const len = 3 * 4; // 3 pointers
const ptr = Module._malloc(len);
let offset = 0;
stringToUTF8(config.encoder, buffer + offset, encoderLen);
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
offset += encoderLen;
stringToUTF8(config.decoder, buffer + offset, decoderLen);
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
offset += decoderLen;
stringToUTF8(config.joiner, buffer + offset, joinerLen);
Module.stringToUTF8(config.joiner, buffer + offset, joinerLen);
offset = 0;
setValue(ptr, buffer + offset, 'i8*');
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
setValue(ptr + 4, buffer + offset, 'i8*');
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += decoderLen;
setValue(ptr + 8, buffer + offset, 'i8*');
Module.setValue(ptr + 8, buffer + offset, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineParaformerModelConfig(config) {
let encoderLen = lengthBytesUTF8(config.encoder) + 1;
let decoderLen = lengthBytesUTF8(config.decoder) + 1;
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
const encoderLen = Module.lengthBytesUTF8(config.encoder) + 1;
const decoderLen = Module.lengthBytesUTF8(config.decoder) + 1;
let n = encoderLen + decoderLen;
let buffer = _malloc(n);
const n = encoderLen + decoderLen;
const buffer = Module._malloc(n);
let len = 2 * 4; // 2 pointers
let ptr = _malloc(len);
const len = 2 * 4; // 2 pointers
const ptr = Module._malloc(len);
let offset = 0;
stringToUTF8(config.encoder, buffer + offset, encoderLen);
Module.stringToUTF8(config.encoder, buffer + offset, encoderLen);
offset += encoderLen;
stringToUTF8(config.decoder, buffer + offset, decoderLen);
Module.stringToUTF8(config.decoder, buffer + offset, decoderLen);
offset = 0;
setValue(ptr, buffer + offset, 'i8*');
Module.setValue(ptr, buffer + offset, 'i8*');
offset += encoderLen;
setValue(ptr + 4, buffer + offset, 'i8*');
Module.setValue(ptr + 4, buffer + offset, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config) {
let n = lengthBytesUTF8(config.model) + 1;
let buffer = _malloc(n);
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
const n = Module.lengthBytesUTF8(config.model) + 1;
const buffer = Module._malloc(n);
let len = 1 * 4; // 1 pointer
let ptr = _malloc(len);
const len = 1 * 4; // 1 pointer
const ptr = Module._malloc(len);
stringToUTF8(config.model, buffer, n);
Module.stringToUTF8(config.model, buffer, n);
setValue(ptr, buffer, 'i8*');
Module.setValue(ptr, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
function initSherpaOnnxOnlineModelConfig(config) {
let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer);
let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer);
let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc);
function initSherpaOnnxOnlineModelConfig(config, Module) {
const transducer =
initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
const paraformer =
initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
config.zipformer2Ctc, Module);
let len = transducer.len + paraformer.len + ctc.len + 5 * 4;
let ptr = _malloc(len);
const len = transducer.len + paraformer.len + ctc.len + 5 * 4;
const ptr = Module._malloc(len);
let offset = 0;
_CopyHeap(transducer.ptr, transducer.len, ptr + offset);
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
offset += transducer.len;
_CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
offset += paraformer.len;
_CopyHeap(ctc.ptr, ctc.len, ptr + offset);
Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
offset += ctc.len;
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
let providerLen = lengthBytesUTF8(config.provider) + 1;
let modelTypeLen = lengthBytesUTF8(config.modelType) + 1;
let bufferLen = tokensLen + providerLen + modelTypeLen;
let buffer = _malloc(bufferLen);
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
const bufferLen = tokensLen + providerLen + modelTypeLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
stringToUTF8(config.tokens, buffer, tokensLen);
Module.stringToUTF8(config.tokens, buffer, tokensLen);
offset += tokensLen;
stringToUTF8(config.provider, buffer + offset, providerLen);
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
offset += providerLen;
stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
offset = transducer.len + paraformer.len + ctc.len;
setValue(ptr + offset, buffer, 'i8*'); // tokens
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
offset += 4;
setValue(ptr + offset, config.numThreads, 'i32');
Module.setValue(ptr + offset, config.numThreads, 'i32');
offset += 4;
setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
offset += 4;
setValue(ptr + offset, config.debug, 'i32');
Module.setValue(ptr + offset, config.debug, 'i32');
offset += 4;
setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
Module.setValue(
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
offset += 4;
return {
@@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) {
}
}
function initSherpaOnnxFeatureConfig(config) {
let len = 2 * 4; // 2 pointers
let ptr = _malloc(len);
function initSherpaOnnxFeatureConfig(config, Module) {
const len = 2 * 4; // 2 pointers
const ptr = Module._malloc(len);
setValue(ptr, config.sampleRate, 'i32');
setValue(ptr + 4, config.featureDim, 'i32');
Module.setValue(ptr, config.sampleRate, 'i32');
Module.setValue(ptr + 4, config.featureDim, 'i32');
return {ptr: ptr, len: len};
}
function initSherpaOnnxOnlineRecognizerConfig(config) {
let feat = initSherpaOnnxFeatureConfig(config.featConfig);
let model = initSherpaOnnxOnlineModelConfig(config.modelConfig);
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
let len = feat.len + model.len + 8 * 4;
let ptr = _malloc(len);
const len = feat.len + model.len + 8 * 4;
const ptr = Module._malloc(len);
let offset = 0;
_CopyHeap(feat.ptr, feat.len, ptr + offset);
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
offset += feat.len;
_CopyHeap(model.ptr, model.len, ptr + offset);
Module._CopyHeap(model.ptr, model.len, ptr + offset);
offset += model.len;
let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1;
let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1;
let bufferLen = decodingMethodLen + hotwordsFileLen;
let buffer = _malloc(bufferLen);
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
const bufferLen = decodingMethodLen + hotwordsFileLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
offset += decodingMethodLen;
stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
offset = feat.len + model.len;
setValue(ptr + offset, buffer, 'i8*'); // decoding method
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
offset += 4;
setValue(ptr + offset, config.maxActivePaths, 'i32');
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
offset += 4;
setValue(ptr + offset, config.enableEndpoint, 'i32');
Module.setValue(ptr + offset, config.enableEndpoint, 'i32');
offset += 4;
setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
offset += 4;
setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
offset += 4;
setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
offset += 4;
setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
offset += 4;
setValue(ptr + offset, config.hotwordsScore, 'float');
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
offset += 4;
return {
@@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) {
}
function createRecognizer() {
let onlineTransducerModelConfig = {
function createOnlineRecognizer(Module, myConfig) {
const onlineTransducerModelConfig = {
encoder: '',
decoder: '',
joiner: '',
}
};
let onlineParaformerModelConfig = {
const onlineParaformerModelConfig = {
encoder: '',
decoder: '',
}
};
let onlineZipformer2CtcModelConfig = {
const onlineZipformer2CtcModelConfig = {
model: '',
}
};
let type = 0;
@@ -266,7 +286,7 @@ function createRecognizer() {
}
let onlineModelConfig = {
const onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
@@ -275,12 +295,12 @@ function createRecognizer() {
provider: 'cpu',
debug: 1,
modelType: '',
}
};
let featureConfig = {
const featureConfig = {
sampleRate: 16000,
featureDim: 80,
}
};
let recognizerConfig = {
featConfig: featureConfig,
@@ -293,23 +313,336 @@ function createRecognizer() {
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
if (myConfig) {
recognizerConfig = myConfig;
}
return new OnlineRecognizer(recognizerConfig);
return new OnlineRecognizer(recognizerConfig, Module);
}
class OnlineStream {
constructor(handle) {
/**
 * Serializes an offline transducer model config into wasm memory.
 *
 * The encoder, decoder and joiner file names are stored back to back, each
 * NUL-terminated, in a single string buffer. A second allocation of
 * 3 * 4 bytes receives one pointer per string, in that order.
 *
 * Neither allocation is released here; both are handed back to the caller.
 *
 * @param {{encoder: string, decoder: string, joiner: string}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}} Address of the string
 *     buffer, address of the pointer struct, and the struct size in bytes.
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const paths = [config.encoder, config.decoder, config.joiner];
  // +1 leaves room for the terminating NUL of each string.
  const sizes = paths.map((path) => Module.lengthBytesUTF8(path) + 1);

  const buffer = Module._malloc(sizes[0] + sizes[1] + sizes[2]);
  const len = 3 * 4;  // 3 pointers
  const ptr = Module._malloc(len);

  // Start address of each string inside `buffer`.
  const starts = [buffer, buffer + sizes[0], buffer + sizes[0] + sizes[1]];

  paths.forEach((path, i) => {
    Module.stringToUTF8(path, starts[i], sizes[i]);
  });

  starts.forEach((start, i) => {
    Module.setValue(ptr + 4 * i, start, 'i8*');
  });

  return {buffer, ptr, len};
}
/**
 * Serializes an offline paraformer model config into wasm memory.
 *
 * The model file name is copied (NUL-terminated) into a freshly allocated
 * string buffer, and a 4-byte struct holding a pointer to it is allocated.
 * Both allocations are returned to the caller un-freed.
 *
 * @param {{model: string}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  const modelBytes = Module.lengthBytesUTF8(config.model) + 1;  // incl. NUL
  const buffer = Module._malloc(modelBytes);

  const len = 4;  // a single 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Serializes an offline NeMo EncDec CTC model config into wasm memory.
 *
 * Allocates a buffer for the NUL-terminated model file name plus a 4-byte
 * struct whose only field is a pointer to that name. The caller receives both
 * allocations and nothing is freed here.
 *
 * @param {{model: string}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  const {model} = config;

  const nameBytes = Module.lengthBytesUTF8(model) + 1;  // incl. NUL
  const buffer = Module._malloc(nameBytes);

  const len = 1 * 4;  // 1 pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(model, buffer, nameBytes);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Serializes an offline Whisper model config into wasm memory.
 *
 * The encoder and decoder file names are written one after the other, each
 * NUL-terminated, into a single string buffer. An 8-byte struct receives the
 * two pointers (encoder first, decoder second). Both allocations are returned
 * to the caller un-freed.
 *
 * @param {{encoder: string, decoder: string}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const encoderBytes = Module.lengthBytesUTF8(config.encoder) + 1;
  const decoderBytes = Module.lengthBytesUTF8(config.decoder) + 1;

  const buffer = Module._malloc(encoderBytes + decoderBytes);
  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  // The decoder name starts right after the encoder name's NUL byte.
  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderBytes;

  Module.stringToUTF8(config.encoder, encoderAddr, encoderBytes);
  Module.stringToUTF8(config.decoder, decoderAddr, decoderBytes);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {buffer, ptr, len};
}
/**
 * Serializes an offline TDNN model config into wasm memory.
 *
 * Writes the NUL-terminated model file name into its own buffer and stores
 * the buffer address in a newly allocated 4-byte struct. Ownership of both
 * allocations passes to the caller.
 *
 * @param {{model: string}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  const byteCount = Module.lengthBytesUTF8(config.model) + 1;  // incl. NUL
  const stringAddr = Module._malloc(byteCount);

  const structSize = 1 * 4;  // 1 pointer
  const structAddr = Module._malloc(structSize);

  Module.stringToUTF8(config.model, stringAddr, byteCount);
  Module.setValue(structAddr, stringAddr, 'i8*');

  return {buffer: stringAddr, ptr: structAddr, len: structSize};
}
/**
 * Serializes an offline language-model config into wasm memory.
 *
 * The resulting 8-byte struct holds a pointer to the NUL-terminated model
 * file name (offset 0) followed by the scale as a 32-bit float (offset 4).
 * The string buffer and the struct are both returned to the caller un-freed.
 *
 * @param {{model: string, scale: number}} config
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {{buffer: number, ptr: number, len: number}}
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  const modelBytes = Module.lengthBytesUTF8(config.model) + 1;  // incl. NUL
  const buffer = Module._malloc(modelBytes);

  const len = 4 + 4;  // one pointer + one float
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, modelBytes);

  const modelField = ptr;
  const scaleField = ptr + 4;
  Module.setValue(modelField, buffer, 'i8*');
  Module.setValue(scaleField, config.scale, 'float');

  return {buffer, ptr, len};
}
/**
 * Serializes an offline model config into wasm memory.
 *
 * The struct is laid out as the five nested model structs (transducer,
 * paraformer, nemoCtc, whisper, tdnn) copied in that order, followed by five
 * 4-byte fields: tokens pointer, numThreads, debug, provider pointer and
 * modelType pointer. The three strings share one NUL-separated buffer.
 *
 * Nothing is freed here: the returned object carries this function's own
 * allocations (`buffer`, `ptr`) as well as the nested config objects.
 *
 * @param {object} config Has `transducer`, `paraformer`, `nemoCtc`,
 *     `whisper`, `tdnn`, `tokens`, `numThreads`, `debug`, `provider` and
 *     `modelType`.
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {object} `{buffer, ptr, len}` plus the five nested config objects.
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);

  // Order matters: it is the order the nested structs are copied in.
  const nested = [transducer, paraformer, nemoCtc, whisper, tdnn];

  const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
      tdnn.len + 5 * 4;
  const ptr = Module._malloc(len);

  // `cursor` walks through the struct as fields are filled in.
  let cursor = ptr;
  for (const part of nested) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
  const buffer = Module._malloc(tokensLen + providerLen + modelTypeLen);

  const tokensAddr = buffer;
  const providerAddr = tokensAddr + tokensLen;
  const modelTypeAddr = providerAddr + providerLen;

  Module.stringToUTF8(config.tokens, tokensAddr, tokensLen);
  Module.stringToUTF8(config.provider, providerAddr, providerLen);
  Module.stringToUTF8(config.modelType, modelTypeAddr, modelTypeLen);

  // `cursor` now sits just past the nested structs, at the first scalar field.
  Module.setValue(cursor, tokensAddr, 'i8*');  // tokens
  Module.setValue(cursor + 4, config.numThreads, 'i32');
  Module.setValue(cursor + 8, config.debug, 'i32');
  Module.setValue(cursor + 12, providerAddr, 'i8*');   // provider
  Module.setValue(cursor + 16, modelTypeAddr, 'i8*');  // modelType

  return {buffer, ptr, len, transducer, paraformer, nemoCtc, whisper, tdnn};
}
/**
 * Serializes an offline recognizer config into wasm memory.
 *
 * The struct consists of the feature, model and LM structs copied in that
 * order, followed by four 4-byte fields: decoding-method pointer,
 * maxActivePaths, hotwords-file pointer and hotwordsScore (float). The two
 * strings share one NUL-separated buffer.
 *
 * Nothing is freed here: the returned object carries this function's own
 * allocations (`buffer`, `ptr`) together with the nested config objects.
 *
 * @param {object} config Has `featConfig`, `modelConfig`, `lmConfig`,
 *     `decodingMethod`, `maxActivePaths`, `hotwordsFile` and `hotwordsScore`.
 * @param {object} Module Emscripten module supplying the heap helpers.
 * @returns {object} `{buffer, ptr, len, feat, model, lm}`.
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  const len = feat.len + model.len + lm.len + 4 * 4;
  const ptr = Module._malloc(len);

  // `cursor` walks through the struct as fields are filled in.
  let cursor = ptr;
  for (const part of [feat, model, lm]) {
    Module._CopyHeap(part.ptr, part.len, cursor);
    cursor += part.len;
  }

  const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
  const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
  const buffer = Module._malloc(decodingMethodLen + hotwordsFileLen);

  const decodingMethodAddr = buffer;
  const hotwordsFileAddr = buffer + decodingMethodLen;

  Module.stringToUTF8(
      config.decodingMethod, decodingMethodAddr, decodingMethodLen);
  Module.stringToUTF8(config.hotwordsFile, hotwordsFileAddr, hotwordsFileLen);

  // `cursor` now sits just past the nested structs, at the first scalar field.
  Module.setValue(cursor, decodingMethodAddr, 'i8*');  // decoding method
  Module.setValue(cursor + 4, config.maxActivePaths, 'i32');
  Module.setValue(cursor + 8, hotwordsFileAddr, 'i8*');
  Module.setValue(cursor + 12, config.hotwordsScore, 'float');

  return {buffer, ptr, len, feat, model, lm};
}
class OfflineStream {
constructor(handle, Module) {
this.handle = handle;
this.pointer = null; // buffer
this.n = 0; // buffer size
this.Module = Module;
}
free() {
if (this.handle) {
_DestroyOnlineStream(this.handle);
this.Module._DestroyOfflineStream(this.handle);
this.handle = null;
_free(this.pointer);
}
}
/**
* @param sampleRate {Number}
* @param samples {Float32Array} Containing samples in the range [-1, 1]
*/
acceptWaveform(sampleRate, samples) {
const pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
this.Module._AcceptWaveformOffline(
this.handle, sampleRate, pointer, samples.length);
this.Module._free(pointer);
}
};
/**
 * JavaScript wrapper around a native offline (non-streaming) recognizer that
 * lives inside the wasm module.
 *
 * The wrapper holds a native handle, so `free()` must be called when the
 * recognizer is no longer needed.
 */
class OfflineRecognizer {
  /**
   * @param {object} configObj Recognizer config as plain JavaScript values;
   *     it is marshalled into wasm memory for the duration of construction.
   * @param {object} Module The Emscripten module exporting the C API.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    this.Module = Module;

    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    try {
      this.handle = Module._CreateOfflineRecognizer(config.ptr);
    } finally {
      // The marshalled config is only needed while the native object is being
      // created; release it even when creation throws so it is not leaked.
      freeConfig(config, Module);
    }
  }

  /**
   * Destroys the native recognizer. Calling it again is a no-op, so the
   * native destroy function never sees an already released handle.
   */
  free() {
    if (this.handle) {
      this.Module._DestroyOfflineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OfflineStream} A new stream bound to this recognizer. The
   *     caller is responsible for freeing it.
   */
  createStream() {
    const handle = this.Module._CreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  /**
   * Runs recognition on the audio accepted by `stream` so far.
   *
   * @param {OfflineStream} stream
   */
  decode(stream) {
    this.Module._DecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * @param {OfflineStream} stream A stream that has been decoded.
   * @returns {string} The recognized text.
   */
  getResult(stream) {
    const r = this.Module._GetOfflineStreamResult(stream.handle);
    // The text pointer is read from the start of the native result struct.
    const textPtr = this.Module.getValue(r, 'i8*');
    // Copy the text into a JavaScript string before the native result is
    // destroyed.
    const text = this.Module.UTF8ToString(textPtr);
    this.Module._DestroyOfflineRecognizerResult(r);
    return text;
  }
}
class OnlineStream {
constructor(handle, Module) {
this.handle = handle;
this.pointer = null; // buffer
this.n = 0; // buffer size
this.Module = Module;
}
free() {
if (this.handle) {
this.Module._DestroyOnlineStream(this.handle);
this.handle = null;
this.Module._free(this.pointer);
this.pointer = null;
this.n = 0;
}
@@ -321,61 +654,73 @@ class OnlineStream {
*/
acceptWaveform(sampleRate, samples) {
if (this.n < samples.length) {
_free(this.pointer);
this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.Module._free(this.pointer);
this.pointer =
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
this.n = samples.length
}
Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
_AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length);
this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
this.Module._AcceptWaveform(
this.handle, sampleRate, this.pointer, samples.length);
}
inputFinished() {
_InputFinished(this.handle);
this.Module._InputFinished(this.handle);
}
};
class OnlineRecognizer {
constructor(configObj) {
let config = initSherpaOnnxOnlineRecognizerConfig(configObj)
let handle = _CreateOnlineRecognizer(config.ptr);
constructor(configObj, Module) {
this.config = configObj;
const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module)
const handle = Module._CreateOnlineRecognizer(config.ptr);
freeConfig(config);
freeConfig(config, Module);
this.handle = handle;
this.Module = Module;
}
free() {
_DestroyOnlineRecognizer(this.handle);
this.Module._DestroyOnlineRecognizer(this.handle);
this.handle = 0
}
createStream() {
let handle = _CreateOnlineStream(this.handle);
return new OnlineStream(handle);
const handle = this.Module._CreateOnlineStream(this.handle);
return new OnlineStream(handle, this.Module);
}
isReady(stream) {
return _IsOnlineStreamReady(this.handle, stream.handle) == 1;
return this.Module._IsOnlineStreamReady(this.handle, stream.handle) == 1;
}
decode(stream) {
return _DecodeOnlineStream(this.handle, stream.handle);
this.Module._DecodeOnlineStream(this.handle, stream.handle);
}
isEndpoint(stream) {
return _IsEndpoint(this.handle, stream.handle) == 1;
return this.Module._IsEndpoint(this.handle, stream.handle) == 1;
}
reset(stream) {
_Reset(this.handle, stream.handle);
this.Module._Reset(this.handle, stream.handle);
}
getResult(stream) {
let r = _GetOnlineStreamResult(this.handle, stream.handle);
let textPtr = getValue(r, 'i8*');
let text = UTF8ToString(textPtr);
_DestroyOnlineRecognizerResult(r);
const r = this.Module._GetOnlineStreamResult(this.handle, stream.handle);
const textPtr = this.Module.getValue(r, 'i8*');
const text = this.Module.UTF8ToString(textPtr);
this.Module._DestroyOnlineRecognizerResult(r);
return text;
}
}
if (typeof process == 'object' && typeof process.versions == 'object' &&
typeof process.versions.node == 'string') {
module.exports = {
createOnlineRecognizer,
OfflineRecognizer,
};
}

View File

@@ -1,4 +1,4 @@
// wasm/sherpa-onnx-wasm-asr-main.cc
// wasm/sherpa-onnx-wasm-main-asr.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <stdio.h>

View File

@@ -0,0 +1,76 @@
# Build script for the NodeJS WebAssembly target. It is meant to be driven by
# ./build-wasm-simd-nodejs.sh, which sets SHERPA_ONNX_IS_USING_BUILD_WASM_SH.
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS")
endif()

# C API functions that must stay callable from JavaScript.
set(exported_functions
  #tts
  PrintOfflineTtsConfig
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
  # streaming asr
  AcceptWaveform
  CreateOnlineRecognizer
  CreateOnlineStream
  DecodeOnlineStream
  DestroyOnlineRecognizer
  DestroyOnlineRecognizerResult
  DestroyOnlineStream
  GetOnlineStreamResult
  InputFinished
  IsEndpoint
  IsOnlineStreamReady
  Reset
  # non-streaming ASR
  PrintOfflineRecognizerConfig
  CreateOfflineRecognizer
  DestroyOfflineRecognizer
  CreateOfflineStream
  DestroyOfflineStream
  AcceptWaveformOffline
  DecodeOfflineStream
  DecodeMultipleOfflineStreams
  GetOfflineStreamResult
  DestroyOfflineRecognizerResult
)

# Each name is passed to -sEXPORTED_FUNCTIONS with a leading underscore.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})

set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
# The JavaScript glue calls every runtime method listed here through `Module`.
# A shorter EXPORTED_RUNTIME_METHODS list used to precede this one; it was
# overridden by this superset and has been dropped.
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking executables;
# the previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) was silently ignored.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc)
target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs)

# Install the hand-written JavaScript wrappers next to the generated glue
# code and wasm binary.
install(
  FILES
    ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
    ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
  DESTINATION
    bin/wasm/nodejs
)

View File

@@ -0,0 +1,104 @@
// wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <stdio.h>
#include <algorithm>
#include <memory>
#include "sherpa-onnx/c-api/c-api.h"
extern "C" {
// Compile-time layout checks. Each config struct is pinned to an exact size,
// expressed as a count of 4-byte fields, so the build fails if a field is
// added or removed, or if pointers are not 4 bytes wide.
// NOTE(review): presumably these sizes mirror the byte counts hard-coded by
// the JavaScript code that builds these structs in the wasm heap -- confirm
// and keep both sides in sync when a struct changes.
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
// The model config is its five nested model structs plus five 4-byte fields.
static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
sizeof(SherpaOnnxOfflineTransducerModelConfig) +
sizeof(SherpaOnnxOfflineParaformerModelConfig) +
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4,
"");
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
// The recognizer config is the feature, LM and model structs plus four
// 4-byte fields. Only the total size is checked here, not the field order.
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOfflineLMConfig) +
sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
"");
// Writes every field of an offline TTS config to stdout, grouped into the
// VITS model section, the TTS model section and the top-level TTS section.
// Useful for checking that a config built on the JavaScript side arrived
// with the expected values.
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
  const auto &model = tts_config->model;
  const auto &vits = model.vits;

  printf("----------vits model config----------\n");
  printf("model: %s\n", vits.model);
  printf("lexicon: %s\n", vits.lexicon);
  printf("tokens: %s\n", vits.tokens);
  printf("data_dir: %s\n", vits.data_dir);
  printf("noise scale: %.3f\n", vits.noise_scale);
  printf("noise scale w: %.3f\n", vits.noise_scale_w);
  printf("length scale: %.3f\n", vits.length_scale);

  printf("----------tts model config----------\n");
  printf("num threads: %d\n", model.num_threads);
  printf("debug: %d\n", model.debug);
  printf("provider: %s\n", model.provider);

  printf("----------tts config----------\n");
  printf("rule_fsts: %s\n", tts_config->rule_fsts);
  printf("max num sentences: %d\n", tts_config->max_num_sentences);
}
// Writes the fields of an offline recognizer config to stdout: each nested
// model config first, then the shared model settings, the feature config and
// finally the recognizer-level settings. Useful for checking that a config
// built on the JavaScript side arrived with the expected values.
void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
  const auto &model = config->model_config;
  const auto &feat = config->feat_config;

  printf("----------offline transducer model config----------\n");
  printf("encoder: %s\n", model.transducer.encoder);
  printf("decoder: %s\n", model.transducer.decoder);
  printf("joiner: %s\n", model.transducer.joiner);

  printf("----------offline paraformer model config----------\n");
  printf("model: %s\n", model.paraformer.model);

  printf("----------offline nemo_ctc model config----------\n");
  printf("model: %s\n", model.nemo_ctc.model);

  printf("----------offline whisper model config----------\n");
  printf("encoder: %s\n", model.whisper.encoder);
  printf("decoder: %s\n", model.whisper.decoder);

  printf("----------offline tdnn model config----------\n");
  printf("model: %s\n", model.tdnn.model);

  // Settings shared by all model types.
  printf("tokens: %s\n", model.tokens);
  printf("num_threads: %d\n", model.num_threads);
  printf("provider: %s\n", model.provider);
  printf("debug: %d\n", model.debug);
  printf("model type: %s\n", model.model_type);

  printf("----------feat config----------\n");
  printf("sample rate: %d\n", feat.sample_rate);
  printf("feat dim: %d\n", feat.feature_dim);

  printf("----------recognizer config----------\n");
  printf("decoding method: %s\n", config->decoding_method);
  printf("max active paths: %d\n", config->max_active_paths);
  printf("hotwords_file: %s\n", config->hotwords_file);
  printf("hotwords_score: %.2f\n", config->hotwords_score);
}
// Copies `num_bytes` bytes from `src` to `dst`.
//
// Exported so that JavaScript can copy one region of the wasm heap into
// another. A zero or negative `num_bytes` copies nothing; previously a
// negative count produced an inverted range, which is undefined behaviour.
//
// @param src        Start of the source bytes.
// @param num_bytes  Number of bytes to copy.
// @param dst        Start of the destination; it must not begin inside
//                   [src, src + num_bytes).
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
  if (num_bytes <= 0) {
    return;
  }

  std::copy_n(src, num_bytes, dst);
}
}

View File

@@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() {
console.log('Model files downloaded!');
console.log('Initializing tts ......');
tts = initSherpaOnnxOfflineTts()
tts = createOfflineTts(Module)
if (tts.numSpeakers > 1) {
speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
}

View File

@@ -1,109 +1,109 @@
function freeConfig(config) {
function freeConfig(config, Module) {
if ('buffer' in config) {
_free(config.buffer);
Module._free(config.buffer);
}
if ('config' in config) {
freeConfig(config.config)
freeConfig(config.config, Module)
}
_free(config.ptr);
Module._free(config.ptr);
}
// The user should free the returned pointers
//
// Serializes the VITS model settings into heap memory.
//
// The four strings (model, lexicon, tokens, dataDir) are written back to back
// into one allocation (`buffer`). A second allocation (`ptr`) holds a 28-byte
// record: four pointer slots at offsets 0, 4, 8 and 12 that point into
// `buffer`, followed by three floats at offsets 16, 20 and 24 (noiseScale,
// noiseScaleW, lengthScale).
//
// Returns {buffer, ptr, len}, where `len` is the size of the record in bytes.
//
// Each changed statement is listed twice in this span: the earlier revision
// uses the global Emscripten helpers, the later one reaches them through the
// `Module` argument.
function initSherpaOnnxOfflineTtsVitsModelConfig(config) {
let modelLen = lengthBytesUTF8(config.model) + 1;
let lexiconLen = lengthBytesUTF8(config.lexicon) + 1;
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
let dataDirLen = lengthBytesUTF8(config.dataDir) + 1;
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
// The `+ 1` presumably reserves room for the terminating NUL byte of each
// string -- confirm against the Emscripten lengthBytesUTF8 documentation.
const modelLen = Module.lengthBytesUTF8(config.model) + 1;
const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1;
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1;
// One allocation holds all four strings.
let n = modelLen + lexiconLen + tokensLen + dataDirLen;
const n = modelLen + lexiconLen + tokensLen + dataDirLen;
let buffer = _malloc(n);
const buffer = Module._malloc(n);
// Seven 4-byte slots: the four string pointers and three floats set below.
let len = 7 * 4;
let ptr = _malloc(len);
const len = 7 * 4;
const ptr = Module._malloc(len);
// First pass: write the strings into `buffer` one after another.
let offset = 0;
stringToUTF8(config.model, buffer + offset, modelLen);
Module.stringToUTF8(config.model, buffer + offset, modelLen);
offset += modelLen;
stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
offset += lexiconLen;
stringToUTF8(config.tokens, buffer + offset, tokensLen);
Module.stringToUTF8(config.tokens, buffer + offset, tokensLen);
offset += tokensLen;
stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
offset += dataDirLen;
// Second pass: walk the same offsets again and store the address of each
// string in the record's pointer slots.
offset = 0;
setValue(ptr, buffer + offset, 'i8*');
Module.setValue(ptr, buffer + offset, 'i8*');
offset += modelLen;
setValue(ptr + 4, buffer + offset, 'i8*');
Module.setValue(ptr + 4, buffer + offset, 'i8*');
offset += lexiconLen;
setValue(ptr + 8, buffer + offset, 'i8*');
Module.setValue(ptr + 8, buffer + offset, 'i8*');
offset += tokensLen;
setValue(ptr + 12, buffer + offset, 'i8*');
Module.setValue(ptr + 12, buffer + offset, 'i8*');
offset += dataDirLen;
// The float settings follow the four pointer slots.
setValue(ptr + 16, config.noiseScale, 'float');
setValue(ptr + 20, config.noiseScaleW, 'float');
setValue(ptr + 24, config.lengthScale, 'float');
Module.setValue(ptr + 16, config.noiseScale, 'float');
Module.setValue(ptr + 20, config.noiseScaleW, 'float');
Module.setValue(ptr + 24, config.lengthScale, 'float');
return {
buffer: buffer, ptr: ptr, len: len,
}
}
// Serializes the TTS model settings into heap memory.
//
// The record at `ptr` starts with a byte-for-byte copy of the VITS record
// built from `config.offlineTtsVitsModelConfig`, followed by three 4-byte
// slots: numThreads (i32), debug (i32) and a pointer to the provider string,
// which lives in its own allocation (`buffer`).
//
// Returns {buffer, ptr, len, config}, where `config` is the nested VITS
// descriptor; it is kept so that its allocations can be released later.
//
// Each changed statement is listed twice in this span: the earlier revision
// uses the global Emscripten helpers, the later one reaches them through the
// `Module` argument.
function initSherpaOnnxOfflineTtsModelConfig(config) {
let vitsModelConfig =
initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig);
function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
config.offlineTtsVitsModelConfig, Module);
// Nested VITS record plus three 4-byte slots (numThreads, debug, provider).
let len = vitsModelConfig.len + 3 * 4;
let ptr = _malloc(len);
const len = vitsModelConfig.len + 3 * 4;
const ptr = Module._malloc(len);
let offset = 0;
// Embed the VITS record at the start of this record.
_CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
offset += vitsModelConfig.len;
setValue(ptr + offset, config.numThreads, 'i32');
Module.setValue(ptr + offset, config.numThreads, 'i32');
offset += 4;
setValue(ptr + offset, config.debug, 'i32');
Module.setValue(ptr + offset, config.debug, 'i32');
offset += 4;
// The provider string gets its own allocation; only its address is stored
// in the record.
let providerLen = lengthBytesUTF8(config.provider) + 1;
let buffer = _malloc(providerLen);
stringToUTF8(config.provider, buffer, providerLen);
setValue(ptr + offset, buffer, 'i8*');
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
const buffer = Module._malloc(providerLen);
Module.stringToUTF8(config.provider, buffer, providerLen);
Module.setValue(ptr + offset, buffer, 'i8*');
return {
buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
}
}
// Serializes the top-level TTS settings into heap memory.
//
// The record at `ptr` starts with a byte-for-byte copy of the model record
// built from `config.offlineTtsModelConfig`, followed by two 4-byte slots: a
// pointer to the ruleFsts string (stored in its own allocation, `buffer`) and
// maxNumSentences (i32).
//
// Returns {buffer, ptr, len, config}, where `config` is the nested model
// descriptor.
//
// Each changed statement is listed twice in this span: the earlier revision
// uses the global Emscripten helpers, the later one reaches them through the
// `Module` argument.
function initSherpaOnnxOfflineTtsConfig(config) {
let modelConfig =
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig);
let len = modelConfig.len + 2 * 4;
let ptr = _malloc(len);
function initSherpaOnnxOfflineTtsConfig(config, Module) {
const modelConfig =
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
// Nested model record plus two 4-byte slots (ruleFsts, maxNumSentences).
const len = modelConfig.len + 2 * 4;
const ptr = Module._malloc(len);
let offset = 0;
// Embed the model record at the start of this record.
_CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
offset += modelConfig.len;
// The ruleFsts string gets its own allocation; only its address is stored
// in the record.
let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1;
let buffer = _malloc(ruleFstsLen);
stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
setValue(ptr + offset, buffer, 'i8*');
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
const buffer = Module._malloc(ruleFstsLen);
Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
Module.setValue(ptr + offset, buffer, 'i8*');
offset += 4;
setValue(ptr + offset, config.maxNumSentences, 'i32');
Module.setValue(ptr + offset, config.maxNumSentences, 'i32');
return {
buffer: buffer, ptr: ptr, len: len, config: modelConfig,
@@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) {
}
// JavaScript wrapper around a native offline TTS handle.
//
// Each changed statement is listed twice in this span: the earlier revision
// calls the global Emscripten exports, the later one receives the wasm
// `Module` in the constructor, stores it on the instance and goes through it.
class OfflineTts {
constructor(configObj) {
let config = initSherpaOnnxOfflineTtsConfig(configObj)
let handle = _SherpaOnnxCreateOfflineTts(config.ptr);
// configObj: the settings object passed to initSherpaOnnxOfflineTtsConfig.
// Module: the wasm module whose exports are used for all native calls.
constructor(configObj, Module) {
// NOTE(review): this logs the whole config on every construction; it looks
// like leftover debug output -- confirm it is intended.
console.log(configObj)
const config = initSherpaOnnxOfflineTtsConfig(configObj, Module)
const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr);
// The serialized config is released right after the native object has been
// created from it.
freeConfig(config);
freeConfig(config, Module);
this.handle = handle;
this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle);
this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle);
this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle);
this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle);
this.Module = Module
}
// Destroys the native object and resets the handle to 0.
free() {
_SherpaOnnxDestroyOfflineTts(this.handle);
this.Module._SherpaOnnxDestroyOfflineTts(this.handle);
this.handle = 0
}
@@ -133,29 +135,44 @@ class OfflineTts {
// speed: 1.0
// }
// Synthesizes `config.text` with speaker `config.sid` at `config.speed` and
// returns {samples, sampleRate}, where `samples` is a Float32Array copied
// out of the wasm heap.
generate(config) {
let textLen = lengthBytesUTF8(config.text) + 1;
let textPtr = _malloc(textLen);
stringToUTF8(config.text, textPtr, textLen);
// NOTE(review): textPtr is allocated here but no matching _free call is
// visible in this method -- looks like a per-call leak; confirm.
const textLen = this.Module.lengthBytesUTF8(config.text) + 1;
const textPtr = this.Module._malloc(textLen);
this.Module.stringToUTF8(config.text, textPtr, textLen);
let h = _SherpaOnnxOfflineTtsGenerate(
const h = this.Module._SherpaOnnxOfflineTtsGenerate(
this.handle, textPtr, config.sid, config.speed);
// `h` is read as three consecutive 32-bit words: the address of the
// samples, the number of samples and the sample rate.
let numSamples = HEAP32[h / 4 + 1];
let sampleRate = HEAP32[h / 4 + 2];
const numSamples = this.Module.HEAP32[h / 4 + 1];
const sampleRate = this.Module.HEAP32[h / 4 + 2];
// Dividing the byte address by 4 turns it into an index into HEAPF32.
let samplesPtr = HEAP32[h / 4] / 4;
let samples = new Float32Array(numSamples);
const samplesPtr = this.Module.HEAP32[h / 4] / 4;
const samples = new Float32Array(numSamples);
// Copy the samples out before the native audio object is destroyed below.
for (let i = 0; i < numSamples; i++) {
samples[i] = HEAPF32[samplesPtr + i];
samples[i] = this.Module.HEAPF32[samplesPtr + i];
}
_SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
return {samples: samples, sampleRate: sampleRate};
}
// Writes `audio` (an object with `samples` and `sampleRate`, as returned by
// generate) to `filename` through the native _SherpaOnnxWriteWave export.
// The value returned by _SherpaOnnxWriteWave is not inspected.
save(filename, audio) {
const samples = audio.samples;
const sampleRate = audio.sampleRate;
// Copy the samples into the wasm heap, 4 bytes per sample.
const ptr = this.Module._malloc(samples.length * 4);
for (let i = 0; i < samples.length; i++) {
this.Module.HEAPF32[ptr / 4 + i] = samples[i];
}
const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
const buffer = this.Module._malloc(filenameLen);
this.Module.stringToUTF8(filename, buffer, filenameLen);
this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
// Both temporary allocations are released once the call returns.
this.Module._free(buffer);
this.Module._free(ptr);
}
}
// Builds an OfflineTts instance.
//
// This span lists two revisions: the earlier `initSherpaOnnxOfflineTts()`
// takes no arguments and always uses the built-in settings; the later
// `createOfflineTts(Module, myConfig)` receives the wasm `Module` and an
// optional `myConfig`. Some lines of the body are elided by the hunk headers
// below.
function initSherpaOnnxOfflineTts() {
let offlineTtsVitsModelConfig = {
function createOfflineTts(Module, myConfig) {
// Built-in settings, used when no `myConfig` is supplied.
const offlineTtsVitsModelConfig = {
model: './model.onnx',
lexicon: '',
tokens: './tokens.txt',
@@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() {
noiseScaleW: 0.8,
lengthScale: 1.0,
};
let offlineTtsModelConfig = {
const offlineTtsModelConfig = {
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
numThreads: 1,
debug: 1,
@@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() {
maxNumSentences: 1,
}
return new OfflineTts(offlineTtsConfig);
// A truthy `myConfig` replaces the built-in settings as a whole; the two are
// not merged.
// NOTE(review): the declaration of `offlineTtsConfig` is not visible here;
// this reassignment requires it to be declared with `let` -- confirm.
if (myConfig) {
offlineTtsConfig = myConfig;
}
return new OfflineTts(offlineTtsConfig, Module);
}
// Export the factory only when running under Node.js. In a browser there is
// no `process.versions.node`, so nothing is assigned to `module`.
if (typeof process == 'object') {
  if (typeof process.versions == 'object' &&
      typeof process.versions.node == 'string') {
    module.exports = {createOfflineTts: createOfflineTts};
  }
}

View File

@@ -1,4 +1,4 @@
// wasm/sherpa-onnx-wasm-main.cc
// wasm/sherpa-onnx-wasm-main-tts.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <stdio.h>