Add WebAssembly for NodeJS. (#628)
This commit is contained in:
@@ -5,3 +5,7 @@ endif()
|
||||
# Add the asr/ subdirectory when SHERPA_ONNX_ENABLE_WASM_ASR is set.
if(SHERPA_ONNX_ENABLE_WASM_ASR)
  add_subdirectory(asr)
endif()

# Add the nodejs/ subdirectory when SHERPA_ONNX_ENABLE_WASM_NODEJS is set.
if(SHERPA_ONNX_ENABLE_WASM_NODEJS)
  add_subdirectory(nodejs)
endif()
|
||||
|
||||
@@ -45,7 +45,7 @@ Module.onRuntimeInitialized = function() {
|
||||
|
||||
startBtn.disabled = false;
|
||||
|
||||
recognizer = createRecognizer();
|
||||
recognizer = createOnlineRecognizer(Module);
|
||||
console.log('recognizer is created!', recognizer);
|
||||
};
|
||||
|
||||
|
||||
@@ -1,161 +1,181 @@
|
||||
/**
 * Free every WebAssembly heap allocation referenced by a config descriptor.
 *
 * A descriptor is the object returned by one of the initSherpaOnnx*Config()
 * helpers. Its optional `buffer` field and its `ptr` field are heap addresses,
 * and it may embed further descriptors under the keys listed below; those are
 * freed recursively before the descriptor's own `ptr`.
 *
 * @param {Object} config Descriptor to release.
 * @param {Object} Module Emscripten module instance providing `_free`.
 */
function freeConfig(config, Module) {
  if ('buffer' in config) {
    Module._free(config.buffer);
  }

  // Keys under which a descriptor may hold a nested descriptor. The order is
  // the order in which nested descriptors are released.
  const nestedKeys = [
    'config', 'transducer', 'paraformer', 'ctc', 'feat', 'model', 'nemoCtc',
    'whisper', 'tdnn', 'lm'
  ];

  for (const key of nestedKeys) {
    if (key in config) {
      freeConfig(config[key], Module);
    }
  }

  Module._free(config.ptr);
}
|
||||
|
||||
// The user should free the returned pointers
|
||||
/**
 * Write a streaming transducer model config into the WebAssembly heap.
 *
 * The three path strings are stored back to back, NUL-terminated, in one
 * allocation (`buffer`). A second allocation (`ptr`) holds three 4-byte
 * pointers, in the order encoder, decoder, joiner, each pointing at the
 * matching string inside `buffer`.
 *
 * @param {{encoder: string, decoder: string, joiner: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer table. The caller owns
 *     both allocations and must free them.
 */
function initSherpaOnnxOnlineTransducerModelConfig(config, Module) {
  const fields = [config.encoder, config.decoder, config.joiner];

  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const sizes = fields.map((s) => Module.lengthBytesUTF8(s) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, n) => sum + n, 0));

  const len = fields.length * 4;  // one 4-byte pointer per field
  const ptr = Module._malloc(len);

  let offset = 0;
  fields.forEach((s, i) => {
    Module.stringToUTF8(s, buffer + offset, sizes[i]);
    Module.setValue(ptr + 4 * i, buffer + offset, 'i8*');
    offset += sizes[i];
  });

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a streaming paraformer model config into the WebAssembly heap.
 *
 * The encoder and decoder path strings are stored back to back,
 * NUL-terminated, in `buffer`; `ptr` holds two 4-byte pointers (encoder, then
 * decoder) into that buffer.
 *
 * @param {{encoder: string, decoder: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer table. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOnlineParaformerModelConfig(config, Module) {
  const fields = [config.encoder, config.decoder];

  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const sizes = fields.map((s) => Module.lengthBytesUTF8(s) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, n) => sum + n, 0));

  const len = fields.length * 4;  // one 4-byte pointer per field
  const ptr = Module._malloc(len);

  let offset = 0;
  fields.forEach((s, i) => {
    Module.stringToUTF8(s, buffer + offset, sizes[i]);
    Module.setValue(ptr + 4 * i, buffer + offset, 'i8*');
    offset += sizes[i];
  });

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a streaming zipformer2 CTC model config into the WebAssembly heap.
 *
 * `buffer` receives the NUL-terminated model path; `ptr` is a 4-byte cell
 * holding the address of that string.
 *
 * @param {{model: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer cell. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOnlineZipformer2CtcModelConfig(config, Module) {
  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const n = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(n);

  const len = 4;  // a single 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, n);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
|
||||
|
||||
function initSherpaOnnxOnlineModelConfig(config) {
|
||||
let transducer = initSherpaOnnxOnlineTransducerModelConfig(config.transducer);
|
||||
let paraformer = initSherpaOnnxOnlineParaformerModelConfig(config.paraformer);
|
||||
let ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(config.zipformer2Ctc);
|
||||
function initSherpaOnnxOnlineModelConfig(config, Module) {
|
||||
const transducer =
|
||||
initSherpaOnnxOnlineTransducerModelConfig(config.transducer, Module);
|
||||
const paraformer =
|
||||
initSherpaOnnxOnlineParaformerModelConfig(config.paraformer, Module);
|
||||
const ctc = initSherpaOnnxOnlineZipformer2CtcModelConfig(
|
||||
config.zipformer2Ctc, Module);
|
||||
|
||||
let len = transducer.len + paraformer.len + ctc.len + 5 * 4;
|
||||
let ptr = _malloc(len);
|
||||
const len = transducer.len + paraformer.len + ctc.len + 5 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
_CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
||||
Module._CopyHeap(transducer.ptr, transducer.len, ptr + offset);
|
||||
offset += transducer.len;
|
||||
|
||||
_CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
|
||||
Module._CopyHeap(paraformer.ptr, paraformer.len, ptr + offset);
|
||||
offset += paraformer.len;
|
||||
|
||||
_CopyHeap(ctc.ptr, ctc.len, ptr + offset);
|
||||
Module._CopyHeap(ctc.ptr, ctc.len, ptr + offset);
|
||||
offset += ctc.len;
|
||||
|
||||
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
|
||||
let providerLen = lengthBytesUTF8(config.provider) + 1;
|
||||
let modelTypeLen = lengthBytesUTF8(config.modelType) + 1;
|
||||
let bufferLen = tokensLen + providerLen + modelTypeLen;
|
||||
let buffer = _malloc(bufferLen);
|
||||
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
||||
const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
|
||||
const bufferLen = tokensLen + providerLen + modelTypeLen;
|
||||
const buffer = Module._malloc(bufferLen);
|
||||
|
||||
offset = 0;
|
||||
stringToUTF8(config.tokens, buffer, tokensLen);
|
||||
Module.stringToUTF8(config.tokens, buffer, tokensLen);
|
||||
offset += tokensLen;
|
||||
|
||||
stringToUTF8(config.provider, buffer + offset, providerLen);
|
||||
Module.stringToUTF8(config.provider, buffer + offset, providerLen);
|
||||
offset += providerLen;
|
||||
|
||||
stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
||||
Module.stringToUTF8(config.modelType, buffer + offset, modelTypeLen);
|
||||
|
||||
offset = transducer.len + paraformer.len + ctc.len;
|
||||
setValue(ptr + offset, buffer, 'i8*'); // tokens
|
||||
Module.setValue(ptr + offset, buffer, 'i8*'); // tokens
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.numThreads, 'i32');
|
||||
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
||||
Module.setValue(ptr + offset, buffer + tokensLen, 'i8*'); // provider
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.debug, 'i32');
|
||||
Module.setValue(ptr + offset, config.debug, 'i32');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
|
||||
Module.setValue(
|
||||
ptr + offset, buffer + tokensLen + providerLen, 'i8*'); // modelType
|
||||
offset += 4;
|
||||
|
||||
return {
|
||||
@@ -164,63 +184,63 @@ function initSherpaOnnxOnlineModelConfig(config) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Write a feature-extraction config into the WebAssembly heap.
 *
 * The allocation holds two 32-bit integers: the sample rate at offset 0 and
 * the feature dimension at offset 4.
 *
 * @param {{sampleRate: number, featureDim: number}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{ptr: number, len: number}} Heap address and byte size of the
 *     allocation. The caller must free `ptr`.
 */
function initSherpaOnnxFeatureConfig(config, Module) {
  const len = 2 * 4;  // two 32-bit integers
  const ptr = Module._malloc(len);

  Module.setValue(ptr, config.sampleRate, 'i32');
  Module.setValue(ptr + 4, config.featureDim, 'i32');

  return {ptr, len};
}
|
||||
|
||||
function initSherpaOnnxOnlineRecognizerConfig(config) {
|
||||
let feat = initSherpaOnnxFeatureConfig(config.featConfig);
|
||||
let model = initSherpaOnnxOnlineModelConfig(config.modelConfig);
|
||||
function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
|
||||
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
|
||||
|
||||
let len = feat.len + model.len + 8 * 4;
|
||||
let ptr = _malloc(len);
|
||||
const len = feat.len + model.len + 8 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
_CopyHeap(feat.ptr, feat.len, ptr + offset);
|
||||
Module._CopyHeap(feat.ptr, feat.len, ptr + offset);
|
||||
offset += feat.len;
|
||||
|
||||
_CopyHeap(model.ptr, model.len, ptr + offset);
|
||||
Module._CopyHeap(model.ptr, model.len, ptr + offset);
|
||||
offset += model.len;
|
||||
|
||||
let decodingMethodLen = lengthBytesUTF8(config.decodingMethod) + 1;
|
||||
let hotwordsFileLen = lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||
let bufferLen = decodingMethodLen + hotwordsFileLen;
|
||||
let buffer = _malloc(bufferLen);
|
||||
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
||||
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||
const bufferLen = decodingMethodLen + hotwordsFileLen;
|
||||
const buffer = Module._malloc(bufferLen);
|
||||
|
||||
offset = 0;
|
||||
stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
|
||||
Module.stringToUTF8(config.decodingMethod, buffer, decodingMethodLen);
|
||||
offset += decodingMethodLen;
|
||||
|
||||
stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||
|
||||
offset = feat.len + model.len;
|
||||
setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.maxActivePaths, 'i32');
|
||||
Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.enableEndpoint, 'i32');
|
||||
Module.setValue(ptr + offset, config.enableEndpoint, 'i32');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
|
||||
Module.setValue(ptr + offset, config.rule1MinTrailingSilence, 'float');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
|
||||
Module.setValue(ptr + offset, config.rule2MinTrailingSilence, 'float');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
|
||||
Module.setValue(ptr + offset, config.rule3MinUtteranceLength, 'float');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
|
||||
Module.setValue(ptr + offset, buffer + decodingMethodLen, 'i8*');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.hotwordsScore, 'float');
|
||||
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
|
||||
offset += 4;
|
||||
|
||||
return {
|
||||
@@ -229,21 +249,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config) {
|
||||
}
|
||||
|
||||
|
||||
function createRecognizer() {
|
||||
let onlineTransducerModelConfig = {
|
||||
function createOnlineRecognizer(Module, myConfig) {
|
||||
const onlineTransducerModelConfig = {
|
||||
encoder: '',
|
||||
decoder: '',
|
||||
joiner: '',
|
||||
}
|
||||
};
|
||||
|
||||
let onlineParaformerModelConfig = {
|
||||
const onlineParaformerModelConfig = {
|
||||
encoder: '',
|
||||
decoder: '',
|
||||
}
|
||||
};
|
||||
|
||||
let onlineZipformer2CtcModelConfig = {
|
||||
const onlineZipformer2CtcModelConfig = {
|
||||
model: '',
|
||||
}
|
||||
};
|
||||
|
||||
let type = 0;
|
||||
|
||||
@@ -266,7 +286,7 @@ function createRecognizer() {
|
||||
}
|
||||
|
||||
|
||||
let onlineModelConfig = {
|
||||
const onlineModelConfig = {
|
||||
transducer: onlineTransducerModelConfig,
|
||||
paraformer: onlineParaformerModelConfig,
|
||||
zipformer2Ctc: onlineZipformer2CtcModelConfig,
|
||||
@@ -275,12 +295,12 @@ function createRecognizer() {
|
||||
provider: 'cpu',
|
||||
debug: 1,
|
||||
modelType: '',
|
||||
}
|
||||
};
|
||||
|
||||
let featureConfig = {
|
||||
const featureConfig = {
|
||||
sampleRate: 16000,
|
||||
featureDim: 80,
|
||||
}
|
||||
};
|
||||
|
||||
let recognizerConfig = {
|
||||
featConfig: featureConfig,
|
||||
@@ -293,23 +313,336 @@ function createRecognizer() {
|
||||
rule3MinUtteranceLength: 20,
|
||||
hotwordsFile: '',
|
||||
hotwordsScore: 1.5,
|
||||
};
|
||||
if (myConfig) {
|
||||
recognizerConfig = myConfig;
|
||||
}
|
||||
|
||||
return new OnlineRecognizer(recognizerConfig);
|
||||
return new OnlineRecognizer(recognizerConfig, Module);
|
||||
}
|
||||
|
||||
class OnlineStream {
|
||||
constructor(handle) {
|
||||
/**
 * Write a non-streaming transducer model config into the WebAssembly heap.
 *
 * The three path strings are stored back to back, NUL-terminated, in `buffer`;
 * `ptr` holds three 4-byte pointers (encoder, decoder, joiner) into it.
 *
 * @param {{encoder: string, decoder: string, joiner: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer table. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOfflineTransducerModelConfig(config, Module) {
  const fields = [config.encoder, config.decoder, config.joiner];

  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const sizes = fields.map((s) => Module.lengthBytesUTF8(s) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, n) => sum + n, 0));

  const len = fields.length * 4;  // one 4-byte pointer per field
  const ptr = Module._malloc(len);

  let offset = 0;
  fields.forEach((s, i) => {
    Module.stringToUTF8(s, buffer + offset, sizes[i]);
    Module.setValue(ptr + 4 * i, buffer + offset, 'i8*');
    offset += sizes[i];
  });

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a non-streaming paraformer model config into the WebAssembly heap.
 *
 * `buffer` receives the NUL-terminated model path; `ptr` is a 4-byte cell
 * holding the address of that string.
 *
 * @param {{model: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer cell. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOfflineParaformerModelConfig(config, Module) {
  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const n = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(n);

  const len = 4;  // a single 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, n);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a non-streaming NeMo EncDecCTC model config into the WebAssembly heap.
 *
 * `buffer` receives the NUL-terminated model path; `ptr` is a 4-byte cell
 * holding the address of that string.
 *
 * @param {{model: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer cell. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config, Module) {
  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const n = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(n);

  const len = 4;  // a single 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, n);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a Whisper model config into the WebAssembly heap.
 *
 * The encoder and decoder path strings are stored back to back,
 * NUL-terminated, in `buffer`; `ptr` holds two 4-byte pointers (encoder, then
 * decoder) into that buffer.
 *
 * @param {{encoder: string, decoder: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer table. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOfflineWhisperModelConfig(config, Module) {
  const fields = [config.encoder, config.decoder];

  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const sizes = fields.map((s) => Module.lengthBytesUTF8(s) + 1);

  const buffer = Module._malloc(sizes.reduce((sum, n) => sum + n, 0));

  const len = fields.length * 4;  // one 4-byte pointer per field
  const ptr = Module._malloc(len);

  let offset = 0;
  fields.forEach((s, i) => {
    Module.stringToUTF8(s, buffer + offset, sizes[i]);
    Module.setValue(ptr + 4 * i, buffer + offset, 'i8*');
    offset += sizes[i];
  });

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a TDNN model config into the WebAssembly heap.
 *
 * `buffer` receives the NUL-terminated model path; `ptr` is a 4-byte cell
 * holding the address of that string.
 *
 * @param {{model: string}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the pointer cell. The caller must
 *     free both allocations.
 */
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const n = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(n);

  const len = 4;  // a single 4-byte pointer
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, n);
  Module.setValue(ptr, buffer, 'i8*');

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a language-model config into the WebAssembly heap.
 *
 * `buffer` receives the NUL-terminated model path. `ptr` is an 8-byte record:
 * a 4-byte pointer to that string at offset 0 and the scale as a 32-bit float
 * at offset 4.
 *
 * @param {{model: string, scale: number}} config
 * @param {Object} Module Emscripten module instance.
 * @returns {{buffer: number, ptr: number, len: number}} Heap addresses of the
 *     two allocations and the byte size of the record. The caller must free
 *     both allocations.
 */
function initSherpaOnnxOfflineLMConfig(config, Module) {
  // +1 leaves room for the terminating NUL written by stringToUTF8().
  const n = Module.lengthBytesUTF8(config.model) + 1;
  const buffer = Module._malloc(n);

  const len = 2 * 4;  // one 4-byte pointer followed by one 32-bit float
  const ptr = Module._malloc(len);

  Module.stringToUTF8(config.model, buffer, n);
  Module.setValue(ptr, buffer, 'i8*');
  Module.setValue(ptr + 4, config.scale, 'float');

  return {buffer, ptr, len};
}
|
||||
|
||||
/**
 * Write a non-streaming model config into the WebAssembly heap.
 *
 * The record at `ptr` starts with byte copies of the five per-model records
 * (transducer, paraformer, nemoCtc, whisper, tdnn), followed by five 4-byte
 * fields: tokens pointer, numThreads (i32), debug (i32), provider pointer and
 * modelType pointer. The three strings live NUL-terminated, back to back, in
 * `buffer`.
 * NOTE(review): this order presumably mirrors SherpaOnnxOfflineModelConfig in
 * c-api.h; confirm against the header before changing it.
 *
 * @param {Object} config Object with `transducer`, `paraformer`, `nemoCtc`,
 *     `whisper`, `tdnn`, `tokens`, `numThreads`, `debug`, `provider` and
 *     `modelType` fields.
 * @param {Object} Module Emscripten module instance.
 * @returns {Object} `{buffer, ptr, len}` for this record plus the nested
 *     descriptors under `transducer`, `paraformer`, `nemoCtc`, `whisper` and
 *     `tdnn`, so that all allocations can be freed later.
 */
function initSherpaOnnxOfflineModelConfig(config, Module) {
  const transducer =
      initSherpaOnnxOfflineTransducerModelConfig(config.transducer, Module);
  const paraformer =
      initSherpaOnnxOfflineParaformerModelConfig(config.paraformer, Module);
  const nemoCtc =
      initSherpaOnnxOfflineNemoEncDecCtcModelConfig(config.nemoCtc, Module);
  const whisper =
      initSherpaOnnxOfflineWhisperModelConfig(config.whisper, Module);
  const tdnn = initSherpaOnnxOfflineTdnnModelConfig(config.tdnn, Module);

  // Nested records, in the order they are laid out at the start of `ptr`.
  const parts = [transducer, paraformer, nemoCtc, whisper, tdnn];
  const partsLen = parts.reduce((total, part) => total + part.len, 0);

  const len = partsLen + 5 * 4;  // five trailing 4-byte fields
  const ptr = Module._malloc(len);

  let offset = 0;
  for (const part of parts) {
    Module._CopyHeap(part.ptr, part.len, ptr + offset);
    offset += part.len;
  }

  // +1 leaves room for each terminating NUL written by stringToUTF8().
  const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
  const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
  const modelTypeLen = Module.lengthBytesUTF8(config.modelType) + 1;
  const buffer = Module._malloc(tokensLen + providerLen + modelTypeLen);

  const tokensPtr = buffer;
  const providerPtr = tokensPtr + tokensLen;
  const modelTypePtr = providerPtr + providerLen;

  Module.stringToUTF8(config.tokens, tokensPtr, tokensLen);
  Module.stringToUTF8(config.provider, providerPtr, providerLen);
  Module.stringToUTF8(config.modelType, modelTypePtr, modelTypeLen);

  // `offset` now equals partsLen, i.e. the start of the trailing fields.
  Module.setValue(ptr + offset, tokensPtr, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.numThreads, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, config.debug, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, providerPtr, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, modelTypePtr, 'i8*');

  return {buffer, ptr, len, transducer, paraformer, nemoCtc, whisper, tdnn};
}
|
||||
|
||||
/**
 * Write a non-streaming recognizer config into the WebAssembly heap.
 *
 * The record at `ptr` starts with byte copies of the feature, model and LM
 * records, followed by four 4-byte fields: decoding-method pointer,
 * maxActivePaths (i32), hotwords-file pointer and hotwordsScore (float). The
 * two strings live NUL-terminated, back to back, in `buffer`.
 * NOTE(review): this order presumably mirrors
 * SherpaOnnxOfflineRecognizerConfig in c-api.h; confirm before changing it.
 *
 * @param {Object} config Object with `featConfig`, `modelConfig`, `lmConfig`,
 *     `decodingMethod`, `maxActivePaths`, `hotwordsFile` and `hotwordsScore`
 *     fields.
 * @param {Object} Module Emscripten module instance.
 * @returns {Object} `{buffer, ptr, len}` for this record plus the nested
 *     descriptors under `feat`, `model` and `lm`, so that all allocations can
 *     be freed later.
 */
function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
  const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
  const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
  const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);

  // Nested records, in the order they are laid out at the start of `ptr`.
  const parts = [feat, model, lm];
  const partsLen = parts.reduce((total, part) => total + part.len, 0);

  const len = partsLen + 4 * 4;  // four trailing 4-byte fields
  const ptr = Module._malloc(len);

  let offset = 0;
  for (const part of parts) {
    Module._CopyHeap(part.ptr, part.len, ptr + offset);
    offset += part.len;
  }

  // +1 leaves room for each terminating NUL written by stringToUTF8().
  const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
  const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
  const buffer = Module._malloc(decodingMethodLen + hotwordsFileLen);

  const decodingMethodPtr = buffer;
  const hotwordsFilePtr = decodingMethodPtr + decodingMethodLen;

  Module.stringToUTF8(config.decodingMethod, decodingMethodPtr, decodingMethodLen);
  Module.stringToUTF8(config.hotwordsFile, hotwordsFilePtr, hotwordsFileLen);

  // `offset` now equals partsLen, i.e. the start of the trailing fields.
  Module.setValue(ptr + offset, decodingMethodPtr, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.maxActivePaths, 'i32');
  offset += 4;

  Module.setValue(ptr + offset, hotwordsFilePtr, 'i8*');
  offset += 4;

  Module.setValue(ptr + offset, config.hotwordsScore, 'float');

  return {buffer, ptr, len, feat, model, lm};
}
|
||||
|
||||
/**
 * JavaScript wrapper around a native non-streaming (offline) stream handle.
 */
class OfflineStream {
  /**
   * @param {number} handle Native stream handle.
   * @param {Object} Module Emscripten module instance that owns the handle.
   */
  constructor(handle, Module) {
    this.handle = handle;
    // Not used by any method of this class; acceptWaveform() uses a temporary
    // allocation instead.
    this.pointer = null;  // buffer
    this.n = 0;           // buffer size
    this.Module = Module;
  }

  /**
   * Destroy the native stream. Calling it again is a no-op.
   */
  free() {
    if (this.handle) {
      this.Module._DestroyOfflineStream(this.handle);
      this.handle = null;
    }
  }

  /**
   * @param sampleRate {Number}
   * @param samples {Float32Array} Containing samples in the range [-1, 1]
   */
  acceptWaveform(sampleRate, samples) {
    const pointer =
        this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);

    try {
      // Read HEAPF32 only after _malloc(): the heap view can be replaced when
      // the WebAssembly memory grows.
      this.Module.HEAPF32.set(samples, pointer / samples.BYTES_PER_ELEMENT);
      this.Module._AcceptWaveformOffline(
          this.handle, sampleRate, pointer, samples.length);
    } finally {
      // Release the temporary copy even if the native call throws.
      this.Module._free(pointer);
    }
  }
}
|
||||
|
||||
/**
 * JavaScript wrapper around a native non-streaming (offline) recognizer.
 */
class OfflineRecognizer {
  /**
   * @param {Object} configObj Recognizer configuration; it is serialized into
   *     the WebAssembly heap with initSherpaOnnxOfflineRecognizerConfig().
   * @param {Object} Module Emscripten module instance.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    this.Module = Module;

    const config = initSherpaOnnxOfflineRecognizerConfig(configObj, Module);
    try {
      this.handle = Module._CreateOfflineRecognizer(config.ptr);
    } finally {
      // The serialized config is released right after creation, even when
      // creation throws.
      freeConfig(config, Module);
    }
  }

  /**
   * Destroy the native recognizer. Calling it again is a no-op.
   */
  free() {
    if (this.handle) {
      this.Module._DestroyOfflineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OfflineStream} A new stream bound to this recognizer. The caller
   *     should release it with its free() method.
   */
  createStream() {
    const handle = this.Module._CreateOfflineStream(this.handle);
    return new OfflineStream(handle, this.Module);
  }

  /**
   * Run recognition on the audio accepted by the stream so far.
   * @param {OfflineStream} stream
   */
  decode(stream) {
    this.Module._DecodeOfflineStream(this.handle, stream.handle);
  }

  /**
   * @param {OfflineStream} stream A stream that has been decoded.
   * @returns {string} The recognized text.
   */
  getResult(stream) {
    const r = this.Module._GetOfflineStreamResult(stream.handle);

    // The first field of the native result is a pointer to the text.
    const textPtr = this.Module.getValue(r, 'i8*');
    const text = this.Module.UTF8ToString(textPtr);

    this.Module._DestroyOfflineRecognizerResult(r);
    return text;
  }
}
|
||||
|
||||
class OnlineStream {
|
||||
constructor(handle, Module) {
|
||||
this.handle = handle;
|
||||
this.pointer = null; // buffer
|
||||
this.n = 0; // buffer size
|
||||
this.Module = Module;
|
||||
}
|
||||
|
||||
free() {
|
||||
if (this.handle) {
|
||||
this.Module._DestroyOnlineStream(this.handle);
|
||||
this.handle = null;
|
||||
this.Module._free(this.pointer);
|
||||
this.pointer = null;
|
||||
this.n = 0;
|
||||
}
|
||||
@@ -321,61 +654,73 @@ class OnlineStream {
|
||||
*/
|
||||
acceptWaveform(sampleRate, samples) {
|
||||
if (this.n < samples.length) {
|
||||
_free(this.pointer);
|
||||
this.pointer = _malloc(samples.length * samples.BYTES_PER_ELEMENT);
|
||||
this.Module._free(this.pointer);
|
||||
this.pointer =
|
||||
this.Module._malloc(samples.length * samples.BYTES_PER_ELEMENT);
|
||||
this.n = samples.length
|
||||
}
|
||||
|
||||
Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
|
||||
_AcceptWaveform(this.handle, sampleRate, this.pointer, samples.length);
|
||||
this.Module.HEAPF32.set(samples, this.pointer / samples.BYTES_PER_ELEMENT);
|
||||
this.Module._AcceptWaveform(
|
||||
this.handle, sampleRate, this.pointer, samples.length);
|
||||
}
|
||||
|
||||
inputFinished() {
|
||||
_InputFinished(this.handle);
|
||||
this.Module._InputFinished(this.handle);
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * JavaScript wrapper around a native streaming (online) recognizer.
 */
class OnlineRecognizer {
  /**
   * @param {Object} configObj Recognizer configuration; it is serialized into
   *     the WebAssembly heap with initSherpaOnnxOnlineRecognizerConfig().
   * @param {Object} Module Emscripten module instance.
   */
  constructor(configObj, Module) {
    this.config = configObj;
    this.Module = Module;

    const config = initSherpaOnnxOnlineRecognizerConfig(configObj, Module);
    try {
      this.handle = Module._CreateOnlineRecognizer(config.ptr);
    } finally {
      // The serialized config is released right after creation, even when
      // creation throws.
      freeConfig(config, Module);
    }
  }

  /**
   * Destroy the native recognizer. Calling it again is a no-op.
   */
  free() {
    if (this.handle) {
      this.Module._DestroyOnlineRecognizer(this.handle);
      this.handle = 0;
    }
  }

  /**
   * @returns {OnlineStream} A new stream bound to this recognizer. The caller
   *     should release it with its free() method.
   */
  createStream() {
    const handle = this.Module._CreateOnlineStream(this.handle);
    return new OnlineStream(handle, this.Module);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean} True if the native side reports the stream as ready.
   */
  isReady(stream) {
    return this.Module._IsOnlineStreamReady(this.handle, stream.handle) === 1;
  }

  /**
   * Run one decoding step on the stream.
   * @param {OnlineStream} stream
   */
  decode(stream) {
    this.Module._DecodeOnlineStream(this.handle, stream.handle);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {boolean} True if the native side reports an endpoint.
   */
  isEndpoint(stream) {
    return this.Module._IsEndpoint(this.handle, stream.handle) === 1;
  }

  /**
   * Reset the stream through the native `_Reset` call.
   * @param {OnlineStream} stream
   */
  reset(stream) {
    this.Module._Reset(this.handle, stream.handle);
  }

  /**
   * @param {OnlineStream} stream
   * @returns {string} The text recognized so far.
   */
  getResult(stream) {
    const r = this.Module._GetOnlineStreamResult(this.handle, stream.handle);

    // The first field of the native result is a pointer to the text.
    const textPtr = this.Module.getValue(r, 'i8*');
    const text = this.Module.UTF8ToString(textPtr);

    this.Module._DestroyOnlineRecognizerResult(r);
    return text;
  }
}
|
||||
|
||||
if (typeof process == 'object' && typeof process.versions == 'object' &&
|
||||
typeof process.versions.node == 'string') {
|
||||
module.exports = {
|
||||
createOnlineRecognizer,
|
||||
OfflineRecognizer,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// wasm/sherpa-onnx-wasm-asr-main.cc
|
||||
// wasm/sherpa-onnx-wasm-main-asr.cc
|
||||
//
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
#include <stdio.h>
|
||||
|
||||
76
wasm/nodejs/CMakeLists.txt
Normal file
76
wasm/nodejs/CMakeLists.txt
Normal file
@@ -0,0 +1,76 @@
|
||||
# Refuse to configure unless the environment variable checked below is set;
# the error message points at build-wasm-simd-nodejs.sh.
if(NOT $ENV{SHERPA_ONNX_IS_USING_BUILD_WASM_SH})
  message(FATAL_ERROR "Please use ./build-wasm-simd-nodejs.sh to build for wasm NodeJS")
endif()

# C API functions that must be callable from JavaScript.
set(exported_functions
  #tts
  PrintOfflineTtsConfig
  SherpaOnnxCreateOfflineTts
  SherpaOnnxDestroyOfflineTts
  SherpaOnnxDestroyOfflineTtsGeneratedAudio
  SherpaOnnxOfflineTtsGenerate
  SherpaOnnxOfflineTtsGenerateWithCallback
  SherpaOnnxOfflineTtsNumSpeakers
  SherpaOnnxOfflineTtsSampleRate
  SherpaOnnxWriteWave
  # streaming asr
  AcceptWaveform
  CreateOnlineRecognizer
  CreateOnlineStream
  DecodeOnlineStream
  DestroyOnlineRecognizer
  DestroyOnlineRecognizerResult
  DestroyOnlineStream
  GetOnlineStreamResult
  InputFinished
  IsEndpoint
  IsOnlineStreamReady
  Reset
  # non-streaming ASR
  PrintOfflineRecognizerConfig
  CreateOfflineRecognizer
  DestroyOfflineRecognizer
  CreateOfflineStream
  DestroyOfflineStream
  AcceptWaveformOffline
  DecodeOfflineStream
  DecodeMultipleOfflineStreams
  GetOfflineStreamResult
  DestroyOfflineRecognizerResult
)

# EXPORTED_FUNCTIONS takes each symbol with a leading underscore, joined by
# commas.
set(mangled_exported_functions)
foreach(x IN LISTS exported_functions)
  list(APPEND mangled_exported_functions "_${x}")
endforeach()
list(JOIN mangled_exported_functions "," all_exported_functions)

include_directories(${CMAKE_SOURCE_DIR})
set(MY_FLAGS " -s FORCE_FILESYSTEM=1 -s INITIAL_MEMORY=512MB -s ALLOW_MEMORY_GROWTH=1")
string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ") # 10MB
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS " -sNODERAWFS=1 ")
# sherpa-onnx-asr.js calls all six of these runtime methods on the Module
# object, so the full list is given once here.
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
string(APPEND MY_FLAGS " -sMODULARIZE=1 -sWASM_ASYNC_COMPILATION=0 ")

message(STATUS "MY_FLAGS: ${MY_FLAGS}")

set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
# CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking executables.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")

add_executable(sherpa-onnx-wasm-nodejs sherpa-onnx-wasm-nodejs.cc)
target_link_libraries(sherpa-onnx-wasm-nodejs sherpa-onnx-core sherpa-onnx-c-api)
install(TARGETS sherpa-onnx-wasm-nodejs DESTINATION bin/wasm/nodejs)

# Install the JavaScript wrappers next to the generated .js/.wasm pair.
install(
  FILES
    ${CMAKE_SOURCE_DIR}/wasm/asr/sherpa-onnx-asr.js
    ${CMAKE_SOURCE_DIR}/wasm/tts/sherpa-onnx-tts.js
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.js"
    "$<TARGET_FILE_DIR:sherpa-onnx-wasm-nodejs>/sherpa-onnx-wasm-nodejs.wasm"
  DESTINATION
    bin/wasm/nodejs
)
|
||||
104
wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
Normal file
104
wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
Normal file
@@ -0,0 +1,104 @@
|
||||
// wasm/nodejs/sherpa-onnx-wasm-nodejs.cc
|
||||
//
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
#include <stdio.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "sherpa-onnx/c-api/c-api.h"
|
||||
|
||||
extern "C" {
|
||||
|
||||
// Compile-time checks that each C API config struct has the expected size,
// expressed in units of 4 bytes, and that the composite structs are exactly
// the sum of their members plus a fixed number of 4-byte fields.
// NOTE(review): presumably these guard the byte offsets hard-coded in the
// JavaScript wrappers (which build the structs with setValue at ptr + 4 * k)
// -- confirm against the JS side when a struct changes.
static_assert(sizeof(SherpaOnnxOfflineTransducerModelConfig) == 3 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 2 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineLMConfig) == 2 * 4, "");
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineTransducerModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineParaformerModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
|
||||
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 5 * 4,
|
||||
"");
|
||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
|
||||
sizeof(SherpaOnnxFeatureConfig) +
|
||||
sizeof(SherpaOnnxOfflineLMConfig) +
|
||||
sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
|
||||
"");
|
||||
|
||||
// Prints every field of an offline TTS config to stdout, grouped into three
// sections: the VITS model config, the TTS model config, and the top-level
// TTS config. String fields are printed with %s, so they must be valid
// NUL-terminated pointers.
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
|
||||
auto tts_model_config = &tts_config->model;
|
||||
auto vits_model_config = &tts_model_config->vits;
|
||||
fprintf(stdout, "----------vits model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", vits_model_config->model);
|
||||
fprintf(stdout, "lexicon: %s\n", vits_model_config->lexicon);
|
||||
fprintf(stdout, "tokens: %s\n", vits_model_config->tokens);
|
||||
fprintf(stdout, "data_dir: %s\n", vits_model_config->data_dir);
|
||||
fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
|
||||
fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
|
||||
fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
|
||||
|
||||
fprintf(stdout, "----------tts model config----------\n");
|
||||
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
||||
fprintf(stdout, "debug: %d\n", tts_model_config->debug);
|
||||
fprintf(stdout, "provider: %s\n", tts_model_config->provider);
|
||||
|
||||
fprintf(stdout, "----------tts config----------\n");
|
||||
fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
|
||||
fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
|
||||
}
|
||||
|
||||
// Prints every field of an offline recognizer config to stdout: the
// transducer, paraformer, nemo_ctc, whisper and tdnn model sections, the
// shared model settings, the feature config, and finally the recognizer-level
// settings. String fields are printed with %s, so they must be valid
// NUL-terminated pointers.
void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
||||
auto model_config = &config->model_config;
|
||||
auto feat = &config->feat_config;
|
||||
auto transducer = &model_config->transducer;
|
||||
auto paraformer = &model_config->paraformer;
|
||||
auto nemo_ctc = &model_config->nemo_ctc;
|
||||
auto whisper = &model_config->whisper;
|
||||
auto tdnn = &model_config->tdnn;
|
||||
|
||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||
fprintf(stdout, "decoder: %s\n", transducer->decoder);
|
||||
fprintf(stdout, "joiner: %s\n", transducer->joiner);
|
||||
|
||||
fprintf(stdout, "----------offline paraformer model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", paraformer->model);
|
||||
|
||||
fprintf(stdout, "----------offline nemo_ctc model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", nemo_ctc->model);
|
||||
|
||||
fprintf(stdout, "----------offline whisper model config----------\n");
|
||||
fprintf(stdout, "encoder: %s\n", whisper->encoder);
|
||||
fprintf(stdout, "decoder: %s\n", whisper->decoder);
|
||||
|
||||
fprintf(stdout, "----------offline tdnn model config----------\n");
|
||||
fprintf(stdout, "model: %s\n", tdnn->model);
|
||||
|
||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||
fprintf(stdout, "debug: %d\n", model_config->debug);
|
||||
fprintf(stdout, "model type: %s\n", model_config->model_type);
|
||||
|
||||
fprintf(stdout, "----------feat config----------\n");
|
||||
fprintf(stdout, "sample rate: %d\n", feat->sample_rate);
|
||||
fprintf(stdout, "feat dim: %d\n", feat->feature_dim);
|
||||
|
||||
fprintf(stdout, "----------recognizer config----------\n");
|
||||
fprintf(stdout, "decoding method: %s\n", config->decoding_method);
|
||||
fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
|
||||
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
|
||||
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
|
||||
}
|
||||
|
||||
// Copies num_bytes bytes starting at src into the buffer starting at dst.
// The JavaScript wrappers call this as Module._CopyHeap to embed an
// already-built config struct inside a larger one on the WASM heap.
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {
|
||||
std::copy(src, src + num_bytes, dst);
|
||||
}
|
||||
}
|
||||
@@ -22,7 +22,7 @@ Module.onRuntimeInitialized = function() {
|
||||
console.log('Model files downloaded!');
|
||||
|
||||
console.log('Initializing tts ......');
|
||||
tts = initSherpaOnnxOfflineTts()
|
||||
tts = createOfflineTts(Module)
|
||||
if (tts.numSpeakers > 1) {
|
||||
speakerIdLabel.innerHTML = `Speaker ID (0 - ${tts.numSpeakers - 1}):`;
|
||||
}
|
||||
|
||||
@@ -1,109 +1,109 @@
|
||||
|
||||
function freeConfig(config) {
|
||||
function freeConfig(config, Module) {
|
||||
if ('buffer' in config) {
|
||||
_free(config.buffer);
|
||||
Module._free(config.buffer);
|
||||
}
|
||||
|
||||
if ('config' in config) {
|
||||
freeConfig(config.config)
|
||||
freeConfig(config.config, Module)
|
||||
}
|
||||
|
||||
_free(config.ptr);
|
||||
Module._free(config.ptr);
|
||||
}
|
||||
|
||||
// The user should free the returned pointers
|
||||
function initSherpaOnnxOfflineTtsVitsModelConfig(config) {
|
||||
let modelLen = lengthBytesUTF8(config.model) + 1;
|
||||
let lexiconLen = lengthBytesUTF8(config.lexicon) + 1;
|
||||
let tokensLen = lengthBytesUTF8(config.tokens) + 1;
|
||||
let dataDirLen = lengthBytesUTF8(config.dataDir) + 1;
|
||||
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
|
||||
const modelLen = Module.lengthBytesUTF8(config.model) + 1;
|
||||
const lexiconLen = Module.lengthBytesUTF8(config.lexicon) + 1;
|
||||
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||
const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1;
|
||||
|
||||
let n = modelLen + lexiconLen + tokensLen + dataDirLen;
|
||||
const n = modelLen + lexiconLen + tokensLen + dataDirLen;
|
||||
|
||||
let buffer = _malloc(n);
|
||||
const buffer = Module._malloc(n);
|
||||
|
||||
let len = 7 * 4;
|
||||
let ptr = _malloc(len);
|
||||
const len = 7 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
stringToUTF8(config.model, buffer + offset, modelLen);
|
||||
Module.stringToUTF8(config.model, buffer + offset, modelLen);
|
||||
offset += modelLen;
|
||||
|
||||
stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
|
||||
Module.stringToUTF8(config.lexicon, buffer + offset, lexiconLen);
|
||||
offset += lexiconLen;
|
||||
|
||||
stringToUTF8(config.tokens, buffer + offset, tokensLen);
|
||||
Module.stringToUTF8(config.tokens, buffer + offset, tokensLen);
|
||||
offset += tokensLen;
|
||||
|
||||
stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
|
||||
Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
|
||||
offset += dataDirLen;
|
||||
|
||||
offset = 0;
|
||||
setValue(ptr, buffer + offset, 'i8*');
|
||||
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||
offset += modelLen;
|
||||
|
||||
setValue(ptr + 4, buffer + offset, 'i8*');
|
||||
Module.setValue(ptr + 4, buffer + offset, 'i8*');
|
||||
offset += lexiconLen;
|
||||
|
||||
setValue(ptr + 8, buffer + offset, 'i8*');
|
||||
Module.setValue(ptr + 8, buffer + offset, 'i8*');
|
||||
offset += tokensLen;
|
||||
|
||||
setValue(ptr + 12, buffer + offset, 'i8*');
|
||||
Module.setValue(ptr + 12, buffer + offset, 'i8*');
|
||||
offset += dataDirLen;
|
||||
|
||||
setValue(ptr + 16, config.noiseScale, 'float');
|
||||
setValue(ptr + 20, config.noiseScaleW, 'float');
|
||||
setValue(ptr + 24, config.lengthScale, 'float');
|
||||
Module.setValue(ptr + 16, config.noiseScale, 'float');
|
||||
Module.setValue(ptr + 20, config.noiseScaleW, 'float');
|
||||
Module.setValue(ptr + 24, config.lengthScale, 'float');
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len,
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineTtsModelConfig(config) {
|
||||
let vitsModelConfig =
|
||||
initSherpaOnnxOfflineTtsVitsModelConfig(config.offlineTtsVitsModelConfig);
|
||||
function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
|
||||
const vitsModelConfig = initSherpaOnnxOfflineTtsVitsModelConfig(
|
||||
config.offlineTtsVitsModelConfig, Module);
|
||||
|
||||
let len = vitsModelConfig.len + 3 * 4;
|
||||
let ptr = _malloc(len);
|
||||
const len = vitsModelConfig.len + 3 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
_CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
|
||||
Module._CopyHeap(vitsModelConfig.ptr, vitsModelConfig.len, ptr + offset);
|
||||
offset += vitsModelConfig.len;
|
||||
|
||||
setValue(ptr + offset, config.numThreads, 'i32');
|
||||
Module.setValue(ptr + offset, config.numThreads, 'i32');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.debug, 'i32');
|
||||
Module.setValue(ptr + offset, config.debug, 'i32');
|
||||
offset += 4;
|
||||
|
||||
let providerLen = lengthBytesUTF8(config.provider) + 1;
|
||||
let buffer = _malloc(providerLen);
|
||||
stringToUTF8(config.provider, buffer, providerLen);
|
||||
setValue(ptr + offset, buffer, 'i8*');
|
||||
const providerLen = Module.lengthBytesUTF8(config.provider) + 1;
|
||||
const buffer = Module._malloc(providerLen);
|
||||
Module.stringToUTF8(config.provider, buffer, providerLen);
|
||||
Module.setValue(ptr + offset, buffer, 'i8*');
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, config: vitsModelConfig,
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineTtsConfig(config) {
|
||||
let modelConfig =
|
||||
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig);
|
||||
let len = modelConfig.len + 2 * 4;
|
||||
let ptr = _malloc(len);
|
||||
function initSherpaOnnxOfflineTtsConfig(config, Module) {
|
||||
const modelConfig =
|
||||
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
|
||||
const len = modelConfig.len + 2 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
_CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
|
||||
Module._CopyHeap(modelConfig.ptr, modelConfig.len, ptr + offset);
|
||||
offset += modelConfig.len;
|
||||
|
||||
let ruleFstsLen = lengthBytesUTF8(config.ruleFsts) + 1;
|
||||
let buffer = _malloc(ruleFstsLen);
|
||||
stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
|
||||
setValue(ptr + offset, buffer, 'i8*');
|
||||
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts) + 1;
|
||||
const buffer = Module._malloc(ruleFstsLen);
|
||||
Module.stringToUTF8(config.ruleFsts, buffer, ruleFstsLen);
|
||||
Module.setValue(ptr + offset, buffer, 'i8*');
|
||||
offset += 4;
|
||||
|
||||
setValue(ptr + offset, config.maxNumSentences, 'i32');
|
||||
Module.setValue(ptr + offset, config.maxNumSentences, 'i32');
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, config: modelConfig,
|
||||
@@ -111,19 +111,21 @@ function initSherpaOnnxOfflineTtsConfig(config) {
|
||||
}
|
||||
|
||||
class OfflineTts {
|
||||
constructor(configObj) {
|
||||
let config = initSherpaOnnxOfflineTtsConfig(configObj)
|
||||
let handle = _SherpaOnnxCreateOfflineTts(config.ptr);
|
||||
constructor(configObj, Module) {
|
||||
console.log(configObj)
|
||||
const config = initSherpaOnnxOfflineTtsConfig(configObj, Module)
|
||||
const handle = Module._SherpaOnnxCreateOfflineTts(config.ptr);
|
||||
|
||||
freeConfig(config);
|
||||
freeConfig(config, Module);
|
||||
|
||||
this.handle = handle;
|
||||
this.sampleRate = _SherpaOnnxOfflineTtsSampleRate(this.handle);
|
||||
this.numSpeakers = _SherpaOnnxOfflineTtsNumSpeakers(this.handle);
|
||||
this.sampleRate = Module._SherpaOnnxOfflineTtsSampleRate(this.handle);
|
||||
this.numSpeakers = Module._SherpaOnnxOfflineTtsNumSpeakers(this.handle);
|
||||
this.Module = Module
|
||||
}
|
||||
|
||||
free() {
|
||||
_SherpaOnnxDestroyOfflineTts(this.handle);
|
||||
this.Module._SherpaOnnxDestroyOfflineTts(this.handle);
|
||||
this.handle = 0
|
||||
}
|
||||
|
||||
@@ -133,29 +135,44 @@ class OfflineTts {
|
||||
// speed: 1.0
|
||||
// }
|
||||
generate(config) {
|
||||
let textLen = lengthBytesUTF8(config.text) + 1;
|
||||
let textPtr = _malloc(textLen);
|
||||
stringToUTF8(config.text, textPtr, textLen);
|
||||
const textLen = this.Module.lengthBytesUTF8(config.text) + 1;
|
||||
const textPtr = this.Module._malloc(textLen);
|
||||
this.Module.stringToUTF8(config.text, textPtr, textLen);
|
||||
|
||||
let h = _SherpaOnnxOfflineTtsGenerate(
|
||||
const h = this.Module._SherpaOnnxOfflineTtsGenerate(
|
||||
this.handle, textPtr, config.sid, config.speed);
|
||||
|
||||
let numSamples = HEAP32[h / 4 + 1];
|
||||
let sampleRate = HEAP32[h / 4 + 2];
|
||||
const numSamples = this.Module.HEAP32[h / 4 + 1];
|
||||
const sampleRate = this.Module.HEAP32[h / 4 + 2];
|
||||
|
||||
let samplesPtr = HEAP32[h / 4] / 4;
|
||||
let samples = new Float32Array(numSamples);
|
||||
const samplesPtr = this.Module.HEAP32[h / 4] / 4;
|
||||
const samples = new Float32Array(numSamples);
|
||||
for (let i = 0; i < numSamples; i++) {
|
||||
samples[i] = HEAPF32[samplesPtr + i];
|
||||
samples[i] = this.Module.HEAPF32[samplesPtr + i];
|
||||
}
|
||||
|
||||
_SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
|
||||
this.Module._SherpaOnnxDestroyOfflineTtsGeneratedAudio(h);
|
||||
return {samples: samples, sampleRate: sampleRate};
|
||||
}
|
||||
/**
 * Writes audio to a wave file through the WASM module.
 *
 * Copies audio.samples element by element into a freshly allocated float
 * region of the module heap (4 bytes per sample), encodes filename as a
 * NUL-terminated UTF-8 string in a second allocation, calls
 * _SherpaOnnxWriteWave with both, and then frees the two allocations.
 *
 * @param {string} filename Path passed to _SherpaOnnxWriteWave.
 * @param {Object} audio Object with `samples` (indexable, with `length`) and
 *     `sampleRate`, e.g. the object returned by generate().
 */
save(filename, audio) {
|
||||
const samples = audio.samples;
|
||||
const sampleRate = audio.sampleRate;
|
||||
const ptr = this.Module._malloc(samples.length * 4);
|
||||
for (let i = 0; i < samples.length; i++) {
|
||||
this.Module.HEAPF32[ptr / 4 + i] = samples[i];
|
||||
}
|
||||
|
||||
const filenameLen = this.Module.lengthBytesUTF8(filename) + 1;
|
||||
const buffer = this.Module._malloc(filenameLen);
|
||||
this.Module.stringToUTF8(filename, buffer, filenameLen);
|
||||
this.Module._SherpaOnnxWriteWave(ptr, samples.length, sampleRate, buffer);
|
||||
this.Module._free(buffer);
|
||||
this.Module._free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
function initSherpaOnnxOfflineTts() {
|
||||
let offlineTtsVitsModelConfig = {
|
||||
function createOfflineTts(Module, myConfig) {
|
||||
const offlineTtsVitsModelConfig = {
|
||||
model: './model.onnx',
|
||||
lexicon: '',
|
||||
tokens: './tokens.txt',
|
||||
@@ -164,7 +181,7 @@ function initSherpaOnnxOfflineTts() {
|
||||
noiseScaleW: 0.8,
|
||||
lengthScale: 1.0,
|
||||
};
|
||||
let offlineTtsModelConfig = {
|
||||
const offlineTtsModelConfig = {
|
||||
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
|
||||
numThreads: 1,
|
||||
debug: 1,
|
||||
@@ -176,5 +193,16 @@ function initSherpaOnnxOfflineTts() {
|
||||
maxNumSentences: 1,
|
||||
}
|
||||
|
||||
return new OfflineTts(offlineTtsConfig);
|
||||
if (myConfig) {
|
||||
offlineTtsConfig = myConfig;
|
||||
}
|
||||
|
||||
return new OfflineTts(offlineTtsConfig, Module);
|
||||
}
|
||||
|
||||
// Export createOfflineTts through module.exports only when
// process.versions.node is a string, i.e. when running under Node.js.
// NOTE(review): in other environments (presumably a browser page loading this
// file via a script tag) nothing is exported and createOfflineTts is used as
// a global -- confirm.
if (typeof process == 'object' && typeof process.versions == 'object' &&
|
||||
typeof process.versions.node == 'string') {
|
||||
module.exports = {
|
||||
createOfflineTts,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// wasm/sherpa-onnx-wasm-main.cc
|
||||
// wasm/sherpa-onnx-wasm-main-tts.cc
|
||||
//
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
#include <stdio.h>
|
||||
|
||||
Reference in New Issue
Block a user