Add JavaScript API (WASM) for homophone replacer (#2157)

This commit is contained in:
Fangjun Kuang
2025-04-28 20:47:49 +08:00
committed by GitHub
parent e51c37eb2f
commit a0aef1f6cd
13 changed files with 165 additions and 24 deletions

View File

@@ -63,6 +63,10 @@ function freeConfig(config, Module) {
freeConfig(config.ctcFstDecoder, Module)
}
if ('hr' in config) {
freeConfig(config.hr, Module)
}
Module._free(config.ptr);
}
@@ -281,6 +285,34 @@ function initSherpaOnnxFeatureConfig(config, Module) {
return {ptr: ptr, len: len};
}
function initSherpaOnnxHomophoneReplacerConfig(config, Module) {
const len = 3 * 4;
const ptr = Module._malloc(len);
const dictDirLen = Module.lengthBytesUTF8(config.dictDir || '') + 1;
const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const bufferLen = dictDirLen + lexiconLen + ruleFstsLen;
const buffer = Module._malloc(bufferLen);
let offset = 0
Module.stringToUTF8(config.dictDir || '', buffer + offset, dictDirLen);
offset += dictDirLen;
Module.stringToUTF8(config.lexicon || '', buffer + offset, lexiconLen);
offset += lexiconLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
offset += ruleFstsLen;
Module.setValue(ptr, buffer, 'i8*');
Module.setValue(ptr + 4, buffer + dictDirLen, 'i8*');
Module.setValue(ptr + 8, buffer + dictDirLen + lexiconLen, 'i8*');
return {ptr: ptr, len: len, buffer: buffer};
}
function initSherpaOnnxOnlineCtcFstDecoderConfig(config, Module) {
const len = 2 * 4;
const ptr = Module._malloc(len);
@@ -317,12 +349,21 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
config.hotwordsBufSize = 0;
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOnlineModelConfig(config.modelConfig, Module);
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 5 * 4 + hr.len;
const ptr = Module._malloc(len);
let offset = 0;
@@ -411,9 +452,12 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsBufSize || 0, 'i32');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
ctcFstDecoder: ctcFstDecoder
ctcFstDecoder: ctcFstDecoder, hr: hr,
}
}
@@ -989,11 +1033,20 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
};
}
if (!('hr' in config)) {
config.hr = {
dictDir: '',
lexicon: '',
ruleFsts: '',
};
}
const feat = initSherpaOnnxFeatureConfig(config.featConfig, Module);
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
const hr = initSherpaOnnxHomophoneReplacerConfig(config.hr, Module);
const len = feat.len + model.len + lm.len + 7 * 4;
const len = feat.len + model.len + lm.len + 7 * 4 + hr.len;
const ptr = Module._malloc(len);
let offset = 0;
@@ -1056,8 +1109,12 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.blankPenalty || 0, 'float');
offset += 4;
Module._CopyHeap(hr.ptr, hr.len, ptr + offset);
offset += hr.len;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm,
hr: hr,
}
}