Inverse text normalization API of streaming ASR for various programming languages (#1022)
This commit is contained in:
@@ -40,6 +40,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
|
||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||
|
||||
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
||||
# Pass the options collected in MY_FLAGS to the executable link step as well.
# CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking executables;
# the previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) is not a CMake variable,
# so the assignment was silently ignored.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")
|
||||
|
||||
@@ -239,7 +239,7 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
|
||||
config.ctcFstDecoderConfig, Module)
|
||||
|
||||
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len;
|
||||
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 2 * 4;
|
||||
const ptr = Module._malloc(len);
|
||||
|
||||
let offset = 0;
|
||||
@@ -251,7 +251,10 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||
|
||||
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
|
||||
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
|
||||
const bufferLen = decodingMethodLen + hotwordsFileLen;
|
||||
const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
|
||||
const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
|
||||
const bufferLen =
|
||||
decodingMethodLen + hotwordsFileLen + ruleFstsFileLen + ruleFarsFileLen;
|
||||
const buffer = Module._malloc(bufferLen);
|
||||
|
||||
offset = 0;
|
||||
@@ -259,6 +262,13 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||
offset += decodingMethodLen;
|
||||
|
||||
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
|
||||
offset += hotwordsFileLen;
|
||||
|
||||
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
|
||||
offset += ruleFstsFileLen;
|
||||
|
||||
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen);
|
||||
offset += ruleFarsFileLen;
|
||||
|
||||
offset = feat.len + model.len;
|
||||
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
|
||||
@@ -286,6 +296,16 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
|
||||
offset += 4;
|
||||
|
||||
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
|
||||
offset += ctcFstDecoder.len;
|
||||
|
||||
Module.setValue(
|
||||
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
|
||||
offset += 4;
|
||||
|
||||
Module.setValue(
|
||||
ptr + offset,
|
||||
buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*');
|
||||
offset += 4;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
|
||||
@@ -363,7 +383,9 @@ function createOnlineRecognizer(Module, myConfig) {
|
||||
ctcFstDecoderConfig: {
|
||||
graph: '',
|
||||
maxActive: 3000,
|
||||
}
|
||||
},
|
||||
ruleFsts: '',
|
||||
ruleFars: '',
|
||||
};
|
||||
if (myConfig) {
|
||||
recognizerConfig = myConfig;
|
||||
|
||||
@@ -26,7 +26,7 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
|
||||
sizeof(SherpaOnnxFeatureConfig) +
|
||||
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
|
||||
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig),
|
||||
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 2 * 4,
|
||||
"");
|
||||
|
||||
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
|
||||
@@ -71,6 +71,8 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
|
||||
config->rule3_min_utterance_length);
|
||||
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
|
||||
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
|
||||
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
|
||||
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
|
||||
|
||||
fprintf(stdout, "----------ctc fst decoder config----------\n");
|
||||
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);
|
||||
|
||||
@@ -31,6 +31,7 @@ string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ")
|
||||
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
|
||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||
|
||||
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||
@@ -51,4 +52,4 @@ install(
|
||||
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
|
||||
DESTINATION
|
||||
bin/wasm
|
||||
)
|
||||
)
|
||||
|
||||
@@ -31,6 +31,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
|
||||
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
|
||||
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
|
||||
|
||||
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
|
||||
# Pass the options collected in MY_FLAGS to the executable link step as well.
# CMAKE_EXE_LINKER_FLAGS is the variable CMake reads when linking executables;
# the previous spelling (CMAKE_EXECUTBLE_LINKER_FLAGS) is not a CMake variable,
# so the assignment was silently ignored.
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MY_FLAGS}")
|
||||
|
||||
Reference in New Issue
Block a user