Inverse text normalization API of streaming ASR for various programming languages (#1022)

This commit is contained in:
Fangjun Kuang
2024-06-18 13:42:17 +08:00
committed by GitHub
parent 349d957da2
commit 6789c909d2
64 changed files with 849 additions and 55 deletions

View File

@@ -40,6 +40,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")

View File

@@ -239,7 +239,7 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
const ctcFstDecoder = initSherpaOnnxOnlineCtcFstDecoderConfig(
config.ctcFstDecoderConfig, Module)
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len;
const len = feat.len + model.len + 8 * 4 + ctcFstDecoder.len + 2 * 4;
const ptr = Module._malloc(len);
let offset = 0;
@@ -251,7 +251,10 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
const bufferLen = decodingMethodLen + hotwordsFileLen;
const ruleFstsFileLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
const ruleFarsFileLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
const bufferLen =
decodingMethodLen + hotwordsFileLen + ruleFstsFileLen + ruleFarsFileLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
@@ -259,6 +262,13 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
offset += decodingMethodLen;
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
offset += hotwordsFileLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsFileLen);
offset += ruleFstsFileLen;
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsFileLen);
offset += ruleFarsFileLen;
offset = feat.len + model.len;
Module.setValue(ptr + offset, buffer, 'i8*'); // decoding method
@@ -286,6 +296,16 @@ function initSherpaOnnxOnlineRecognizerConfig(config, Module) {
offset += 4;
Module._CopyHeap(ctcFstDecoder.ptr, ctcFstDecoder.len, ptr + offset);
offset += ctcFstDecoder.len;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
offset += 4;
Module.setValue(
ptr + offset,
buffer + decodingMethodLen + hotwordsFileLen + ruleFstsFileLen, 'i8*');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model,
@@ -363,7 +383,9 @@ function createOnlineRecognizer(Module, myConfig) {
ctcFstDecoderConfig: {
graph: '',
maxActive: 3000,
}
},
ruleFsts: '',
ruleFars: '',
};
if (myConfig) {
recognizerConfig = myConfig;

View File

@@ -26,7 +26,7 @@ static_assert(sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOnlineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOnlineModelConfig) + 8 * 4 +
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig),
sizeof(SherpaOnnxOnlineCtcFstDecoderConfig) + 2 * 4,
"");
void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
@@ -71,6 +71,8 @@ void MyPrint(SherpaOnnxOnlineRecognizerConfig *config) {
config->rule3_min_utterance_length);
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
fprintf(stdout, "----------ctc fst decoder config----------\n");
fprintf(stdout, "graph: %s\n", config->ctc_fst_decoder_config.graph);

View File

@@ -31,6 +31,7 @@ string(APPEND MY_FLAGS " -sSTACK_SIZE=10485760 ")
string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exported_functions}] ")
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
@@ -51,4 +52,4 @@ install(
"$<TARGET_FILE_DIR:sherpa-onnx-wasm-kws-main>/sherpa-onnx-wasm-kws-main.data"
DESTINATION
bin/wasm
)
)

View File

@@ -31,6 +31,8 @@ string(APPEND MY_FLAGS " -sEXPORTED_FUNCTIONS=[_CopyHeap,_malloc,_free,${all_exp
string(APPEND MY_FLAGS "--preload-file ${CMAKE_CURRENT_SOURCE_DIR}/assets@. ")
string(APPEND MY_FLAGS " -sEXPORTED_RUNTIME_METHODS=['ccall','stringToUTF8','setValue','getValue','lengthBytesUTF8','UTF8ToString'] ")
message(STATUS "MY_FLAGS: ${MY_FLAGS}")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MY_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MY_FLAGS}")
set(CMAKE_EXECUTBLE_LINKER_FLAGS "${CMAKE_EXECUTBLE_LINKER_FLAGS} ${MY_FLAGS}")