Inverse text normalization API for other programming languages (#1019)

This commit is contained in:
Fangjun Kuang
2024-06-17 17:02:39 +08:00
committed by GitHub
parent b0f7ed3ee3
commit 6e09933d99
39 changed files with 669 additions and 104 deletions

View File

@@ -628,7 +628,7 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
const model = initSherpaOnnxOfflineModelConfig(config.modelConfig, Module);
const lm = initSherpaOnnxOfflineLMConfig(config.lmConfig, Module);
- const len = feat.len + model.len + lm.len + 4 * 4;
+ const len = feat.len + model.len + lm.len + 6 * 4;
const ptr = Module._malloc(len);
let offset = 0;
@@ -643,7 +643,10 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
const decodingMethodLen = Module.lengthBytesUTF8(config.decodingMethod) + 1;
const hotwordsFileLen = Module.lengthBytesUTF8(config.hotwordsFile) + 1;
- const bufferLen = decodingMethodLen + hotwordsFileLen;
+ const ruleFstsLen = Module.lengthBytesUTF8(config.ruleFsts || '') + 1;
+ const ruleFarsLen = Module.lengthBytesUTF8(config.ruleFars || '') + 1;
+ const bufferLen =
+ decodingMethodLen + hotwordsFileLen + ruleFstsLen + ruleFarsLen;
const buffer = Module._malloc(bufferLen);
offset = 0;
@@ -651,6 +654,13 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
offset += decodingMethodLen;
Module.stringToUTF8(config.hotwordsFile, buffer + offset, hotwordsFileLen);
offset += hotwordsFileLen;
Module.stringToUTF8(config.ruleFsts || '', buffer + offset, ruleFstsLen);
offset += ruleFstsLen;
Module.stringToUTF8(config.ruleFars || '', buffer + offset, ruleFarsLen);
offset += ruleFarsLen;
offset = feat.len + model.len + lm.len;
@@ -666,6 +676,15 @@ function initSherpaOnnxOfflineRecognizerConfig(config, Module) {
Module.setValue(ptr + offset, config.hotwordsScore, 'float');
offset += 4;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen, 'i8*');
offset += 4;
Module.setValue(
ptr + offset, buffer + decodingMethodLen + hotwordsFileLen + ruleFstsLen,
'i8*');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, feat: feat, model: model, lm: lm
}

View File

@@ -29,7 +29,7 @@ static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
sizeof(SherpaOnnxFeatureConfig) +
sizeof(SherpaOnnxOfflineLMConfig) +
- sizeof(SherpaOnnxOfflineModelConfig) + 4 * 4,
+ sizeof(SherpaOnnxOfflineModelConfig) + 6 * 4,
"");
void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -103,6 +103,8 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
fprintf(stdout, "max active paths: %d\n", config->max_active_paths);
fprintf(stdout, "hotwords_file: %s\n", config->hotwords_file);
fprintf(stdout, "hotwords_score: %.2f\n", config->hotwords_score);
fprintf(stdout, "rule_fsts: %s\n", config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", config->rule_fars);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {