Add JavaScript API (WebAssembly) for FireRedAsr model. (#1874)
This commit is contained in:
@@ -14,6 +14,7 @@ find dart-api-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\
|
|||||||
find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
find flutter-examples -name *.yaml -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
find flutter -name *.podspec -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
find nodejs-addon-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
|
find nodejs-examples -name package.json -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
|
|
||||||
find harmony-os -name "README.md" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
find harmony-os -name "README.md" -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
find harmony-os -name oh-package.json5 -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
find harmony-os -name oh-package.json5 -type f -exec sed -i.bak 's/1\.10\.43/1\.10\.44/g' {} \;
|
||||||
|
|||||||
@@ -216,6 +216,21 @@ tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
|
|||||||
node ./test-offline-whisper.js
|
node ./test-offline-whisper.js
|
||||||
```
|
```
|
||||||
|
|
||||||
|
## ./test-offline-fire-red-asr.js
|
||||||
|
|
||||||
|
[./test-offline-fire-red-asr.js](./test-offline-fire-red-asr.js) demonstrates
|
||||||
|
how to decode a file with a FireRedAsr AED model.
|
||||||
|
|
||||||
|
You can use the following command to run it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
|
||||||
|
rm sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2
|
||||||
|
|
||||||
|
node ./test-offline-fire-red-asr.js
|
||||||
|
```
|
||||||
|
|
||||||
## ./test-offline-moonshine.js
|
## ./test-offline-moonshine.js
|
||||||
|
|
||||||
[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
|
[./test-offline-moonshine.js](./test-offline-moonshine.js) demonstrates
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
{
|
{
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"naudiodon2": "^2.4.0",
|
"naudiodon2": "^2.4.0",
|
||||||
"sherpa-onnx": "*",
|
"sherpa-onnx": "^1.10.44",
|
||||||
"wav": "^1.0.2"
|
"wav": "^1.0.2"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
37
nodejs-examples/test-offline-fire-red-asr.js
Normal file
37
nodejs-examples/test-offline-fire-red-asr.js
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
// Copyright (c) 2025 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const sherpa_onnx = require('sherpa-onnx');
|
||||||
|
|
||||||
|
/**
 * Create an offline (non-streaming) recognizer configured for the
 * FireRedAsr AED model.
 *
 * All model files are referenced by './'-prefixed paths inside the extracted
 * sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16 directory.
 *
 * @returns the value returned by sherpa_onnx.createOfflineRecognizer()
 */
function createOfflineRecognizer() {
  const modelConfig = {
    fireRedAsr: {
      encoder:
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/encoder.int8.onnx',
      decoder:
          './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/decoder.int8.onnx',
    },
    tokens: './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/tokens.txt',
    // NOTE(review): presumably turns on verbose logging in the library;
    // confirm against the sherpa-onnx config documentation.
    debug: 1,
  };

  const config = {
    modelConfig: modelConfig,
  };

  return sherpa_onnx.createOfflineRecognizer(config);
}
|
||||||
|
|
||||||
|
// Decode one bundled test wave file with the FireRedAsr model and print the
// recognized text.

// Declared with const: assigning without a declaration would create implicit
// globals (and throws a ReferenceError in strict mode / ES modules).
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();

const waveFilename =
    './sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16/test_wavs/0.wav';
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform(wave.sampleRate, wave.samples);

recognizer.decode(stream);
const text = recognizer.getResult(stream).text;
console.log(text);

// Release the stream first, then the recognizer that created it.
stream.free();
recognizer.free();
|
||||||
@@ -35,6 +35,10 @@ function freeConfig(config, Module) {
|
|||||||
freeConfig(config.whisper, Module)
|
freeConfig(config.whisper, Module)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ('fireRedAsr' in config) {
|
||||||
|
freeConfig(config.fireRedAsr, Module)
|
||||||
|
}
|
||||||
|
|
||||||
if ('moonshine' in config) {
|
if ('moonshine' in config) {
|
||||||
freeConfig(config.moonshine, Module)
|
freeConfig(config.moonshine, Module)
|
||||||
}
|
}
|
||||||
@@ -651,6 +655,35 @@ function initSherpaOnnxOfflineMoonshineModelConfig(config, Module) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Lay out a FireRedAsr model config in the WebAssembly heap.
 *
 * Two allocations are made through Module._malloc():
 *   - `buffer`: the encoder path followed by the decoder path, each written
 *     with Module.stringToUTF8() into a slot of (UTF-8 byte length + 1) bytes.
 *   - `ptr`: an 8-byte record holding two 4-byte pointers, the first to the
 *     encoder string and the second to the decoder string inside `buffer`.
 *
 * @param config  Object with optional string fields `encoder` and `decoder`;
 *                a missing or falsy field is treated as ''.
 * @param Module  Object providing lengthBytesUTF8, _malloc, stringToUTF8 and
 *                setValue.
 * @returns {{buffer: number, ptr: number, len: number}} the two allocated
 *          addresses and the size in bytes of the record at `ptr`.
 *          NOTE(review): the caller presumably releases both allocations via
 *          freeConfig(); confirm against its implementation.
 */
function initSherpaOnnxOfflineFireRedAsrModelConfig(config, Module) {
  const encoder = config.encoder || '';
  const decoder = config.decoder || '';

  // +1 leaves room for the terminating NUL of each string.
  const encoderLen = Module.lengthBytesUTF8(encoder) + 1;
  const decoderLen = Module.lengthBytesUTF8(decoder) + 1;

  const buffer = Module._malloc(encoderLen + decoderLen);

  const len = 2 * 4;  // 2 pointers
  const ptr = Module._malloc(len);

  // The strings are stored back to back inside `buffer`.
  const encoderAddr = buffer;
  const decoderAddr = buffer + encoderLen;

  Module.stringToUTF8(encoder, encoderAddr, encoderLen);
  Module.stringToUTF8(decoder, decoderAddr, decoderLen);

  Module.setValue(ptr, encoderAddr, 'i8*');
  Module.setValue(ptr + 4, decoderAddr, 'i8*');

  return {
    buffer: buffer, ptr: ptr, len: len,
  };
}
|
||||||
|
|
||||||
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
|
function initSherpaOnnxOfflineTdnnModelConfig(config, Module) {
|
||||||
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
const n = Module.lengthBytesUTF8(config.model || '') + 1;
|
||||||
const buffer = Module._malloc(n);
|
const buffer = Module._malloc(n);
|
||||||
@@ -755,6 +788,13 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!('fireRedAsr' in config)) {
|
||||||
|
config.fireRedAsr = {
|
||||||
|
encoder: '',
|
||||||
|
decoder: '',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
if (!('tdnn' in config)) {
|
if (!('tdnn' in config)) {
|
||||||
config.tdnn = {
|
config.tdnn = {
|
||||||
model: '',
|
model: '',
|
||||||
@@ -789,8 +829,11 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|||||||
const moonshine =
|
const moonshine =
|
||||||
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
|
initSherpaOnnxOfflineMoonshineModelConfig(config.moonshine, Module);
|
||||||
|
|
||||||
|
const fireRedAsr =
|
||||||
|
initSherpaOnnxOfflineFireRedAsrModelConfig(config.fireRedAsr, Module);
|
||||||
|
|
||||||
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
const len = transducer.len + paraformer.len + nemoCtc.len + whisper.len +
|
||||||
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len;
|
tdnn.len + 8 * 4 + senseVoice.len + moonshine.len + fireRedAsr.len;
|
||||||
|
|
||||||
const ptr = Module._malloc(len);
|
const ptr = Module._malloc(len);
|
||||||
|
|
||||||
@@ -884,11 +927,15 @@ function initSherpaOnnxOfflineModelConfig(config, Module) {
|
|||||||
offset += senseVoice.len;
|
offset += senseVoice.len;
|
||||||
|
|
||||||
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
|
Module._CopyHeap(moonshine.ptr, moonshine.len, ptr + offset);
|
||||||
|
offset += moonshine.len;
|
||||||
|
|
||||||
|
Module._CopyHeap(fireRedAsr.ptr, fireRedAsr.len, ptr + offset);
|
||||||
|
offset += fireRedAsr.len;
|
||||||
|
|
||||||
return {
|
return {
|
||||||
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
buffer: buffer, ptr: ptr, len: len, transducer: transducer,
|
||||||
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
paraformer: paraformer, nemoCtc: nemoCtc, whisper: whisper, tdnn: tdnn,
|
||||||
senseVoice: senseVoice, moonshine: moonshine,
|
senseVoice: senseVoice, moonshine: moonshine, fireRedAsr: fireRedAsr
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ static_assert(sizeof(SherpaOnnxOfflineParaformerModelConfig) == 4, "");
|
|||||||
|
|
||||||
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineNemoEncDecCtcModelConfig) == 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineWhisperModelConfig) == 5 * 4, "");
|
||||||
|
static_assert(sizeof(SherpaOnnxOfflineFireRedAsrModelConfig) == 2 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineMoonshineModelConfig) == 4 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineTdnnModelConfig) == 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
|
static_assert(sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) == 3 * 4, "");
|
||||||
@@ -27,7 +28,9 @@ static_assert(sizeof(SherpaOnnxOfflineModelConfig) ==
|
|||||||
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
|
sizeof(SherpaOnnxOfflineWhisperModelConfig) +
|
||||||
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
|
sizeof(SherpaOnnxOfflineTdnnModelConfig) + 8 * 4 +
|
||||||
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
|
sizeof(SherpaOnnxOfflineSenseVoiceModelConfig) +
|
||||||
sizeof(SherpaOnnxOfflineMoonshineModelConfig),
|
sizeof(SherpaOnnxOfflineMoonshineModelConfig) +
|
||||||
|
sizeof(SherpaOnnxOfflineFireRedAsrModelConfig),
|
||||||
|
|
||||||
"");
|
"");
|
||||||
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
static_assert(sizeof(SherpaOnnxFeatureConfig) == 2 * 4, "");
|
||||||
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
|
static_assert(sizeof(SherpaOnnxOfflineRecognizerConfig) ==
|
||||||
@@ -69,6 +72,7 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
|||||||
auto tdnn = &model_config->tdnn;
|
auto tdnn = &model_config->tdnn;
|
||||||
auto sense_voice = &model_config->sense_voice;
|
auto sense_voice = &model_config->sense_voice;
|
||||||
auto moonshine = &model_config->moonshine;
|
auto moonshine = &model_config->moonshine;
|
||||||
|
auto fire_red_asr = &model_config->fire_red_asr;
|
||||||
|
|
||||||
fprintf(stdout, "----------offline transducer model config----------\n");
|
fprintf(stdout, "----------offline transducer model config----------\n");
|
||||||
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
fprintf(stdout, "encoder: %s\n", transducer->encoder);
|
||||||
@@ -102,6 +106,10 @@ void PrintOfflineRecognizerConfig(SherpaOnnxOfflineRecognizerConfig *config) {
|
|||||||
fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
|
fprintf(stdout, "uncached_decoder: %s\n", moonshine->uncached_decoder);
|
||||||
fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
|
fprintf(stdout, "cached_decoder: %s\n", moonshine->cached_decoder);
|
||||||
|
|
||||||
|
fprintf(stdout, "----------offline FireRedAsr model config----------\n");
|
||||||
|
fprintf(stdout, "encoder: %s\n", fire_red_asr->encoder);
|
||||||
|
fprintf(stdout, "decoder: %s\n", fire_red_asr->decoder);
|
||||||
|
|
||||||
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
fprintf(stdout, "tokens: %s\n", model_config->tokens);
|
||||||
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
fprintf(stdout, "num_threads: %d\n", model_config->num_threads);
|
||||||
fprintf(stdout, "provider: %s\n", model_config->provider);
|
fprintf(stdout, "provider: %s\n", model_config->provider);
|
||||||
|
|||||||
Reference in New Issue
Block a user