From 99defc5b90a1976ea243ee54fcd1c2ea6647e2f0 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 15 May 2025 11:27:22 +0800 Subject: [PATCH] Add nodejs example for parakeet-tdt-0.6b-v2. (#2219) --- .github/scripts/test-nodejs-addon-npm.sh | 8 +++ nodejs-addon-examples/README.md | 11 ++++ ..._asr_non_streaming_nemo_parakeet_tdt_v2.js | 51 +++++++++++++++++++ 3 files changed, 70 insertions(+) create mode 100644 nodejs-addon-examples/test_asr_non_streaming_nemo_parakeet_tdt_v2.js diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index 73b99798..49bd6bbd 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -10,6 +10,14 @@ arch=$(node -p "require('os').arch()") platform=$(node -p "require('os').platform()") node_version=$(node -p "process.versions.node.split('.')[0]") +echo "----------non-streaming ASR NeMo parakeet tdt----------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 +tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 +rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 + +node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js +rm -rf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8 + echo "----------non-streaming ASR dolphin CTC----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index 00edb064..88de0d0e 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -123,6 +123,7 @@ The following tables list the examples in this folder. 
|[./test_asr_non_streaming_moonshine.js](./test_asr_non_streaming_moonshine.js)|Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine)| |[./test_vad_with_non_streaming_asr_moonshine.js](./test_vad_with_non_streaming_asr_moonshine.js)| Non-streaming speech recognition from a file using [Moonshine](https://github.com/usefulsensors/moonshine) + [Silero VAD](https://github.com/snakers4/silero-vad)| |[./test_asr_non_streaming_nemo_ctc.js](./test_asr_non_streaming_nemo_ctc.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) CTC model with greedy search| +|[./test_asr_non_streaming_nemo_parakeet_tdt_v2.js](./test_asr_non_streaming_nemo_parakeet_tdt_v2.js)|Non-streaming speech recognition from a file using a [NeMo](https://github.com/NVIDIA/NeMo) [parakeet-tdt-0.6b-v2](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/offline-transducer/nemo-transducer-models.html#sherpa-onnx-nemo-parakeet-tdt-0-6b-v2-int8-english) model with greedy search| |[./test_asr_non_streaming_dolphin_ctc.js](./test_asr_non_streaming_dolphin_ctc.js)|Non-streaming speech recognition from a file using a [Dolphinhttps://github.com/DataoceanAI/Dolphin]) CTC model with greedy search| |[./test_asr_non_streaming_paraformer.js](./test_asr_non_streaming_paraformer.js)|Non-streaming speech recognition from a file using [Paraformer](https://github.com/alibaba-damo-academy/FunASR)| |[./test_asr_non_streaming_sense_voice.js](./test_asr_non_streaming_sense_voice.js)|Non-streaming speech recognition from a file using [SenseVoice](https://github.com/FunAudioLLM/SenseVoice)| @@ -361,6 +362,16 @@ rm sherpa-onnx-dolphin-base-ctc-multi-lang-int8-2025-04-02.tar.bz2 node ./test_asr_non_streaming_dolphin_ctc.js ``` +### Non-streaming speech recognition with NeMo parakeet-tdt-0.6b-v2 models + +```bash +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2 
+tar xvf sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
+rm sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8.tar.bz2
+
+node ./test_asr_non_streaming_nemo_parakeet_tdt_v2.js
+```
+
 ### Non-streaming speech recognition with NeMo CTC models
 
 ```bash
diff --git a/nodejs-addon-examples/test_asr_non_streaming_nemo_parakeet_tdt_v2.js b/nodejs-addon-examples/test_asr_non_streaming_nemo_parakeet_tdt_v2.js
new file mode 100644
index 00000000..ac3517c3
--- /dev/null
+++ b/nodejs-addon-examples/test_asr_non_streaming_nemo_parakeet_tdt_v2.js
@@ -0,0 +1,51 @@
+// Copyright (c) 2025 Xiaomi Corporation
+const sherpa_onnx = require('sherpa-onnx-node');
+
+// Decode one wave file with parakeet-tdt-0.6b-v2. Download the test files from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+const config = {
+  'featConfig': {
+    'sampleRate': 16000,
+    'featureDim': 80,
+  },
+  'modelConfig': {
+    'transducer': {
+      'encoder':
+          './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/encoder.int8.onnx',
+      'decoder':
+          './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/decoder.int8.onnx',
+      'joiner': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/joiner.int8.onnx',
+    },
+    'tokens': './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/tokens.txt',
+    'numThreads': 2,
+    'provider': 'cpu',
+    'debug': 1,
+    'modelType': 'nemo_transducer',
+  }
+};
+
+const waveFilename =
+    './sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8/test_wavs/0.wav';
+
+const recognizer = new sherpa_onnx.OfflineRecognizer(config);
+console.log('Started');
+const start = Date.now();  // recognizer construction is not timed
+const stream = recognizer.createStream();
+const wave = sherpa_onnx.readWave(waveFilename);
+stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
+
+recognizer.decode(stream);
+const result = recognizer.getResult(stream);  // declared: no implicit global
+const stop = Date.now();
+console.log('Done');
+
+const elapsed_seconds = (stop - start) / 1000;  // Date.now() is in ms
+const duration = wave.samples.length / wave.sampleRate;
+const real_time_factor = elapsed_seconds / duration;
+console.log('Wave duration', duration.toFixed(3), 'seconds');
+console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
+console.log(
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
+    real_time_factor.toFixed(3));
+console.log(waveFilename);
+console.log('result\n', result);