Add streaming CTC ASR APIs for node-addon-api (#867)
This commit is contained in:
20
.github/scripts/test-nodejs-addon-npm.sh
vendored
20
.github/scripts/test-nodejs-addon-npm.sh
vendored
@@ -5,15 +5,6 @@ set -ex
|
|||||||
d=nodejs-addon-examples
|
d=nodejs-addon-examples
|
||||||
echo "dir: $d"
|
echo "dir: $d"
|
||||||
cd $d
|
cd $d
|
||||||
npm install --verbose
|
|
||||||
git status
|
|
||||||
ls -lh
|
|
||||||
ls -lh node_modules
|
|
||||||
|
|
||||||
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
|
|
||||||
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
|
|
||||||
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
|
|
||||||
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
|
|
||||||
|
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
||||||
@@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
|||||||
node test_asr_streaming_transducer.js
|
node test_asr_streaming_transducer.js
|
||||||
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
||||||
|
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
|
||||||
|
node ./test_asr_streaming_ctc.js
|
||||||
|
|
||||||
|
# To decode with HLG.fst
|
||||||
|
node ./test_asr_streaming_ctc_hlg.js
|
||||||
|
|
||||||
|
rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
|
||||||
|
|||||||
22
.github/workflows/test-nodejs-addon-api.yaml
vendored
22
.github/workflows/test-nodejs-addon-api.yaml
vendored
@@ -152,17 +152,23 @@ jobs:
|
|||||||
|
|
||||||
./node_modules/.bin/cmake-js compile --log-level verbose
|
./node_modules/.bin/cmake-js compile --log-level verbose
|
||||||
|
|
||||||
- name: Test streaming transducer
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
export PATH=$PWD/build/install/lib:$PATH
|
export PATH=$PWD/build/install/lib:$PATH
|
||||||
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
d=nodejs-addon-examples
|
||||||
|
cd $d
|
||||||
|
files=$(ls *.js)
|
||||||
|
echo $files
|
||||||
|
for f in ${files[@]}; do
|
||||||
|
echo $f
|
||||||
|
sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
|
||||||
|
done
|
||||||
|
cd ..
|
||||||
|
|
||||||
cd scripts/node-addon-api
|
cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
|
||||||
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
cp -v scripts/node-addon-api/lib/*.js $d/
|
||||||
tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
cp -v ./build/install/lib/lib* $d/
|
||||||
rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
|
||||||
|
|
||||||
node test/test_asr_streaming_transducer.js
|
.github/scripts/test-nodejs-addon-npm.sh
|
||||||
|
|
||||||
rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
|
||||||
|
|||||||
15
.github/workflows/test-nodejs-addon-npm.yaml
vendored
15
.github/workflows/test-nodejs-addon-npm.yaml
vendored
@@ -63,4 +63,19 @@ jobs:
|
|||||||
- name: Run tests
|
- name: Run tests
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
d=nodejs-addon-examples
|
||||||
|
echo "dir: $d"
|
||||||
|
cd $d
|
||||||
|
npm install --verbose
|
||||||
|
git status
|
||||||
|
ls -lh
|
||||||
|
ls -lh node_modules
|
||||||
|
|
||||||
|
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
|
||||||
|
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
|
||||||
|
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
|
||||||
|
|
||||||
|
cd ../
|
||||||
|
|
||||||
.github/scripts/test-nodejs-addon-npm.sh
|
.github/scripts/test-nodejs-addon-npm.sh
|
||||||
|
|||||||
@@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
|
|||||||
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
|
export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Voice Activity detection (VAD)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
||||||
|
|
||||||
|
|
||||||
|
# To run the test with a microphone, you need to install the package naudiodon2
|
||||||
|
npm install naudiodon2
|
||||||
|
|
||||||
|
node ./test_vad_microphone.js
|
||||||
|
```
|
||||||
|
|
||||||
## Streaming speech recognition with zipformer transducer
|
## Streaming speech recognition with zipformer transducer
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
@@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
|
|||||||
|
|
||||||
node ./test_asr_streaming_transducer.js
|
node ./test_asr_streaming_transducer.js
|
||||||
|
|
||||||
# To run the test with microphone, you need to install the package naudiodon2
|
# To run the test with a microphone, you need to install the package naudiodon2
|
||||||
npm install naudiodon2
|
npm install naudiodon2
|
||||||
|
|
||||||
node ./test_asr_streaming_transducer_microphone.js
|
node ./test_asr_streaming_transducer_microphone.js
|
||||||
```
|
```
|
||||||
|
|
||||||
# VAD
|
## Streaming speech recognition with zipformer CTC
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
|
||||||
|
|
||||||
|
node ./test_asr_streaming_ctc.js
|
||||||
|
|
||||||
# To run the test with microphone, you need to install the package naudiodon2
|
# To decode with HLG.fst
|
||||||
|
node ./test_asr_streaming_ctc_hlg.js
|
||||||
|
|
||||||
|
# To run the test with a microphone, you need to install the package naudiodon2
|
||||||
npm install naudiodon2
|
npm install naudiodon2
|
||||||
|
|
||||||
node ./test_vad_microphone.js
|
node ./test_asr_streaming_ctc_microphone.js
|
||||||
|
node ./test_asr_streaming_ctc_hlg_microphone.js
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
55
nodejs-addon-examples/test_asr_streaming_ctc.js
Normal file
55
nodejs-addon-examples/test_asr_streaming_ctc.js
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
const performance = require('perf_hooks').performance;
|
||||||
|
|
||||||
|
|
||||||
|
// Please download test files from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
// Recognizer configuration: feature extraction settings plus the streaming
// zipformer2 CTC model and its token table.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  }
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');

// Timing starts after the recognizer has been constructed, so model loading
// is not included in the reported elapsed time.
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Feed 0.4 s worth of zeros after the real audio.
// NOTE(review): presumably this lets the streaming model emit the last
// tokens of the utterance -- confirm.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

// Decode until the recognizer reports that nothing more is ready.
while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Declared with const: the previous bare assignment created an implicit
// global and would throw a ReferenceError in strict mode.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

// performance.now() is in milliseconds.
const elapsed_seconds = (stop - start) / 1000;
// Duration of the wave file itself; the tail padding is not counted.
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
|
||||||
58
nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
Normal file
58
nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
// Copyright (c) 2024 Xiaomi Corporation
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
const performance = require('perf_hooks').performance;
|
||||||
|
|
||||||
|
|
||||||
|
// Please download test files from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
|
||||||
|
// Recognizer configuration: feature extraction settings, the streaming
// zipformer2 CTC model with its token table, and the HLG graph passed to the
// CTC FST decoder.
const config = {
  'featConfig': {
    'sampleRate': 16000,
    'featureDim': 80,
  },
  'modelConfig': {
    'zipformer2Ctc': {
      'model':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    'tokens':
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
  },
  'ctcFstDecoderConfig': {
    'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  },
};

const waveFilename =
    './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';

const recognizer = new sherpa_onnx.OnlineRecognizer(config);
console.log('Started');

// Timing starts after the recognizer has been constructed, so model loading
// is not included in the reported elapsed time.
const start = performance.now();
const stream = recognizer.createStream();
const wave = sherpa_onnx.readWave(waveFilename);
stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});

// Feed 0.4 s worth of zeros after the real audio.
// NOTE(review): presumably this lets the streaming model emit the last
// tokens of the utterance -- confirm.
const tailPadding = new Float32Array(wave.sampleRate * 0.4);
stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});

// Decode until the recognizer reports that nothing more is ready.
while (recognizer.isReady(stream)) {
  recognizer.decode(stream);
}

// Declared with const: the previous bare assignment created an implicit
// global and would throw a ReferenceError in strict mode.
const result = recognizer.getResult(stream);
const stop = performance.now();
console.log('Done');

// performance.now() is in milliseconds.
const elapsed_seconds = (stop - start) / 1000;
// Duration of the wave file itself; the tail padding is not counted.
const duration = wave.samples.length / wave.sampleRate;
const real_time_factor = elapsed_seconds / duration;

console.log('Wave duration', duration.toFixed(3), 'seconds');
console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
console.log(
    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3));
console.log(waveFilename);
console.log('result\n', result);
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
// console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
|
||||||
|
/**
 * Creates the streaming recognizer used by this microphone example.
 *
 * The configuration combines the streaming zipformer2 CTC model, an HLG
 * graph for the CTC FST decoder, and endpoint detection with three rules.
 *
 * @returns a new sherpa_onnx.OnlineRecognizer built from that configuration.
 */
function createOnlineRecognizer() {
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  const modelConfig = {
    zipformer2Ctc: {
      model:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    tokens:
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
  };

  const ctcFstDecoderConfig = {
    graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
  };

  // Key order is kept the same as before: features, model, decoder graph,
  // then the endpoint settings.
  return new sherpa_onnx.OnlineRecognizer({
    featConfig,
    modelConfig,
    ctcFstDecoderConfig,
    enableEndpoint: true,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  });
}
|
||||||
|
|
||||||
|
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// Text most recently shown for the current segment, and the index of that
// segment (incremented each time a non-empty segment ends at an endpoint).
let lastText = '';
let segmentIndex = 0;

/**
 * Returns the float32 samples held by a Node.js Buffer.
 *
 * A Buffer may be a slice of a larger ArrayBuffer, so the view has to honour
 * the Buffer's own byteOffset and length; `new Float32Array(chunk.buffer)`
 * would expose the whole underlying ArrayBuffer instead. Trailing bytes that
 * do not form a complete sample are ignored.
 */
function bufferToFloat32(chunk) {
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const numSamples = Math.floor(chunk.byteLength / bytesPerSample);

  if (chunk.byteOffset % bytesPerSample === 0) {
    return new Float32Array(chunk.buffer, chunk.byteOffset, numSamples);
  }

  // A Float32Array view must start on a 4-byte boundary, so copy the bytes
  // into a fresh, aligned ArrayBuffer when the Buffer is misaligned.
  const aligned = new Uint8Array(numSamples * bytesPerSample);
  aligned.set(chunk.subarray(0, aligned.length));
  return new Float32Array(aligned.buffer);
}

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = bufferToFloat32(data);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  // Decode everything that is ready before reading the result.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Only redraw when the recognized text actually changed.
  if (text.length > 0 && lastText !== text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // A non-empty result closes the current segment; the stream is reset
    // either way so the next utterance starts fresh.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  // NOTE(review): free() is not defined anywhere in the visible code --
  // confirm that the stream and recognizer wrappers actually expose it.
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');
|
||||||
88
nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
Normal file
88
nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
|
||||||
|
//
|
||||||
|
const portAudio = require('naudiodon2');
|
||||||
|
// console.log(portAudio.getDevices());
|
||||||
|
|
||||||
|
const sherpa_onnx = require('sherpa-onnx-node');
|
||||||
|
|
||||||
|
/**
 * Creates the streaming recognizer used by this microphone example.
 *
 * The configuration combines the streaming zipformer2 CTC model, greedy
 * search decoding, and endpoint detection with three rules.
 *
 * @returns a new sherpa_onnx.OnlineRecognizer built from that configuration.
 */
function createOnlineRecognizer() {
  const featConfig = {
    sampleRate: 16000,
    featureDim: 80,
  };

  const modelConfig = {
    zipformer2Ctc: {
      model:
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
    },
    tokens:
        './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
    numThreads: 2,
    provider: 'cpu',
    debug: 1,
  };

  // Key order is kept the same as before: features, model, decoding
  // settings, then the endpoint settings.
  return new sherpa_onnx.OnlineRecognizer({
    featConfig,
    modelConfig,
    decodingMethod: 'greedy_search',
    maxActivePaths: 4,
    enableEndpoint: true,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  });
}
|
||||||
|
|
||||||
|
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// Text most recently shown for the current segment, and the index of that
// segment (incremented each time a non-empty segment ends at an endpoint).
let lastText = '';
let segmentIndex = 0;

/**
 * Returns the float32 samples held by a Node.js Buffer.
 *
 * A Buffer may be a slice of a larger ArrayBuffer, so the view has to honour
 * the Buffer's own byteOffset and length; `new Float32Array(chunk.buffer)`
 * would expose the whole underlying ArrayBuffer instead. Trailing bytes that
 * do not form a complete sample are ignored.
 */
function bufferToFloat32(chunk) {
  const bytesPerSample = Float32Array.BYTES_PER_ELEMENT;
  const numSamples = Math.floor(chunk.byteLength / bytesPerSample);

  if (chunk.byteOffset % bytesPerSample === 0) {
    return new Float32Array(chunk.buffer, chunk.byteOffset, numSamples);
  }

  // A Float32Array view must start on a 4-byte boundary, so copy the bytes
  // into a fresh, aligned ArrayBuffer when the Buffer is misaligned.
  const aligned = new Uint8Array(numSamples * bytesPerSample);
  aligned.set(chunk.subarray(0, aligned.length));
  return new Float32Array(aligned.buffer);
}

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected, if
                         // set false then just log the error
    deviceId: -1,  // Use -1 or omit the deviceId to select the default device
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

const display = new sherpa_onnx.Display(50);

ai.on('data', data => {
  const samples = bufferToFloat32(data);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  // Decode everything that is ready before reading the result.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Only redraw when the recognized text actually changed.
  if (text.length > 0 && lastText !== text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  if (isEndpoint) {
    // A non-empty result closes the current segment; the stream is reset
    // either way so the next utterance starts fresh.
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

ai.on('close', () => {
  console.log('Free resources');
  // NOTE(review): free() is not defined anywhere in the visible code --
  // confirm that the stream and recognizer wrappers actually expose it.
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');
|
||||||
@@ -24,7 +24,6 @@ const config = {
|
|||||||
'numThreads': 2,
|
'numThreads': 2,
|
||||||
'provider': 'cpu',
|
'provider': 'cpu',
|
||||||
'debug': 1,
|
'debug': 1,
|
||||||
'modelType': 'zipformer',
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -53,5 +52,8 @@ const duration = wave.samples.length / wave.sampleRate;
|
|||||||
const real_time_factor = elapsed_seconds / duration;
|
const real_time_factor = elapsed_seconds / duration;
|
||||||
console.log('Wave duration', duration.toFixed(3), 'secodns')
|
console.log('Wave duration', duration.toFixed(3), 'secodns')
|
||||||
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
|
console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
|
||||||
console.log('RTF', real_time_factor.toFixed(3))
|
console.log(
|
||||||
console.log('result', result.text)
|
`RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
|
||||||
|
real_time_factor.toFixed(3))
|
||||||
|
console.log(waveFilename)
|
||||||
|
console.log('result\n', result)
|
||||||
|
|||||||
@@ -25,7 +25,6 @@ function createOnlineRecognizer() {
|
|||||||
'numThreads': 2,
|
'numThreads': 2,
|
||||||
'provider': 'cpu',
|
'provider': 'cpu',
|
||||||
'debug': 1,
|
'debug': 1,
|
||||||
'modelType': 'zipformer',
|
|
||||||
},
|
},
|
||||||
'decodingMethod': 'greedy_search',
|
'decodingMethod': 'greedy_search',
|
||||||
'maxActivePaths': 4,
|
'maxActivePaths': 4,
|
||||||
@@ -68,7 +67,7 @@ ai.on('data', data => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const isEndpoint = recognizer.isEndpoint(stream);
|
const isEndpoint = recognizer.isEndpoint(stream);
|
||||||
const text = recognizer.getResult(stream).text;
|
const text = recognizer.getResult(stream).text.toLowerCase();
|
||||||
|
|
||||||
if (text.length > 0 && lastText != text) {
|
if (text.length > 0 && lastText != text) {
|
||||||
lastText = text;
|
lastText = text;
|
||||||
|
|||||||
@@ -158,7 +158,7 @@ def get_piper_models() -> List[TtsModel]:
|
|||||||
TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
|
TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
|
||||||
TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
|
TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
|
||||||
TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
|
TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
|
||||||
TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
|
# TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
|
||||||
TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
|
TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
|
||||||
TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
|
TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
|
||||||
TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
|
TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ const possible_paths = [
|
|||||||
'../build/Debug/sherpa-onnx.node',
|
'../build/Debug/sherpa-onnx.node',
|
||||||
`./node_modules/sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
|
`./node_modules/sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
|
||||||
`../sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
|
`../sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
|
||||||
|
'./sherpa-onnx.node',
|
||||||
];
|
];
|
||||||
|
|
||||||
let found = false;
|
let found = false;
|
||||||
|
|||||||
15
scripts/node-addon-api/run.sh
Executable file
15
scripts/node-addon-api/run.sh
Executable file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/usr/bin/env bash
#
# Compiles the node addon with cmake-js against a shared-library build of
# sherpa-onnx installed under ../../build/install.
#
# All paths are relative to the current directory, so run this script from
# the directory that contains it.

set -ex

core_lib_dir=../../build/install/lib

# Configure, build and install sherpa-onnx first when neither the .dylib nor
# the .so flavour of the core library has been installed yet.
if ! [[ -f $core_lib_dir/libsherpa-onnx-core.dylib || -f $core_lib_dir/libsherpa-onnx-core.so ]]; then
  pushd ../../
  mkdir -p build
  cd build
  cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
  make install
  popd
fi

# Exported for the cmake-js build that follows.
export SHERPA_ONNX_INSTALL_DIR=$PWD/../../build/install

./node_modules/.bin/cmake-js compile
|
||||||
@@ -89,6 +89,30 @@ static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig(
|
|||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts the optional `zipformer2Ctc` sub-object of `obj` into the C config
// struct.
//
// Every field of the result is zeroed first. When `zipformer2Ctc.model` is a
// string, it is copied into a NUL-terminated buffer allocated with new[] and
// stored in `config.model`. This function never frees that buffer.
static SherpaOnnxOnlineZipformer2CtcModelConfig
GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
  SherpaOnnxOnlineZipformer2CtcModelConfig config;
  memset(&config, 0, sizeof(config));

  if (obj.Has("zipformer2Ctc") && obj.Get("zipformer2Ctc").IsObject()) {
    Napi::Object ctc = obj.Get("zipformer2Ctc").As<Napi::Object>();

    if (ctc.Has("model") && ctc.Get("model").IsString()) {
      const std::string path = ctc.Get("model").As<Napi::String>().Utf8Value();

      // size() + 1 so that the terminating NUL of c_str() is copied as well.
      char *buf = new char[path.size() + 1];
      std::copy_n(path.c_str(), path.size() + 1, buf);

      config.model = buf;
    }
  }

  return config;
}
|
||||||
|
|
||||||
static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
|
static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
|
||||||
SherpaOnnxOnlineModelConfig config;
|
SherpaOnnxOnlineModelConfig config;
|
||||||
memset(&config, 0, sizeof(config));
|
memset(&config, 0, sizeof(config));
|
||||||
@@ -100,6 +124,7 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
|
|||||||
Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();
|
Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();
|
||||||
|
|
||||||
config.transducer = GetOnlineTransducerModelConfig(o);
|
config.transducer = GetOnlineTransducerModelConfig(o);
|
||||||
|
config.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
|
||||||
|
|
||||||
if (o.Has("tokens") && o.Get("tokens").IsString()) {
|
if (o.Has("tokens") && o.Get("tokens").IsString()) {
|
||||||
Napi::String tokens = o.Get("tokens").As<Napi::String>();
|
Napi::String tokens = o.Get("tokens").As<Napi::String>();
|
||||||
@@ -147,6 +172,35 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
|
|||||||
return config;
|
return config;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts the optional `ctcFstDecoderConfig` sub-object of `obj` into the C
// config struct.
//
// Every field of the result is zeroed first. When `graph` is a string, it is
// copied into a NUL-terminated buffer allocated with new[] and stored in
// `config.graph`; this function never frees that buffer. When `maxActive` is
// a number, its 32-bit integer value is stored in `config.max_active`.
static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig(
    Napi::Object obj) {
  SherpaOnnxOnlineCtcFstDecoderConfig config;
  memset(&config, 0, sizeof(config));

  if (obj.Has("ctcFstDecoderConfig") &&
      obj.Get("ctcFstDecoderConfig").IsObject()) {
    Napi::Object decoder = obj.Get("ctcFstDecoderConfig").As<Napi::Object>();

    if (decoder.Has("graph") && decoder.Get("graph").IsString()) {
      const std::string path =
          decoder.Get("graph").As<Napi::String>().Utf8Value();

      // size() + 1 so that the terminating NUL of c_str() is copied as well.
      char *buf = new char[path.size() + 1];
      std::copy_n(path.c_str(), path.size() + 1, buf);

      config.graph = buf;
    }

    if (decoder.Has("maxActive") && decoder.Get("maxActive").IsNumber()) {
      config.max_active =
          decoder.Get("maxActive").As<Napi::Number>().Int32Value();
    }
  }

  return config;
}
|
||||||
|
|
||||||
static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
||||||
const Napi::CallbackInfo &info) {
|
const Napi::CallbackInfo &info) {
|
||||||
Napi::Env env = info.Env();
|
Napi::Env env = info.Env();
|
||||||
@@ -234,6 +288,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
|||||||
config.Get("hotwordsScore").As<Napi::Number>().FloatValue();
|
config.Get("hotwordsScore").As<Napi::Number>().FloatValue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(config);
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
printf("encoder: %s\n", c.model_config.transducer.encoder
|
printf("encoder: %s\n", c.model_config.transducer.encoder
|
||||||
? c.model_config.transducer.encoder
|
? c.model_config.transducer.encoder
|
||||||
@@ -277,6 +333,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
|||||||
delete[] c.model_config.transducer.joiner;
|
delete[] c.model_config.transducer.joiner;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c.model_config.zipformer2_ctc.model) {
|
||||||
|
delete[] c.model_config.zipformer2_ctc.model;
|
||||||
|
}
|
||||||
|
|
||||||
if (c.model_config.tokens) {
|
if (c.model_config.tokens) {
|
||||||
delete[] c.model_config.tokens;
|
delete[] c.model_config.tokens;
|
||||||
}
|
}
|
||||||
@@ -297,6 +357,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
|||||||
delete[] c.hotwords_file;
|
delete[] c.hotwords_file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (c.ctc_fst_decoder_config.graph) {
|
||||||
|
delete[] c.ctc_fst_decoder_config.graph;
|
||||||
|
}
|
||||||
|
|
||||||
if (!recognizer) {
|
if (!recognizer) {
|
||||||
Napi::TypeError::New(env, "Please check your config!")
|
Napi::TypeError::New(env, "Please check your config!")
|
||||||
.ThrowAsJavaScriptException();
|
.ThrowAsJavaScriptException();
|
||||||
|
|||||||
@@ -216,6 +216,8 @@ class OnlineRecognizerCtcImpl : public OnlineRecognizerImpl {
|
|||||||
// clear states
|
// clear states
|
||||||
s->SetStates(model_->GetInitStates());
|
s->SetStates(model_->GetInitStates());
|
||||||
|
|
||||||
|
s->GetFasterDecoderProcessedFrames() = 0;
|
||||||
|
|
||||||
// Note: We only update counters. The underlying audio samples
|
// Note: We only update counters. The underlying audio samples
|
||||||
// are not discarded.
|
// are not discarded.
|
||||||
s->Reset();
|
s->Reset();
|
||||||
|
|||||||
Reference in New Issue
Block a user