Add WebAssembly for NodeJS. (#628)

This commit is contained in:
Fangjun Kuang
2024-03-03 20:00:36 +08:00
committed by GitHub
parent ac6825ff11
commit ed06ced16f
39 changed files with 1450 additions and 1887 deletions

View File

@@ -1,3 +1,4 @@
node_modules
lib
package-lock.json
*.tar.bz2

View File

@@ -2,38 +2,18 @@
This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
Before you continue, please first install the npm package `sherpa-onnx` by
Before you continue, please first run
```bash
npm install sherpa-onnx
cd ./nodejs-examples
npm i
```
In the following, we describe how to use [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx)
for text-to-speech and speech-to-text.
**Caution**: If you get the following error:
```
/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67
if (match = err.match(/^(([^ \t()])+\.so([^ \t:()])*):([ \t])*/)) {
^
TypeError: Cannot read properties of null (reading 'match')
at new DynamicLibrary (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/dynamic_library.js:67:21)
at Object.Library (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/ffi-napi/lib/library.js:47:10)
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/node_modules/sherpa-onnx3/index.js:268:28)
at Module._compile (node:internal/modules/cjs/loader:1376:14)
at Module._extensions..js (node:internal/modules/cjs/loader:1435:10)
at Module.load (node:internal/modules/cjs/loader:1207:32)
at Module._load (node:internal/modules/cjs/loader:1023:12)
at Module.require (node:internal/modules/cjs/loader:1235:19)
at require (node:internal/modules/helpers:176:18)
at Object.<anonymous> (/Users/fangjun/open-source/sherpa-onnx/nodejs-examples/test-offline-tts-zh.js:3:21)
```
Please downgrade your Node.js to version v13.14.0. See also
https://github.com/node-ffi-napi/node-ffi-napi/issues/244
and
https://github.com/node-ffi-napi/node-ffi-napi/issues/97.
Note: You need `Node >= 18`.
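All of the examples below follow the same pattern: build a plain JavaScript object describing the configuration and pass it to a factory function such as `sherpa_onnx.createOfflineTts()` or `sherpa_onnx.createOfflineRecognizer()`. Here is a minimal sketch of that pattern, condensed from the English TTS example in this commit (it assumes you have downloaded and unpacked `vits-piper-en_US-amy-low` into the current directory):

```js
const sherpa_onnx = require('sherpa-onnx');

// Configuration is a nested plain object; the factory builds the
// WebAssembly-backed TTS engine from it.
const tts = sherpa_onnx.createOfflineTts({
  offlineTtsModelConfig: {
    offlineTtsVitsModelConfig: {
      model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
      lexicon: '',
      tokens: './vits-piper-en_US-amy-low/tokens.txt',
      dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
      noiseScale: 0.667,
      noiseScaleW: 0.8,
      lengthScale: 1.0,
    },
    numThreads: 1,
    debug: 1,
    provider: 'cpu',
  },
  ruleFsts: '',
  maxNumSentences: 1,
});

// generate() takes the text, a speaker id, and a speed factor.
const audio = tts.generate({text: 'Hello from sherpa-onnx.', sid: 0, speed: 1.0});
tts.save('./hello.wav', audio);
tts.free();
```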
# Text-to-speech
@@ -71,13 +51,7 @@ node ./test-offline-tts-zh.js
# Speech-to-text
In the following, we demonstrate how to decode files and how to perform
speech recognition with a microphone using `nodejs`. We need to install two additional
npm packages:
```bash
npm install wav naudiodon2
```
speech recognition with a microphone using `nodejs`.
## ./test-offline-nemo-ctc.js
@@ -200,60 +174,3 @@ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherp
tar xvf sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13.tar.bz2
node ./test-online-zipformer2-ctc.js
```
## ./test-vad-microphone-offline-paraformer.js
[./test-vad-microphone-offline-paraformer.js](./test-vad-microphone-offline-paraformer.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with non-streaming Paraformer for speech recognition from a microphone.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
tar xvf sherpa-onnx-paraformer-zh-2023-03-28.tar.bz2
node ./test-vad-microphone-offline-paraformer.js
```
## ./test-vad-microphone-offline-transducer.js
[./test-vad-microphone-offline-transducer.js](./test-vad-microphone-offline-transducer.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with a non-streaming transducer model for speech recognition from a microphone.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
tar xvf sherpa-onnx-zipformer-en-2023-06-26.tar.bz2
node ./test-vad-microphone-offline-transducer.js
```
## ./test-vad-microphone-offline-whisper.js
[./test-vad-microphone-offline-whisper.js](./test-vad-microphone-offline-whisper.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad)
with Whisper for speech recognition from a microphone.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.en.tar.bz2
tar xvf sherpa-onnx-whisper-tiny.en.tar.bz2
node ./test-vad-microphone-offline-whisper.js
```
## ./test-vad-microphone.js
[./test-vad-microphone.js](./test-vad-microphone.js)
demonstrates how to use [silero-vad](https://github.com/snakers4/silero-vad) to detect speech from a microphone.
You can use the following command to run it:
```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
node ./test-vad-microphone.js
```

View File

@@ -1,7 +1,7 @@
{
"dependencies": {
"naudiodon2": "^2.4.0",
"sherpa-onnx": "^1.8.12",
"sherpa-onnx": "*",
"wav": "^1.0.2"
}
}

View File

@@ -1,4 +1,4 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
const fs = require('fs');
const {Readable} = require('stream');
@@ -6,32 +6,58 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOfflineRecognizer() {
let featConfig = {
sampleRate: 16000,
featureDim: 80,
};
// test online recognizer
const nemoCtc = new sherpa_onnx.OfflineNemoEncDecCtcModelConfig();
nemoCtc.model = './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx';
const tokens = './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt';
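// All model sections are listed in the config; the unused types
// (transducer, paraformer, whisper, tdnn) are simply left as empty strings.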
let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: '',
},
nemoCtc: {
model: './sherpa-onnx-nemo-ctc-en-conformer-small/model.int8.onnx',
},
whisper: {
encoder: '',
decoder: '',
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-nemo-ctc-en-conformer-small/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'nemo_ctc',
};
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.nemoCtc = nemoCtc;
modelConfig.tokens = tokens;
modelConfig.modelType = 'nemo_ctc';
let lmConfig = {
model: '',
scale: 1.0,
};
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
return sherpa_onnx.createOfflineRecognizer(config);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename =
'./sherpa-onnx-nemo-ctc-en-conformer-small/test_wavs/0.wav';
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {highWaterMark: 4096})
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
stream.free();
recognizer.free();

View File

@@ -6,32 +6,59 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOfflineRecognizer() {
let featConfig = {
sampleRate: 16000,
featureDim: 80,
};
// test online recognizer
const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.onnx';
const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
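// model.int8.onnx is the int8-quantized variant: smaller and typically
// faster on CPU than the float32 model.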
let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'paraformer',
};
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.paraformer = paraformer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'paraformer';
let lmConfig = {
model: '',
scale: 1.0,
};
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
return sherpa_onnx.createOfflineRecognizer(config);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './sherpa-onnx-paraformer-zh-2023-03-28/test_wavs/0.wav';
@@ -71,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
stream.free();
recognizer.free();

View File

@@ -6,37 +6,60 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOfflineRecognizer() {
let featConfig = {
sampleRate: 16000,
featureDim: 80,
};
// test online recognizer
const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
transducer.encoder =
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
transducer.decoder =
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
transducer.joiner =
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
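// A transducer has three parts; here the encoder and joiner are
// int8-quantized while the decoder stays float32.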
let modelConfig = {
transducer: {
encoder:
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.int8.onnx',
decoder:
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx',
joiner:
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.int8.onnx',
},
paraformer: {
model: '',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: '',
decoder: '',
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'transducer',
};
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.transducer = transducer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'transducer';
let lmConfig = {
model: '',
scale: 1.0,
};
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
return sherpa_onnx.createOfflineRecognizer(config);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './sherpa-onnx-zipformer-en-2023-06-26/test_wavs/0.wav';
@@ -76,8 +99,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
stream.free();
recognizer.free();

View File

@@ -1,28 +1,45 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
function createOfflineTts() {
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
vits.model = 'vits-piper-en_US-amy-low/en_US-amy-low.onnx'
vits.tokens = './vits-piper-en_US-amy-low/tokens.txt';
vits.dataDir = './vits-piper-en_US-amy-low/espeak-ng-data'
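// This English Piper voice phonemizes text with the bundled
// espeak-ng data, so no lexicon file is needed.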
let offlineTtsVitsModelConfig = {
model: './vits-piper-en_US-amy-low/en_US-amy-low.onnx',
lexicon: '',
tokens: './vits-piper-en_US-amy-low/tokens.txt',
dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
noiseScale: 0.667,
noiseScaleW: 0.8,
lengthScale: 1.0,
};
let offlineTtsModelConfig = {
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
numThreads: 1,
debug: 1,
provider: 'cpu',
};
const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
modelConfig.vits = vits;
let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: '',
maxNumSentences: 1,
};
const config = new sherpa_onnx.OfflineTtsConfig();
config.model = modelConfig;
return new sherpa_onnx.OfflineTts(config);
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}
const tts = createOfflineTts();
const speakerId = 0;
const speed = 1.0;
const audio = tts.generate(
'“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
speakerId, speed);
audio.save('./test-en.wav');
const audio = tts.generate({
text:
'“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”',
sid: speakerId,
speed: speed
});
tts.save('./test-en.wav', audio);
console.log('Saved to test-en.wav successfully.');
tts.free();

View File

@@ -3,25 +3,37 @@
const sherpa_onnx = require('sherpa-onnx');
function createOfflineTts() {
const vits = new sherpa_onnx.OfflineTtsVitsModelConfig();
vits.model = './vits-zh-aishell3/vits-aishell3.onnx';
vits.lexicon = './vits-zh-aishell3/lexicon.txt';
vits.tokens = './vits-zh-aishell3/tokens.txt';
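// This Chinese model maps words to phones through lexicon.txt
// (dataDir stays empty), and rule.fst below normalizes text such as
// the digits in the test sentence before synthesis.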
let offlineTtsVitsModelConfig = {
model: './vits-zh-aishell3/vits-aishell3.onnx',
lexicon: './vits-zh-aishell3/lexicon.txt',
tokens: './vits-zh-aishell3/tokens.txt',
dataDir: '',
noiseScale: 0.667,
noiseScaleW: 0.8,
lengthScale: 1.0,
};
let offlineTtsModelConfig = {
offlineTtsVitsModelConfig: offlineTtsVitsModelConfig,
numThreads: 1,
debug: 1,
provider: 'cpu',
};
const modelConfig = new sherpa_onnx.OfflineTtsModelConfig();
modelConfig.vits = vits;
let offlineTtsConfig = {
offlineTtsModelConfig: offlineTtsModelConfig,
ruleFsts: './vits-zh-aishell3/rule.fst',
maxNumSentences: 1,
};
const config = new sherpa_onnx.OfflineTtsConfig();
config.model = modelConfig;
config.ruleFsts = './vits-zh-aishell3/rule.fst';
return new sherpa_onnx.OfflineTts(config);
return sherpa_onnx.createOfflineTts(offlineTtsConfig);
}
const tts = createOfflineTts();
const speakerId = 66;
const speed = 1.0;
const audio = tts.generate('3年前中国总人口是1411778724人', speakerId, speed);
audio.save('./test-zh.wav');
const audio = tts.generate(
{text: '3年前中国总人口是1411778724人', sid: speakerId, speed: speed});
tts.save('./test-zh.wav', audio);
console.log('Saved to test-zh.wav successfully.');
tts.free();

View File

@@ -6,32 +6,58 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOfflineRecognizer() {
let featConfig = {
sampleRate: 16000,
featureDim: 80,
};
// test online recognizer
const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
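// Whisper ships as separate encoder and decoder ONNX files; the
// int8-quantized tiny.en variants are used here.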
let modelConfig = {
transducer: {
encoder: '',
decoder: '',
joiner: '',
},
paraformer: {
model: '',
},
nemoCtc: {
model: '',
},
whisper: {
encoder: './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx',
decoder: './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx',
},
tdnn: {
model: '',
},
tokens: './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt',
numThreads: 1,
debug: 0,
provider: 'cpu',
modelType: 'whisper',
};
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.whisper = whisper;
modelConfig.tokens = tokens;
modelConfig.modelType = 'whisper';
let lmConfig = {
model: '',
scale: 1.0,
};
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let config = {
featConfig: featConfig,
modelConfig: modelConfig,
lmConfig: lmConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
hotwordsFile: '',
hotwordsScore: 1.5,
};
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
return sherpa_onnx.createOfflineRecognizer(config);
}
recognizer = createRecognizer();
const recognizer = createOfflineRecognizer();
const stream = recognizer.createStream();
const waveFilename = './sherpa-onnx-whisper-tiny.en/test_wavs/0.wav';
@@ -72,8 +98,8 @@ fs.createReadStream(waveFilename, {'highWaterMark': 4096})
stream.acceptWaveform(recognizer.config.featConfig.sampleRate, flattened);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
stream.free();
recognizer.free();

View File

@@ -5,37 +5,58 @@ console.log(portAudio.getDevices());
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOnlineRecognizer() {
let onlineTransducerModelConfig = {
encoder: '',
decoder: '',
joiner: '',
};
const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
paraformer.encoder =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
paraformer.decoder =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
const tokens =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
let onlineParaformerModelConfig = {
encoder:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
decoder:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
};
const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.paraformer = paraformer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'paraformer';
let onlineZipformer2CtcModelConfig = {
model: '',
};
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
recognizerConfig.enableEndpoint = 1;
let onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: 'paraformer',
};
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
return recognizer;
let featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
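// Endpointing rules (an endpoint fires when any rule matches):
// rule1: >= 2.4 s of trailing silence, even if nothing was decoded;
// rule2: >= 1.2 s of trailing silence after something was decoded;
// rule3: the utterance reaches 20 s regardless of silence.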
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
display = new sherpa_onnx.Display(50);
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let lastText = '';
let segmentIndex = 0;
@@ -61,11 +82,11 @@ ai.on('data', data => {
}
const isEndpoint = recognizer.isEndpoint(stream);
const text = recognizer.getResult(stream).text;
const text = recognizer.getResult(stream);
if (text.length > 0 && lastText != text) {
lastText = text;
display.print(segmentIndex, lastText);
console.log(segmentIndex, lastText);
}
if (isEndpoint) {
if (text.length > 0) {

View File

@@ -6,34 +6,58 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOnlineRecognizer() {
let onlineTransducerModelConfig = {
encoder: '',
decoder: '',
joiner: '',
};
const paraformer = new sherpa_onnx.OnlineParaformerModelConfig();
paraformer.encoder =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.onnx';
paraformer.decoder =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.onnx';
const tokens =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
let onlineParaformerModelConfig = {
encoder:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
decoder:
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
};
const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.paraformer = paraformer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'paraformer';
let onlineZipformer2CtcModelConfig = {
model: '',
};
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: 'paraformer',
};
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
return recognizer;
let featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
const waveFilename =
'./sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/0.wav';
@@ -47,8 +71,8 @@ function decode(samples) {
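// Drain all buffered audio: isReady() stays true while enough
// feature frames remain for another decode() call.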
while (recognizer.isReady(stream)) {
recognizer.decode(stream);
}
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
}
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {

View File

@@ -5,39 +5,60 @@ const portAudio = require('naudiodon2');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOnlineRecognizer() {
let onlineTransducerModelConfig = {
encoder:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
decoder:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
joiner:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
};
// test online recognizer
const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
transducer.encoder =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
transducer.decoder =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
transducer.joiner =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
const tokens =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
let onlineParaformerModelConfig = {
encoder: '',
decoder: '',
};
const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.transducer = transducer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'zipformer';
let onlineZipformer2CtcModelConfig = {
model: '',
};
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
recognizerConfig.enableEndpoint = 1;
let onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: 'zipformer',
};
const recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
return recognizer;
let featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
display = new sherpa_onnx.Display(50);
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let lastText = '';
let segmentIndex = 0;
@@ -63,11 +84,11 @@ ai.on('data', data => {
}
const isEndpoint = recognizer.isEndpoint(stream);
const text = recognizer.getResult(stream).text;
const text = recognizer.getResult(stream);
if (text.length > 0 && lastText != text) {
lastText = text;
display.print(segmentIndex, lastText);
console.log(segmentIndex, lastText);
}
if (isEndpoint) {
if (text.length > 0) {

View File

@@ -6,37 +6,60 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOnlineRecognizer() {
let onlineTransducerModelConfig = {
encoder:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx',
decoder:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx',
joiner:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.int8.onnx',
};
// test online recognizer
const transducer = new sherpa_onnx.OnlineTransducerModelConfig();
transducer.encoder =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.int8.onnx';
transducer.decoder =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx';
transducer.joiner =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx';
const tokens =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt';
let onlineParaformerModelConfig = {
encoder: '',
decoder: '',
};
const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.transducer = transducer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'zipformer';
let onlineZipformer2CtcModelConfig = {
model: '',
};
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens:
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: 'zipformer',
};
recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
return recognizer;
let featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
const waveFilename =
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav';
@@ -50,8 +73,8 @@ function decode(samples) {
while (recognizer.isReady(stream)) {
recognizer.decode(stream);
}
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
}
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {

View File

@@ -6,32 +6,58 @@ const wav = require('wav');
const sherpa_onnx = require('sherpa-onnx');
function createRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
function createOnlineRecognizer() {
let onlineTransducerModelConfig = {
encoder: '',
decoder: '',
joiner: '',
};
// test online recognizer
const zipformer2Ctc = new sherpa_onnx.OnlineZipformer2CtcModelConfig();
zipformer2Ctc.model =
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx';
const tokens =
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt';
let onlineParaformerModelConfig = {
encoder: '',
decoder: '',
};
const modelConfig = new sherpa_onnx.OnlineModelConfig();
modelConfig.zipformer2Ctc = zipformer2Ctc;
modelConfig.tokens = tokens;
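// Only the zipformer2Ctc section names a model file; modelType is
// left '' below, presumably letting the library infer the type from
// the provided files.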
let onlineZipformer2CtcModelConfig = {
model:
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/ctc-epoch-20-avg-1-chunk-16-left-128.onnx',
};
const recognizerConfig = new sherpa_onnx.OnlineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
let onlineModelConfig = {
transducer: onlineTransducerModelConfig,
paraformer: onlineParaformerModelConfig,
zipformer2Ctc: onlineZipformer2CtcModelConfig,
tokens:
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/tokens.txt',
numThreads: 1,
provider: 'cpu',
debug: 1,
modelType: '',
};
recognizer = new sherpa_onnx.OnlineRecognizer(recognizerConfig);
return recognizer;
let featureConfig = {
sampleRate: 16000,
featureDim: 80,
};
let recognizerConfig = {
featConfig: featureConfig,
modelConfig: onlineModelConfig,
decodingMethod: 'greedy_search',
maxActivePaths: 4,
enableEndpoint: 1,
rule1MinTrailingSilence: 2.4,
rule2MinTrailingSilence: 1.2,
rule3MinUtteranceLength: 20,
hotwordsFile: '',
hotwordsScore: 1.5,
};
return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
recognizer = createRecognizer();
stream = recognizer.createStream();
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
const waveFilename =
'./sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13/test_wavs/DEV_T0000000000.wav';
@@ -45,8 +71,8 @@ function decode(samples) {
while (recognizer.isReady(stream)) {
recognizer.decode(stream);
}
const r = recognizer.getResult(stream);
console.log(r.text);
const text = recognizer.getResult(stream);
console.log(text);
}
reader.on('format', ({audioFormat, bitDepth, channels, sampleRate}) => {

View File

@@ -1,101 +0,0 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx3');
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());
function createOfflineRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
// test online recognizer
const paraformer = new sherpa_onnx.OfflineParaformerModelConfig();
paraformer.model = './sherpa-onnx-paraformer-zh-2023-03-28/model.int8.onnx';
const tokens = './sherpa-onnx-paraformer-zh-2023-03-28/tokens.txt';
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.paraformer = paraformer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'paraformer';
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer
}
function createVad() {
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
sileroVadModelConfig.model = './silero_vad.onnx';
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
sileroVadModelConfig.windowSize = 512;
const vadModelConfig = new sherpa_onnx.VadModelConfig();
vadModelConfig.sileroVad = sileroVadModelConfig;
vadModelConfig.sampleRate = 16000;
const bufferSizeInSeconds = 60;
const vad = new sherpa_onnx.VoiceActivityDetector(
vadModelConfig, bufferSizeInSeconds);
return vad;
}
const recognizer = createOfflineRecognizer();
const vad = createVad();
const bufferSizeInSeconds = 30;
const buffer =
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
var ai = new portAudio.AudioIO({
inOptions: {
channelCount: 1,
sampleFormat: portAudio.SampleFormatFloat32,
sampleRate: vad.config.sampleRate,
deviceId: -1, // Use -1 or omit the deviceId to select the default device
closeOnError: true // Close the stream if an audio error is detected, if
// set false then just log the error
}
});
let printed = false;
let index = 0;
ai.on('data', data => {
const windowSize = vad.config.sileroVad.windowSize;
buffer.push(new Float32Array(data.buffer));
while (buffer.size() > windowSize) {
const samples = buffer.get(buffer.head(), windowSize);
buffer.pop(windowSize);
vad.acceptWaveform(samples)
}
while (!vad.isEmpty()) {
const segment = vad.front();
vad.pop();
const stream = recognizer.createStream();
stream.acceptWaveform(
recognizer.config.featConfig.sampleRate, segment.samples);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
stream.free();
if (r.text.length > 0) {
console.log(`${index}: ${r.text}`);
index += 1;
}
}
});
ai.on('close', () => {
console.log('Free resources');
recognizer.free();
vad.free();
buffer.free();
});
ai.start();
console.log('Started! Please speak')

View File

@@ -1,106 +0,0 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());
function createOfflineRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
// test online recognizer
const transducer = new sherpa_onnx.OfflineTransducerModelConfig();
transducer.encoder =
'./sherpa-onnx-zipformer-en-2023-06-26/encoder-epoch-99-avg-1.onnx';
transducer.decoder =
'./sherpa-onnx-zipformer-en-2023-06-26/decoder-epoch-99-avg-1.onnx';
transducer.joiner =
'./sherpa-onnx-zipformer-en-2023-06-26/joiner-epoch-99-avg-1.onnx';
const tokens = './sherpa-onnx-zipformer-en-2023-06-26/tokens.txt';
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.transducer = transducer;
modelConfig.tokens = tokens;
modelConfig.modelType = 'transducer';
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
}
function createVad() {
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
sileroVadModelConfig.model = './silero_vad.onnx';
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
sileroVadModelConfig.windowSize = 512;
const vadModelConfig = new sherpa_onnx.VadModelConfig();
vadModelConfig.sileroVad = sileroVadModelConfig;
vadModelConfig.sampleRate = 16000;
const bufferSizeInSeconds = 60;
const vad = new sherpa_onnx.VoiceActivityDetector(
vadModelConfig, bufferSizeInSeconds);
return vad;
}
const recognizer = createOfflineRecognizer();
const vad = createVad();
const bufferSizeInSeconds = 30;
const buffer =
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
const ai = new portAudio.AudioIO({
inOptions: {
channelCount: 1,
closeOnError: true, // Close the stream if an audio error is detected, if
// set false then just log the error
deviceId: -1, // Use -1 or omit the deviceId to select the default device
sampleFormat: portAudio.SampleFormatFloat32,
sampleRate: vad.config.sampleRate
}
});
let printed = false;
let index = 0;
ai.on('data', data => {
const windowSize = vad.config.sileroVad.windowSize;
buffer.push(new Float32Array(data.buffer));
while (buffer.size() > windowSize) {
const samples = buffer.get(buffer.head(), windowSize);
buffer.pop(windowSize);
vad.acceptWaveform(samples)
}
while (!vad.isEmpty()) {
const segment = vad.front();
vad.pop();
const stream = recognizer.createStream();
stream.acceptWaveform(
recognizer.config.featConfig.sampleRate, segment.samples);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
stream.free();
if (r.text.length > 0) {
console.log(`${index}: ${r.text}`);
index += 1;
}
}
});
ai.on('close', () => {
console.log('Free resources');
recognizer.free();
vad.free();
buffer.free();
});
ai.start();
console.log('Started! Please speak')

View File

@@ -1,102 +0,0 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
//
const sherpa_onnx = require('sherpa-onnx');
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());
function createOfflineRecognizer() {
const featConfig = new sherpa_onnx.FeatureConfig();
featConfig.sampleRate = 16000;
featConfig.featureDim = 80;
// test online recognizer
const whisper = new sherpa_onnx.OfflineWhisperModelConfig();
whisper.encoder = './sherpa-onnx-whisper-tiny.en/tiny.en-encoder.int8.onnx';
whisper.decoder = './sherpa-onnx-whisper-tiny.en/tiny.en-decoder.int8.onnx';
const tokens = './sherpa-onnx-whisper-tiny.en/tiny.en-tokens.txt';
const modelConfig = new sherpa_onnx.OfflineModelConfig();
modelConfig.whisper = whisper;
modelConfig.tokens = tokens;
modelConfig.modelType = 'whisper';
const recognizerConfig = new sherpa_onnx.OfflineRecognizerConfig();
recognizerConfig.featConfig = featConfig;
recognizerConfig.modelConfig = modelConfig;
recognizerConfig.decodingMethod = 'greedy_search';
const recognizer = new sherpa_onnx.OfflineRecognizer(recognizerConfig);
return recognizer;
}
function createVad() {
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
sileroVadModelConfig.model = './silero_vad.onnx';
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
sileroVadModelConfig.windowSize = 512;
const vadModelConfig = new sherpa_onnx.VadModelConfig();
vadModelConfig.sileroVad = sileroVadModelConfig;
vadModelConfig.sampleRate = 16000;
const bufferSizeInSeconds = 60;
const vad = new sherpa_onnx.VoiceActivityDetector(
vadModelConfig, bufferSizeInSeconds);
return vad;
}
const recognizer = createOfflineRecognizer();
const vad = createVad();
const bufferSizeInSeconds = 30;
const buffer =
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
const ai = new portAudio.AudioIO({
inOptions: {
channelCount: 1,
closeOnError: true, // Close the stream if an audio error is detected, if
// set false then just log the error
deviceId: -1, // Use -1 or omit the deviceId to select the default device
sampleFormat: portAudio.SampleFormatFloat32,
sampleRate: vad.config.sampleRate
}
});
let printed = false;
let index = 0;
ai.on('data', data => {
const windowSize = vad.config.sileroVad.windowSize;
buffer.push(new Float32Array(data.buffer));
while (buffer.size() > windowSize) {
const samples = buffer.get(buffer.head(), windowSize);
buffer.pop(windowSize);
vad.acceptWaveform(samples)
}
while (!vad.isEmpty()) {
const segment = vad.front();
vad.pop();
const stream = recognizer.createStream();
stream.acceptWaveform(
recognizer.config.featConfig.sampleRate, segment.samples);
recognizer.decode(stream);
const r = recognizer.getResult(stream);
stream.free();
if (r.text.length > 0) {
console.log(`${index}: ${r.text}`);
index += 1;
}
}
});
ai.on('close', () => {
console.log('Free resources');
recognizer.free();
vad.free();
buffer.free();
});
ai.start();
console.log('Started! Please speak')

View File

@@ -1,74 +0,0 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const sherpa_onnx = require('sherpa-onnx');
const portAudio = require('naudiodon2');
console.log(portAudio.getDevices());
function createVad() {
const sileroVadModelConfig = new sherpa_onnx.SileroVadModelConfig();
sileroVadModelConfig.model = './silero_vad.onnx';
sileroVadModelConfig.minSpeechDuration = 0.3; // seconds
sileroVadModelConfig.minSilenceDuration = 0.3; // seconds
sileroVadModelConfig.windowSize = 512;
const vadModelConfig = new sherpa_onnx.VadModelConfig();
vadModelConfig.sileroVad = sileroVadModelConfig;
vadModelConfig.sampleRate = 16000;
const bufferSizeInSeconds = 60;
const vad = new sherpa_onnx.VoiceActivityDetector(
vadModelConfig, bufferSizeInSeconds);
return vad;
}
vad = createVad();
const bufferSizeInSeconds = 30;
const buffer =
new sherpa_onnx.CircularBuffer(bufferSizeInSeconds * vad.config.sampleRate);
const ai = new portAudio.AudioIO({
inOptions: {
channelCount: 1,
closeOnError: true, // Close the stream if an audio error is detected, if
// set false then just log the error
deviceId: -1, // Use -1 or omit the deviceId to select the default device
sampleFormat: portAudio.SampleFormatFloat32,
sampleRate: vad.config.sampleRate
}
});
let printed = false;
let index = 0;
ai.on('data', data => {
const windowSize = vad.config.sileroVad.windowSize;
buffer.push(new Float32Array(data.buffer));
while (buffer.size() > windowSize) {
const samples = buffer.get(buffer.head(), windowSize);
buffer.pop(windowSize);
vad.acceptWaveform(samples)
if (vad.isDetected() && !printed) {
console.log(`${index}: Detected speech`)
printed = true;
}
if (!vad.isDetected()) {
printed = false;
}
while (!vad.isEmpty()) {
const segment = vad.front();
vad.pop();
const duration = segment.samples.length / vad.config.sampleRate;
console.log(`${index} End of speech. Duration: ${duration} seconds`);
index += 1;
}
}
});
ai.on('close', () => {
console.log('Free resources');
vad.free();
buffer.free();
});
ai.start();
console.log('Started! Please speak')