This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/nodejs-examples/test-online-paraformer-microphone-mic.js
wanghsinche 7774e35749 feat: add mic example for better compatibility (#1909)
Co-authored-by: wanghsinche <wanghsinche>
2025-02-21 21:47:21 +08:00

206 lines
5.4 KiB
JavaScript

// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)
const mic = require('mic'); // It uses `mic` for better compatibility, do check its [npm](https://www.npmjs.com/package/mic) before running it.
const sherpa_onnx = require('sherpa-onnx');
/**
 * Build a streaming (online) recognizer backed by the bilingual zh-en
 * streaming paraformer model (int8-quantized encoder/decoder), with
 * endpoint detection enabled.
 * @returns the sherpa-onnx online recognizer instance
 */
function createOnlineRecognizer() {
  const modelDir = './sherpa-onnx-streaming-paraformer-bilingual-zh-en';
  const recognizerConfig = {
    modelConfig: {
      paraformer: {
        encoder: `${modelDir}/encoder.int8.onnx`,
        decoder: `${modelDir}/decoder.int8.onnx`,
      },
      tokens: `${modelDir}/tokens.txt`,
    },
    enableEndpoint: 1,
    // Endpointing rules: trailing-silence thresholds (seconds) and a
    // max utterance length (seconds) — values kept from the original.
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };
  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}
/**
* SpeechSession class, work as a session manager with the formatOutput function
* Sample output:
=== Automated Speech Recognition ===
Current Session #1
Time: 8:44:46 PM
------------------------
Recognized Sentences:
[8:44:43 PM] 1. it's so great three result is great great 她还支持中文
[8:44:46 PM] 2. 很厉
------------------------
Recognizing: 真的很厉害太厉害
*/
/**
 * One recognition session: a list of finalized sentences plus the
 * partial text currently being recognized. A session is rolled over
 * by the caller after a long stretch of silence.
 */
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  /** Replace the in-progress text and refresh the activity timestamp. */
  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  /** Commit the in-progress text (if non-blank) as a finished sentence. */
  finalizeSentence() {
    const trimmed = this.currentText.trim();
    if (trimmed) {
      this.sentences.push({
        text: trimmed,
        timestamp: new Date().toLocaleTimeString(),
      });
    }
    this.currentText = '';
  }

  /** True once 10 s have passed with no updates. */
  shouldStartNewSession() {
    const SILENCE_MS = 10000;
    return Date.now() - this.lastUpdateTime > SILENCE_MS;
  }
}
/**
 * Redraw the whole console UI: header, the finalized sentences of the
 * current session, and the partial text still being recognized.
 * Reads the module-level `currentSession` and `sessionCount`.
 */
function formatOutput() {
  clearConsole();

  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  const { sentences, currentText } = currentSession;

  // Previously finalized sentences, oldest first, numbered from 1.
  if (sentences.length > 0) {
    console.log('Recognized Sentences:');
    let lineNo = 0;
    for (const { timestamp, text } of sentences) {
      lineNo += 1;
      console.log(`[${timestamp}] ${lineNo}. ${text}`);
    }
    console.log('------------------------');
  }

  // The partial hypothesis still being decoded.
  if (currentText) {
    console.log('Recognizing:', currentText);
  }
}
// Module-level state shared by the mic data handler and the console UI.
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream(); // audio samples are fed into this stream
let currentSession = new SpeechSession(); // rolled over after prolonged silence
let sessionCount = 1; // 1-based counter shown in the UI header
/** Clear the terminal and home the cursor using ANSI escape sequences. */
function clearConsole() {
  const CLEAR_SCREEN_AND_HOME = '\x1B[2J\x1B[0f';
  process.stdout.write(CLEAR_SCREEN_AND_HOME);
}
/**
 * Shared shutdown hook, bound to process events elsewhere in this file.
 * @param {{cleanup?: boolean, exit?: boolean}} options - whether to free
 *   native resources and/or terminate the process
 * @param {*} exitCode - exit code, signal name, or Error from the emitter
 */
function exitHandler(options, exitCode) {
  const { cleanup, exit } = options;

  if (cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }

  // Log any truthy value, plus a literal 0 (same truthiness rules as before).
  if (exitCode === 0 || exitCode) {
    console.log('Exit code:', exitCode);
  }

  if (exit) {
    process.exit();
  }
}
// Microphone capture configured to match the recognizer's expected input:
// mono 16-bit signed PCM at the model's sample rate, delivered as raw
// (headerless) frames.
const micInstance = mic({
rate: recognizer.config.featConfig.sampleRate,
channels: 1,
debug: false, // disable mic's debug output
device: 'default',
bitwidth: 16,
encoding: 'signed-integer',
exitOnSilence: 6,
fileType: 'raw'
});
// Readable stream of raw PCM chunks from the capture process.
const micInputStream = micInstance.getAudioStream();
/**
 * Start audio capture. Resolves once the mic reports 'startComplete';
 * rejects on the first 'error' event during startup.
 * @returns {Promise<void>}
 */
function startMic() {
  return new Promise((resolve, reject) => {
    const onStarted = () => {
      console.log('Mic phone started.');
      resolve();
    };
    const onStartError = (err) => {
      console.error('Mic phone start error:', err);
      reject(err);
    };
    micInputStream.once('startComplete', onStarted);
    micInputStream.once('error', onStartError);
    micInstance.start();
  });
}
// Feed each raw PCM chunk into the recognizer and refresh the UI.
micInputStream.on('data', buffer => {
  // BUG FIX: a Node Buffer is often a view into a larger pooled
  // ArrayBuffer, so `new Int16Array(buffer.buffer)` alone can read the
  // wrong region and the wrong number of samples. Honor byteOffset and
  // length explicitly. (Pool offsets are 8-byte aligned in practice, so
  // the 2-byte alignment Int16Array requires holds — TODO confirm for
  // exotic stream sources.)
  const int16Array = new Int16Array(
    buffer.buffer,
    buffer.byteOffset,
    Math.floor(buffer.length / Int16Array.BYTES_PER_ELEMENT)
  );

  // Convert 16-bit signed PCM to normalized floats in [-1, 1).
  const samples = new Float32Array(int16Array.length);
  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  // Drain every frame that is ready for decoding.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Roll over to a fresh session after a long stretch of silence.
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }
    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    // Endpoint detected: commit the sentence and reset decoder state.
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});
// Runtime stream failures (startMic's 'error' handler only covers startup).
micInputStream.on('error', err => {
console.error('Audio stream error:', err);
});
micInputStream.on('close', () => {
console.log('Mic phone closed.');
});
// Free native resources exactly once when the process exits; the signal
// handlers below call process.exit(), which in turn fires 'exit'.
process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
// NOTE(review): on uncaughtException this exits with the default status
// (success), masking the crash — consider exiting with a non-zero code.
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));
/**
 * Entry point: bring the microphone up, then render the initial UI.
 * Any startup failure is logged and terminates the process with code 1.
 */
async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (error) {
    console.error('Failed to initialize:', error);
    process.exit(1);
  }
}

main();