feat: add mic example for better compatibility (#1909)
Co-authored-by: wanghsinche <wanghsinche>
nodejs-examples/README.md
@@ -1,6 +1,8 @@
# Introduction

Note: You need `Node >= 18`.

Note: For Mac M1 and other Apple silicon chips, please check the example `test-online-paraformer-microphone-mic.js`.

This directory contains nodejs examples for [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
@@ -278,6 +280,25 @@ rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone.js
```

## ./test-online-paraformer-microphone-mic.js
[./test-online-paraformer-microphone-mic.js](./test-online-paraformer-microphone-mic.js)
demonstrates how to do real-time speech recognition from a microphone
with a streaming Paraformer model. In the code we use
[sherpa-onnx-streaming-paraformer-bilingual-zh-en](https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-paraformer/paraformer-models.html#csukuangfj-sherpa-onnx-streaming-paraformer-bilingual-zh-en-chinese-english).

It uses [mic](https://www.npmjs.com/package/mic) for better compatibility; please check its npm page before running it.

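At a high level, the example captures raw audio with `mic` and feeds it to the recognizer. The following is a condensed, illustrative sketch, not the full script; the fixed sample rate of 16000 is an assumption here, the actual script reads it from the recognizer config:

```js
// Sketch only: how `mic` provides raw PCM audio for the recognizer.
const mic = require('mic');

const micInstance = mic({
  rate: 16000,  // assumed; the example uses recognizer.config.featConfig.sampleRate
  channels: 1,
  bitwidth: 16,
  encoding: 'signed-integer',
  fileType: 'raw',
  device: 'default',
});

const audioStream = micInstance.getAudioStream();
audioStream.on('data', (buffer) => {
  // buffer holds 16-bit signed PCM samples; the example converts them to
  // Float32 values in [-1, 1] before passing them to the streaming recognizer.
});

micInstance.start();
```
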
You can use the following commands to run it:

```bash
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
node ./test-online-paraformer-microphone-mic.js
```
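
Speak into the microphone and the recognized text is printed to the console. Press `Ctrl + C` to stop; on exit the script stops the microphone and frees the stream and the recognizer.
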
## ./test-online-paraformer.js
[./test-online-paraformer.js](./test-online-paraformer.js) demonstrates
how to decode a file using a streaming Paraformer model. In the code we use

nodejs-examples/package.json
@@ -1,5 +1,6 @@
{
  "dependencies": {
    "mic": "^2.1.2",
    "naudiodon2": "^2.4.0",
    "sherpa-onnx": "^1.10.45",
    "wav": "^1.0.2"
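The example scripts in this directory expect these dependencies to be installed. A minimal setup sketch, assuming the commands are run from the `nodejs-examples` directory of the repository:

```bash
# Install the dependencies listed in package.json
# (including the newly added `mic` package).
npm install
```
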
nodejs-examples/test-online-paraformer-microphone-mic.js (new file, 206 lines)
@@ -0,0 +1,206 @@
// Copyright (c) 2023 Xiaomi Corporation (authors: Fangjun Kuang)

// This example uses the `mic` package for better compatibility; see its
// npm page (https://www.npmjs.com/package/mic) before running it.
const mic = require('mic');
const sherpa_onnx = require('sherpa-onnx');

function createOnlineRecognizer() {
  let onlineParaformerModelConfig = {
    encoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx',
    decoder: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx',
  };

  let onlineModelConfig = {
    paraformer: onlineParaformerModelConfig,
    tokens: './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt',
  };

  let recognizerConfig = {
    modelConfig: onlineModelConfig,
    // Endpoint detection: rule1/rule2 give the minimum trailing silence
    // (in seconds) before an endpoint is triggered; rule3 limits the
    // utterance length (in seconds).
    enableEndpoint: 1,
    rule1MinTrailingSilence: 2.4,
    rule2MinTrailingSilence: 1.2,
    rule3MinUtteranceLength: 20,
  };

  return sherpa_onnx.createOnlineRecognizer(recognizerConfig);
}

/**
 * SpeechSession class, works as a session manager together with the
 * formatOutput function.
 * Sample output:

 === Automated Speech Recognition ===
 Current Session #1
 Time: 8:44:46 PM
 ------------------------
 Recognized Sentences:
 [8:44:43 PM] 1. it's so great three result is great great 她还支持中文
 [8:44:46 PM] 2. 很厉
 ------------------------
 Recognizing: 真的很厉害太厉害

 */
class SpeechSession {
  constructor() {
    this.startTime = Date.now();
    this.sentences = [];
    this.currentText = '';
    this.lastUpdateTime = Date.now();
  }

  addOrUpdateText(text) {
    this.currentText = text;
    this.lastUpdateTime = Date.now();
  }

  finalizeSentence() {
    if (this.currentText.trim()) {
      this.sentences.push({
        text: this.currentText.trim(),
        timestamp: new Date().toLocaleTimeString()
      });
    }
    this.currentText = '';
  }

  shouldStartNewSession() {
    return Date.now() - this.lastUpdateTime > 10000;  // 10 seconds of silence
  }
}

function formatOutput() {
  clearConsole();
  console.log('\n=== Automated Speech Recognition ===');
  console.log(`Current Session #${sessionCount}`);
  console.log('Time:', new Date().toLocaleTimeString());
  console.log('------------------------');

  // Show the sentences recognized so far in this session
  if (currentSession.sentences.length > 0) {
    console.log('Recognized Sentences:');
    currentSession.sentences.forEach((sentence, index) => {
      console.log(`[${sentence.timestamp}] ${index + 1}. ${sentence.text}`);
    });
    console.log('------------------------');
  }

  // Show the text that is currently being recognized
  if (currentSession.currentText) {
    console.log('Recognizing:', currentSession.currentText);
  }
}

const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();
let currentSession = new SpeechSession();
let sessionCount = 1;

// Clear the terminal and move the cursor to the top-left corner
function clearConsole() {
  process.stdout.write('\x1B[2J\x1B[0f');
}

function exitHandler(options, exitCode) {
  if (options.cleanup) {
    console.log('\nCleaned up resources...');
    micInstance.stop();
    stream.free();
    recognizer.free();
  }
  if (exitCode || exitCode === 0) console.log('Exit code:', exitCode);
  if (options.exit) process.exit();
}

const micInstance = mic({
  rate: recognizer.config.featConfig.sampleRate,
  channels: 1,
  debug: false,  // disable debug output
  device: 'default',
  bitwidth: 16,
  encoding: 'signed-integer',
  exitOnSilence: 6,
  fileType: 'raw'
});

const micInputStream = micInstance.getAudioStream();

function startMic() {
  return new Promise((resolve, reject) => {
    micInputStream.once('startComplete', () => {
      console.log('Microphone started.');
      resolve();
    });

    micInputStream.once('error', (err) => {
      console.error('Microphone start error:', err);
      reject(err);
    });

    micInstance.start();
  });
}

micInputStream.on('data', buffer => {
  // Convert 16-bit signed PCM samples to Float32 values in [-1, 1]
  const int16Array = new Int16Array(buffer.buffer);
  const samples = new Float32Array(int16Array.length);

  for (let i = 0; i < int16Array.length; i++) {
    samples[i] = int16Array[i] / 32768.0;
  }

  stream.acceptWaveform(recognizer.config.featConfig.sampleRate, samples);

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text;

  if (text.length > 0) {
    // Check whether a new session should be started
    if (currentSession.shouldStartNewSession()) {
      currentSession.finalizeSentence();
      sessionCount++;
      currentSession = new SpeechSession();
    }

    currentSession.addOrUpdateText(text);
    formatOutput();
  }

  if (isEndpoint) {
    if (text.length > 0) {
      currentSession.finalizeSentence();
      formatOutput();
    }
    recognizer.reset(stream);
  }
});

micInputStream.on('error', err => {
  console.error('Audio stream error:', err);
});

micInputStream.on('close', () => {
  console.log('Microphone closed.');
});

process.on('exit', exitHandler.bind(null, {cleanup: true}));
process.on('SIGINT', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR1', exitHandler.bind(null, {exit: true}));
process.on('SIGUSR2', exitHandler.bind(null, {exit: true}));
process.on('uncaughtException', exitHandler.bind(null, {exit: true}));

async function main() {
  try {
    console.log('Starting ...');
    await startMic();
    console.log('Initialized, waiting for speech ...');
    formatOutput();
  } catch (err) {
    console.error('Failed to initialize:', err);
    process.exit(1);
  }
}

main();