Add streaming CTC ASR APIs for node-addon-api (#867)

2024-05-13 11:58:25 +08:00
parent db85b2c1d8
commit 384f96c40f
15 changed files with 443 additions and 29 deletions
--- a/.github/scripts/test-nodejs-addon-npm.sh
+++ b/.github/scripts/test-nodejs-addon-npm.sh
@@ -5,15 +5,6 @@ set -ex
 d=nodejs-addon-examples
 echo "dir: $d"
 cd $d
 npm install --verbose
 git status
 ls -lh
 ls -lh node_modules
 export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
 export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
@@ -22,3 +13,14 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 node test_asr_streaming_transducer.js
 rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
 curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 node ./test_asr_streaming_ctc.js
 # To decode with HLG.fst
 node ./test_asr_streaming_ctc_hlg.js
 rm -rf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
--- a/.github/workflows/test-nodejs-addon-api.yaml
+++ b/.github/workflows/test-nodejs-addon-api.yaml
@@ -152,17 +152,23 @@ jobs:
          ./node_modules/.bin/cmake-js compile --log-level verbose
-      - name: Test streaming transducer
+      - name: Run tests
        shell: bash
        run: |
          export PATH=$PWD/build/install/lib:$PATH
          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
          d=nodejs-addon-examples
          cd $d
          files=$(ls *.js)
          echo $files
          for f in ${files[@]}; do
            echo $f
            sed -i.bak s%sherpa-onnx-node%./sherpa-onnx% ./$f
          done
          cd ..
-          cd scripts/node-addon-api
+          cp -v scripts/node-addon-api/build/Release/sherpa-onnx.node $d/
-          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+          cp -v scripts/node-addon-api/lib/*.js $d/
-          tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
+          cp -v ./build/install/lib/lib*  $d/
          rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
-          node test/test_asr_streaming_transducer.js
+          .github/scripts/test-nodejs-addon-npm.sh
          rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
--- a/.github/workflows/test-nodejs-addon-npm.yaml
+++ b/.github/workflows/test-nodejs-addon-npm.yaml
@@ -63,4 +63,19 @@ jobs:
      - name: Run tests
        shell: bash
        run: |
          d=nodejs-addon-examples
          echo "dir: $d"
          cd $d
          npm install --verbose
          git status
          ls -lh
          ls -lh node_modules
          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-x64:$DYLD_LIBRARY_PATH
          export DYLD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-darwin-arm64:$DYLD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
          export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
          cd ../
          .github/scripts/test-nodejs-addon-npm.sh
--- a/nodejs-addon-examples/README.md
+++ b/nodejs-addon-examples/README.md
@@ -27,6 +27,18 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-x64:$LD_LIBRARY_PATH
 export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PATH
 ```
 ## Voice Activity detection (VAD)
 ```bash
 wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
 # To run the test with a microphone, you need to install the package naudiodon2
 npm install naudiodon2
 node ./test_vad_microphone.js
 ```
 ## Streaming speech recognition with zipformer transducer
 ```bash
@@ -36,21 +48,27 @@ rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2
 node ./test_asr_streaming_transducer.js
-# To run the test with microphone, you need to install the package naudiodon2
+# To run the test with a microphone, you need to install the package naudiodon2
 npm install naudiodon2
 node ./test_asr_streaming_transducer_microphone.js
 ```
-# VAD
+## Streaming speech recognition with zipformer CTC
 ```bash
-wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/silero_vad.onnx
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
 node ./test_asr_streaming_ctc.js
-# To run the test with microphone, you need to install the package naudiodon2
+# To decode with HLG.fst
 node ./test_asr_streaming_ctc_hlg.js
 # To run the test with a microphone, you need to install the package naudiodon2
 npm install naudiodon2
-node ./test_vad_microphone.js
+node ./test_asr_streaming_ctc_microphone.js
 node ./test_asr_streaming_ctc_hlg_microphone.js
 ```
--- a/nodejs-addon-examples/test_asr_streaming_ctc.js
+++ b/nodejs-addon-examples/test_asr_streaming_ctc.js
@@ -0,0 +1,55 @@
 // Copyright (c)  2024  Xiaomi Corporation
 const sherpa_onnx = require('sherpa-onnx-node');
 const performance = require('perf_hooks').performance;
 // Please download test files from
 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 // Recognizer configuration for the streaming zipformer2 CTC example:
 // feature extraction settings plus the model/token files taken from the
 // sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18 directory.
 const config = {
   featConfig: {
     sampleRate: 16000,
     featureDim: 80,
   },
   modelConfig: {
     zipformer2Ctc: {
       model:
           './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
     },
     tokens:
         './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
     numThreads: 2,
     provider: 'cpu',
     debug: 1,
   },
 };
 // Decode one test wave file with the streaming CTC recognizer and report
 // the elapsed time, the real-time factor (RTF) and the recognition result.
 const waveFilename =
     './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/0.wav';
 const recognizer = new sherpa_onnx.OnlineRecognizer(config);
 console.log('Started');
 const start = performance.now();
 const stream = recognizer.createStream();
 const wave = sherpa_onnx.readWave(waveFilename);
 stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
 // Append 0.4 s worth of zero samples after the real audio.
 // NOTE(review): presumably this lets the model flush its final frames --
 // confirm against the recognizer's chunking requirements.
 const tailPadding = new Float32Array(wave.sampleRate * 0.4);
 stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
 // Keep decoding for as long as the recognizer reports the stream as ready.
 while (recognizer.isReady(stream)) {
   recognizer.decode(stream);
 }
 // Declared with const: the bare assignment used before created an implicit
 // global and would throw a ReferenceError in strict mode / ES modules.
 const result = recognizer.getResult(stream);
 const stop = performance.now();
 console.log('Done');
 const elapsed_seconds = (stop - start) / 1000;
 // The duration is computed from the original samples only; the tail padding
 // is not counted.
 const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
 console.log('Wave duration', duration.toFixed(3), 'seconds');
 console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
 console.log(
     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
     real_time_factor.toFixed(3));
 console.log(waveFilename);
 console.log('result\n', result);
--- a/nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
+++ b/nodejs-addon-examples/test_asr_streaming_ctc_hlg.js
@@ -0,0 +1,58 @@
 // Copyright (c)  2024  Xiaomi Corporation
 const sherpa_onnx = require('sherpa-onnx-node');
 const performance = require('perf_hooks').performance;
 // Please download test files from
 // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
 // Recognizer configuration for the streaming zipformer2 CTC example that
 // decodes with an FST graph: feature extraction settings, the model/token
 // files, and the HLG.fst graph from the same model directory.
 const config = {
   featConfig: {
     sampleRate: 16000,
     featureDim: 80,
   },
   modelConfig: {
     zipformer2Ctc: {
       model:
           './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
     },
     tokens:
         './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
     numThreads: 2,
     provider: 'cpu',
     debug: 1,
   },
   ctcFstDecoderConfig: {
     graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
   },
 };
 // Decode one test wave file with the streaming CTC recognizer (HLG graph
 // decoding is selected by the config) and report the elapsed time, the
 // real-time factor (RTF) and the recognition result.
 const waveFilename =
     './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/test_wavs/1.wav';
 const recognizer = new sherpa_onnx.OnlineRecognizer(config);
 console.log('Started');
 const start = performance.now();
 const stream = recognizer.createStream();
 const wave = sherpa_onnx.readWave(waveFilename);
 stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples});
 // Append 0.4 s worth of zero samples after the real audio.
 // NOTE(review): presumably this lets the model flush its final frames --
 // confirm against the recognizer's chunking requirements.
 const tailPadding = new Float32Array(wave.sampleRate * 0.4);
 stream.acceptWaveform({samples: tailPadding, sampleRate: wave.sampleRate});
 // Keep decoding for as long as the recognizer reports the stream as ready.
 while (recognizer.isReady(stream)) {
   recognizer.decode(stream);
 }
 // Declared with const: the bare assignment used before created an implicit
 // global and would throw a ReferenceError in strict mode / ES modules.
 const result = recognizer.getResult(stream);
 const stop = performance.now();
 console.log('Done');
 const elapsed_seconds = (stop - start) / 1000;
 // The duration is computed from the original samples only; the tail padding
 // is not counted.
 const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
 console.log('Wave duration', duration.toFixed(3), 'seconds');
 console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds');
 console.log(
     `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
     real_time_factor.toFixed(3));
 console.log(waveFilename);
 console.log('result\n', result);
--- a/nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
+++ b/nodejs-addon-examples/test_asr_streaming_ctc_hlg_microphone.js
@@ -0,0 +1,89 @@
 // Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
 //
 const portAudio = require('naudiodon2');
 // console.log(portAudio.getDevices());
 const sherpa_onnx = require('sherpa-onnx-node');
 /**
  * Creates the streaming recognizer used by this example: zipformer2 CTC
  * model, HLG.fst graph decoding, and endpoint detection enabled.
  *
  * @returns {object} A new `sherpa_onnx.OnlineRecognizer` built from the
  *     configuration assembled below.
  */
 function createOnlineRecognizer() {
   const featConfig = {
     sampleRate: 16000,
     featureDim: 80,
   };
   const modelConfig = {
     zipformer2Ctc: {
       model:
           './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
     },
     tokens:
         './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
     numThreads: 2,
     provider: 'cpu',
     debug: 1,
   };
   const ctcFstDecoderConfig = {
     graph: './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
   };
   return new sherpa_onnx.OnlineRecognizer({
     featConfig,
     modelConfig,
     ctcFstDecoderConfig,
     enableEndpoint: true,
     rule1MinTrailingSilence: 2.4,
     rule2MinTrailingSilence: 1.2,
     rule3MinUtteranceLength: 20,
   });
 }
 // Microphone driver: feed captured audio into the recognizer, print the
 // partial result whenever it changes, and start a new segment each time the
 // recognizer reports an endpoint.
 const recognizer = createOnlineRecognizer();
 const stream = recognizer.createStream();
 let lastText = '';
 let segmentIndex = 0;
 const ai = new portAudio.AudioIO({
   inOptions: {
     channelCount: 1,
     closeOnError: true,  // Close the stream if an audio error is detected, if
                          // set false then just log the error
     deviceId: -1,  // Use -1 or omit the deviceId to select the default device
     sampleFormat: portAudio.SampleFormatFloat32,
     sampleRate: recognizer.config.featConfig.sampleRate
   }
 });
 // NOTE(review): 50 is presumably the maximum line width used when printing
 // -- confirm against the Display implementation.
 const display = new sherpa_onnx.Display(50);
 ai.on('data', data => {
   // `data` is a Node.js Buffer, which may be a view into a larger
   // ArrayBuffer. Copy exactly the bytes that belong to this chunk instead of
   // wrapping the whole backing store, which would ignore
   // byteOffset/byteLength.
   const samples = new Float32Array(
       data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength));
   stream.acceptWaveform(
       {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
   while (recognizer.isReady(stream)) {
     recognizer.decode(stream);
   }
   const isEndpoint = recognizer.isEndpoint(stream);
   const text = recognizer.getResult(stream).text.toLowerCase();
   // Only re-print when the recognized text actually changed.
   if (text.length > 0 && lastText !== text) {
     lastText = text;
     display.print(segmentIndex, lastText);
   }
   if (isEndpoint) {
     // Advance to the next segment only if this one produced some text.
     if (text.length > 0) {
       lastText = text;
       segmentIndex += 1;
     }
     recognizer.reset(stream);
   }
 });
 ai.on('close', () => {
   console.log('Free resources');
   stream.free();
   recognizer.free();
 });
 ai.start();
 console.log('Started! Please speak');
--- a/nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
+++ b/nodejs-addon-examples/test_asr_streaming_ctc_microphone.js
@@ -0,0 +1,88 @@
 // Copyright (c)  2023-2024  Xiaomi Corporation (authors: Fangjun Kuang)
 //
 const portAudio = require('naudiodon2');
 // console.log(portAudio.getDevices());
 const sherpa_onnx = require('sherpa-onnx-node');
 /**
  * Creates the streaming recognizer used by this example: zipformer2 CTC
  * model, 'greedy_search' decoding, and endpoint detection enabled.
  *
  * @returns {object} A new `sherpa_onnx.OnlineRecognizer` built from the
  *     configuration assembled below.
  */
 function createOnlineRecognizer() {
   const featConfig = {
     sampleRate: 16000,
     featureDim: 80,
   };
   const modelConfig = {
     zipformer2Ctc: {
       model:
           './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
     },
     tokens:
         './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
     numThreads: 2,
     provider: 'cpu',
     debug: 1,
   };
   return new sherpa_onnx.OnlineRecognizer({
     featConfig,
     modelConfig,
     decodingMethod: 'greedy_search',
     maxActivePaths: 4,
     enableEndpoint: true,
     rule1MinTrailingSilence: 2.4,
     rule2MinTrailingSilence: 1.2,
     rule3MinUtteranceLength: 20,
   });
 }
 // Microphone driver: feed captured audio into the recognizer, print the
 // partial result whenever it changes, and start a new segment each time the
 // recognizer reports an endpoint.
 const recognizer = createOnlineRecognizer();
 const stream = recognizer.createStream();
 let lastText = '';
 let segmentIndex = 0;
 const ai = new portAudio.AudioIO({
   inOptions: {
     channelCount: 1,
     closeOnError: true,  // Close the stream if an audio error is detected, if
                          // set false then just log the error
     deviceId: -1,  // Use -1 or omit the deviceId to select the default device
     sampleFormat: portAudio.SampleFormatFloat32,
     sampleRate: recognizer.config.featConfig.sampleRate
   }
 });
 // NOTE(review): 50 is presumably the maximum line width used when printing
 // -- confirm against the Display implementation.
 const display = new sherpa_onnx.Display(50);
 ai.on('data', data => {
   // `data` is a Node.js Buffer, which may be a view into a larger
   // ArrayBuffer. Copy exactly the bytes that belong to this chunk instead of
   // wrapping the whole backing store, which would ignore
   // byteOffset/byteLength.
   const samples = new Float32Array(
       data.buffer.slice(data.byteOffset, data.byteOffset + data.byteLength));
   stream.acceptWaveform(
       {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});
   while (recognizer.isReady(stream)) {
     recognizer.decode(stream);
   }
   const isEndpoint = recognizer.isEndpoint(stream);
   const text = recognizer.getResult(stream).text.toLowerCase();
   // Only re-print when the recognized text actually changed.
   if (text.length > 0 && lastText !== text) {
     lastText = text;
     display.print(segmentIndex, lastText);
   }
   if (isEndpoint) {
     // Advance to the next segment only if this one produced some text.
     if (text.length > 0) {
       lastText = text;
       segmentIndex += 1;
     }
     recognizer.reset(stream);
   }
 });
 ai.on('close', () => {
   console.log('Free resources');
   stream.free();
   recognizer.free();
 });
 ai.start();
 console.log('Started! Please speak');
--- a/nodejs-addon-examples/test_asr_streaming_transducer.js
+++ b/nodejs-addon-examples/test_asr_streaming_transducer.js
@@ -24,7 +24,6 @@ const config = {
    'numThreads': 2,
    'provider': 'cpu',
    'debug': 1,
    'modelType': 'zipformer',
  }
 };
@@ -53,5 +52,8 @@ const duration = wave.samples.length / wave.sampleRate;
 const real_time_factor = elapsed_seconds / duration;
 console.log('Wave duration', duration.toFixed(3), 'secodns')
 console.log('Elapsed', elapsed_seconds.toFixed(3), 'secodns')
-console.log('RTF', real_time_factor.toFixed(3))
+console.log(
-console.log('result', result.text)
+    `RTF = ${elapsed_seconds.toFixed(3)}/${duration.toFixed(3)} =`,
    real_time_factor.toFixed(3))
 console.log(waveFilename)
 console.log('result\n', result)
--- a/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
+++ b/nodejs-addon-examples/test_asr_streaming_transducer_microphone.js
@@ -25,7 +25,6 @@ function createOnlineRecognizer() {
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
      'modelType': 'zipformer',
    },
    'decodingMethod': 'greedy_search',
    'maxActivePaths': 4,
@@ -68,7 +67,7 @@ ai.on('data', data => {
  }
  const isEndpoint = recognizer.isEndpoint(stream);
-  const text = recognizer.getResult(stream).text;
+  const text = recognizer.getResult(stream).text.toLowerCase();
  if (text.length > 0 && lastText != text) {
    lastText = text;
--- a/scripts/apk/generate-tts-apk-script.py
+++ b/scripts/apk/generate-tts-apk-script.py
@@ -158,7 +158,7 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-fa_IR-gyro-medium"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
-        TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
+        #  TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
--- a/scripts/node-addon-api/lib/addon.js
+++ b/scripts/node-addon-api/lib/addon.js
@@ -9,6 +9,7 @@ const possible_paths = [
  '../build/Debug/sherpa-onnx.node',
  `./node_modules/sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
  `../sherpa-onnx-${platform_arch}/sherpa-onnx.node`,
  './sherpa-onnx.node',
 ];
 let found = false;
--- a/scripts/node-addon-api/run.sh
+++ b/scripts/node-addon-api/run.sh
@@ -0,0 +1,15 @@
 #!/usr/bin/env bash
 # Builds the node addon with cmake-js, first building and installing the
 # shared sherpa-onnx libraries into ../../build/install when they are missing.
 # All paths are relative to the current working directory.
 # NOTE(review): presumably meant to be run from scripts/node-addon-api -- confirm.

 # -e: abort on the first failing command; -x: echo each command as it runs.
 set -ex
 # Build only if neither the macOS (.dylib) nor the Linux (.so) core library
 # has been installed yet.
 if [[ ! -f ../../build/install/lib/libsherpa-onnx-core.dylib && ! -f ../../build/install/lib/libsherpa-onnx-core.so ]]; then
   pushd ../../
   mkdir -p build
   cd build
   # Shared libraries, installed under build/install.
   cmake -DCMAKE_INSTALL_PREFIX=./install -DBUILD_SHARED_LIBS=ON ..
   make install
   popd
 fi
 # NOTE(review): presumably read by the addon's CMake configuration to locate
 # the installed headers and libraries -- confirm against its CMakeLists.txt.
 export SHERPA_ONNX_INSTALL_DIR=$PWD/../../build/install
 ./node_modules/.bin/cmake-js compile
--- a/scripts/node-addon-api/src/streaming-asr.cc
+++ b/scripts/node-addon-api/src/streaming-asr.cc
@@ -89,6 +89,30 @@ static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig(
  return config;
 }
 // Builds a SherpaOnnxOnlineZipformer2CtcModelConfig from the "zipformer2Ctc"
 // property of `obj`.
 //
 // The returned struct is zero-filled. Its `model` field is set only when
 // obj.zipformer2Ctc is an object whose "model" property is a string; in that
 // case it points to a NUL-terminated copy allocated with new[], which the
 // caller is responsible for releasing with delete[].
 static SherpaOnnxOnlineZipformer2CtcModelConfig
 GetOnlineZipformer2CtcModelConfig(Napi::Object obj) {
   SherpaOnnxOnlineZipformer2CtcModelConfig result;
   memset(&result, 0, sizeof(result));

   const bool has_section =
       obj.Has("zipformer2Ctc") && obj.Get("zipformer2Ctc").IsObject();
   if (has_section) {
     Napi::Object section = obj.Get("zipformer2Ctc").As<Napi::Object>();

     if (section.Has("model") && section.Get("model").IsString()) {
       const std::string path =
           section.Get("model").As<Napi::String>().Utf8Value();

       // Copy into a heap buffer because `path` dies at the end of this scope.
       char *buf = new char[path.size() + 1];
       std::copy(path.begin(), path.end(), buf);
       buf[path.size()] = '\0';

       result.model = buf;
     }
   }

   return result;
 }
 static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
  SherpaOnnxOnlineModelConfig config;
  memset(&config, 0, sizeof(config));
@@ -100,6 +124,7 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
  Napi::Object o = obj.Get("modelConfig").As<Napi::Object>();
  config.transducer = GetOnlineTransducerModelConfig(o);
  config.zipformer2_ctc = GetOnlineZipformer2CtcModelConfig(o);
  if (o.Has("tokens") && o.Get("tokens").IsString()) {
    Napi::String tokens = o.Get("tokens").As<Napi::String>();
@@ -147,6 +172,35 @@ static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) {
  return config;
 }
 // Builds a SherpaOnnxOnlineCtcFstDecoderConfig from the "ctcFstDecoderConfig"
 // property of `obj`.
 //
 // The returned struct is zero-filled; fields are set only for properties that
 // are present with the expected type:
 //   - "graph" (string): copied into a NUL-terminated buffer allocated with
 //     new[]; the caller is responsible for releasing it with delete[].
 //   - "maxActive" (number): stored in `max_active` as a 32-bit integer.
 static SherpaOnnxOnlineCtcFstDecoderConfig GetCtcFstDecoderConfig(
     Napi::Object obj) {
   SherpaOnnxOnlineCtcFstDecoderConfig result;
   memset(&result, 0, sizeof(result));

   const bool has_section = obj.Has("ctcFstDecoderConfig") &&
                            obj.Get("ctcFstDecoderConfig").IsObject();
   if (has_section) {
     Napi::Object section = obj.Get("ctcFstDecoderConfig").As<Napi::Object>();

     if (section.Has("graph") && section.Get("graph").IsString()) {
       const std::string path =
           section.Get("graph").As<Napi::String>().Utf8Value();

       // Copy into a heap buffer because `path` dies at the end of this scope.
       char *buf = new char[path.size() + 1];
       std::copy(path.begin(), path.end(), buf);
       buf[path.size()] = '\0';

       result.graph = buf;
     }

     if (section.Has("maxActive") && section.Get("maxActive").IsNumber()) {
       result.max_active =
           section.Get("maxActive").As<Napi::Number>().Int32Value();
     }
   }

   return result;
 }
 static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();
@@ -234,6 +288,8 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
        config.Get("hotwordsScore").As<Napi::Number>().FloatValue();
  }
  c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(config);
 #if 0
  printf("encoder: %s\n", c.model_config.transducer.encoder
                              ? c.model_config.transducer.encoder
@@ -277,6 +333,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
    delete[] c.model_config.transducer.joiner;
  }
  if (c.model_config.zipformer2_ctc.model) {
    delete[] c.model_config.zipformer2_ctc.model;
  }
  if (c.model_config.tokens) {
    delete[] c.model_config.tokens;
  }
@@ -297,6 +357,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
    delete[] c.hotwords_file;
  }
  if (c.ctc_fst_decoder_config.graph) {
    delete[] c.ctc_fst_decoder_config.graph;
  }
  if (!recognizer) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();
--- a/sherpa-onnx/csrc/online-recognizer-ctc-impl.h
+++ b/sherpa-onnx/csrc/online-recognizer-ctc-impl.h
@@ -216,6 +216,8 @@ class OnlineRecognizerCtcImpl : public OnlineRecognizerImpl {
    // clear states
    s->SetStates(model_->GetInitStates());
    s->GetFasterDecoderProcessedFrames() = 0;
    // Note: We only update counters. The underlying audio samples
    // are not discarded.
    s->Reset();