From 9fe25cc06f50828939b08e2993ef11e181ac68b4 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Wed, 2 Jul 2025 15:52:49 +0800 Subject: [PATCH] Fix VAD+ASR C++ example. (#2335) It was not able to handle short audio files, e.g., 2.1 seconds. --- scripts/apk/generate-asr-apk-script.py | 60 +++++++++++++++++++ .../csrc/sherpa-onnx-vad-with-offline-asr.cc | 10 ++-- sherpa-onnx/kotlin-api/OnlineRecognizer.kt | 30 ++++++++++ 3 files changed, 96 insertions(+), 4 deletions(-) diff --git a/scripts/apk/generate-asr-apk-script.py b/scripts/apk/generate-asr-apk-script.py index 9f9c50f1..5e44b8ac 100755 --- a/scripts/apk/generate-asr-apk-script.py +++ b/scripts/apk/generate-asr-apk-script.py @@ -300,6 +300,66 @@ def get_models(): ls -lh + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30", + idx=17, + lang="zh", + short_name="large_zipformer_int8", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -fv bpe.model + + rm -rf test_wavs + + ls -lh + + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30", + idx=18, + lang="zh", + short_name="large_zipformer", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! -f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -fv bpe.model + + rm -rf test_wavs + + ls -lh + + popd + """, + ), + Model( + model_name="sherpa-onnx-streaming-zipformer-ctc-fp16-zh-2025-06-30", + idx=19, + lang="zh", + short_name="large_zipformer_fp16", + rule_fsts="itn_zh_number.fst", + cmd=""" + if [ ! 
-f itn_zh_number.fst ]; then + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/itn_zh_number.fst + fi + pushd $model_name + rm -fv bpe.model + + rm -rf test_wavs + + ls -lh + popd """, ), diff --git a/sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc b/sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc index 2c806c92..94d7ff6b 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-vad-with-offline-asr.cc @@ -186,13 +186,15 @@ for a list of pre-trained models to download. fprintf(stderr, "Started!\n"); int32_t window_size = vad_config.silero_vad.window_size; int32_t i = 0; - while (i + window_size < samples.size()) { - vad->AcceptWaveform(samples.data() + i, window_size); - i += window_size; - if (i >= samples.size()) { + while (i < samples.size()) { + if (i + window_size <= samples.size()) { + vad->AcceptWaveform(samples.data() + i, window_size); + } else { vad->Flush(); } + i += window_size; + while (!vad->Empty()) { const auto &segment = vad->Front(); float duration = segment.samples.size() / 16000.; diff --git a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt index 199e4052..f47ac69b 100644 --- a/sherpa-onnx/kotlin-api/OnlineRecognizer.kt +++ b/sherpa-onnx/kotlin-api/OnlineRecognizer.kt @@ -395,6 +395,36 @@ fun getModelConfig(type: Int): OnlineModelConfig? 
{ ) } + 17 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30" + return OnlineModelConfig( + zipformer2Ctc = OnlineZipformer2CtcModelConfig( + model = "$modelDir/model.int8.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 18 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30" + return OnlineModelConfig( + zipformer2Ctc = OnlineZipformer2CtcModelConfig( + model = "$modelDir/model.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + + 19 -> { + val modelDir = "sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30" + return OnlineModelConfig( + zipformer2Ctc = OnlineZipformer2CtcModelConfig( + model = "$modelDir/model.fp16.onnx", + ), + tokens = "$modelDir/tokens.txt", + ) + } + 1000 -> { val modelDir = "sherpa-onnx-rk3588-streaming-zipformer-bilingual-zh-en-2023-02-20" return OnlineModelConfig(