From 494cb5c7330bc6dbf87fe62955cea88a14c05ea1 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Mon, 22 Apr 2024 15:10:39 +0800 Subject: [PATCH] Fix the last character not being recognized for streaming paraformer models. (#799) --- sherpa-onnx/csrc/sherpa-onnx-alsa.cc | 14 ++++++++++++++ sherpa-onnx/csrc/sherpa-onnx-microphone.cc | 13 +++++++++++++ 2 files changed, 27 insertions(+) diff --git a/sherpa-onnx/csrc/sherpa-onnx-alsa.cc b/sherpa-onnx/csrc/sherpa-onnx-alsa.cc index a0c4e3d6..689fb19a 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-alsa.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-alsa.cc @@ -113,6 +113,20 @@ as the device_name. bool is_endpoint = recognizer.IsEndpoint(stream.get()); + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) { + // For streaming paraformer models, since it has a large right chunk size + // we need to pad it on endpointing so that the last character + // can be recognized + std::vector<float> tail_paddings( + static_cast<int>(1.0 * expected_sample_rate)); + stream->AcceptWaveform(expected_sample_rate, tail_paddings.data(), + tail_paddings.size()); + while (recognizer.IsReady(stream.get())) { + recognizer.DecodeStream(stream.get()); + } + text = recognizer.GetResult(stream.get()).text; + } + if (!text.empty() && last_text != text) { last_text = text; diff --git a/sherpa-onnx/csrc/sherpa-onnx-microphone.cc b/sherpa-onnx/csrc/sherpa-onnx-microphone.cc index 1b6760b5..0e32625e 100644 --- a/sherpa-onnx/csrc/sherpa-onnx-microphone.cc +++ b/sherpa-onnx/csrc/sherpa-onnx-microphone.cc @@ -157,6 +157,19 @@ for a list of pre-trained models to download. 
auto text = recognizer.GetResult(s.get()).text; bool is_endpoint = recognizer.IsEndpoint(s.get()); + if (is_endpoint && !config.model_config.paraformer.encoder.empty()) { + // For streaming paraformer models, since it has a large right chunk size + // we need to pad it on endpointing so that the last character + // can be recognized + std::vector<float> tail_paddings(static_cast<int>(1.0 * mic_sample_rate)); + s->AcceptWaveform(mic_sample_rate, tail_paddings.data(), + tail_paddings.size()); + while (recognizer.IsReady(s.get())) { + recognizer.DecodeStream(s.get()); + } + text = recognizer.GetResult(s.get()).text; + } + if (!text.empty() && last_text != text) { last_text = text;