Limit number of tokens per second for whisper. (#1958)

Otherwise, the decoder spends a lot of time in the loop when the EOT token is not predicted.
2025-03-04 15:45:28 +08:00
parent 49177530ff
commit 209eaaae1d
4 changed files with 14 additions and 6 deletions
--- a/sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h
+++ b/sherpa-onnx/csrc/offline-whisper-greedy-search-decoder.h
@@ -18,8 +18,9 @@ class OfflineWhisperGreedySearchDecoder : public OfflineWhisperDecoder {
                                    OfflineWhisperModel *model)
      : config_(config), model_(model) {}

-  std::vector<OfflineWhisperDecoderResult> Decode(Ort::Value cross_k,
-                                                  Ort::Value cross_v) override;
+  std::vector<OfflineWhisperDecoderResult> Decode(
+      Ort::Value cross_k, Ort::Value cross_v,
+      int32_t num_feature_frames) override;

  void SetConfig(const OfflineWhisperModelConfig &config) override;