Limit number of tokens per second for whisper. (#1958)

Otherwise, the decoder spends a lot of time in the loop if the EOT token is not predicted.
2025-03-04 15:45:28 +08:00
parent 49177530ff
commit 209eaaae1d
4 changed files with 14 additions and 6 deletions
--- a/sherpa-onnx/csrc/offline-whisper-decoder.h
+++ b/sherpa-onnx/csrc/offline-whisper-decoder.h
@@ -33,7 +33,8 @@ class OfflineWhisperDecoder {
   * @return Return a vector of size `N` containing the decoded results.
   */
  virtual std::vector<OfflineWhisperDecoderResult> Decode(
-      Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v) = 0;
+      Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v,
+      int32_t num_feature_frames) = 0;

  virtual void SetConfig(const OfflineWhisperModelConfig &config) = 0;
 };