Limit the number of tokens per second for Whisper. (#1958)

Without this limit, decoding spends a lot of time in the loop if the EOT
token is not predicted.
This commit is contained in:
Fangjun Kuang
2025-03-04 15:45:28 +08:00
committed by GitHub
parent 49177530ff
commit 209eaaae1d
4 changed files with 14 additions and 6 deletions

View File

@@ -33,7 +33,8 @@ class OfflineWhisperDecoder {
* @return Return a vector of size `N` containing the decoded results.
*/
virtual std::vector<OfflineWhisperDecoderResult> Decode(
Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v) = 0;
Ort::Value n_layer_cross_k, Ort::Value n_layer_cross_v,
int32_t num_feature_frames) = 0;
virtual void SetConfig(const OfflineWhisperModelConfig &config) = 0;
};