Limit number of tokens per second for whisper. (#1958)
Otherwise, the decoder spends a lot of time in the decoding loop if the EOT token is not predicted.
This commit is contained in:
@@ -18,8 +18,9 @@ class OfflineWhisperGreedySearchDecoder : public OfflineWhisperDecoder {
|
||||
OfflineWhisperModel *model)
|
||||
: config_(config), model_(model) {}
|
||||
|
||||
std::vector<OfflineWhisperDecoderResult> Decode(Ort::Value cross_k,
|
||||
Ort::Value cross_v) override;
|
||||
std::vector<OfflineWhisperDecoderResult> Decode(
|
||||
Ort::Value cross_k, Ort::Value cross_v,
|
||||
int32_t num_feature_frames) override;
|
||||
|
||||
void SetConfig(const OfflineWhisperModelConfig &config) override;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user