Give an informative log for whisper on exceptions. (#473)

This commit is contained in:
Fangjun Kuang
2023-12-08 14:33:59 +08:00
committed by GitHub
parent 868c339e5e
commit 0e23f82691
7 changed files with 77 additions and 15 deletions

View File

@@ -116,18 +116,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
NormalizeFeatures(f.data(), num_frames, feat_dim);
// note that 50 is an experience value.
// see also ../../scripts/whisper/test.py
//
// You can replace 50 by other values, say, 100.
// note that 1000 is an experience-value.
// You can replace 1000 by other values, say, 100.
//
// Since we have removed the 30 seconds constraint, we need
// tail_padding_frames so that whisper is able to detect the eot token.
int32_t tail_padding_frames = 50;
if (model_->IsMultiLingual()) {
// 300 is an experience value. If it throws, please use a larger value.
tail_padding_frames = 300;
}
int32_t tail_padding_frames = 1000;
if (config_.model_config.whisper.tail_paddings > 0) {
tail_padding_frames = config_.model_config.whisper.tail_paddings;
@@ -140,11 +134,13 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
Ort::Value mel = Ort::Value::CreateTensor<float>(
model_->Allocator(), shape.data(), shape.size());
float *p_mel = mel.GetTensorMutableData<float>();
std::copy(f.data(), f.data() + actual_frames * feat_dim, p_mel);
memset(p_mel + f.size(), 0,
(actual_frames - num_frames) * feat_dim * sizeof(float));
float *p_mel = mel.GetTensorMutableData<float>();
std::copy(f.data(), f.data() + num_frames * feat_dim, p_mel);
std::fill_n(p_mel + num_frames * feat_dim,
(actual_frames - num_frames) * feat_dim, 0);
mel = Transpose12(model_->Allocator(), &mel);
try {
@@ -156,8 +152,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
auto r = Convert(results[0], symbol_table_);
s->SetResult(r);
} catch (const Ort::Exception &ex) {
SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
ex.what());
SHERPA_ONNX_LOGE(
"\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
"input frames: %d, Current tail "
"paddings: %d. If you see a lot of such exceptions, please consider "
"using a larger --whisper-tail-paddings",
ex.what(), num_frames, tail_padding_frames);
return;
}
}