Give an informative log for whisper on exceptions. (#473)
This commit is contained in:
@@ -116,18 +116,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
|
||||
|
||||
NormalizeFeatures(f.data(), num_frames, feat_dim);
|
||||
|
||||
// note that 50 is an experience value.
|
||||
// see also ../../scripts/whisper/test.py
|
||||
//
|
||||
// You can replace 50 by other values, say, 100.
|
||||
// note that 1000 is an experience-value.
|
||||
// You can replace 1000 by other values, say, 100.
|
||||
//
|
||||
// Since we have removed the 30 seconds constraint, we need
|
||||
// tail_padding_frames so that whisper is able to detect the eot token.
|
||||
int32_t tail_padding_frames = 50;
|
||||
if (model_->IsMultiLingual()) {
|
||||
// 300 is an experience value. If it throws, please use a larger value.
|
||||
tail_padding_frames = 300;
|
||||
}
|
||||
int32_t tail_padding_frames = 1000;
|
||||
|
||||
if (config_.model_config.whisper.tail_paddings > 0) {
|
||||
tail_padding_frames = config_.model_config.whisper.tail_paddings;
|
||||
@@ -140,11 +134,13 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
|
||||
|
||||
Ort::Value mel = Ort::Value::CreateTensor<float>(
|
||||
model_->Allocator(), shape.data(), shape.size());
|
||||
float *p_mel = mel.GetTensorMutableData<float>();
|
||||
std::copy(f.data(), f.data() + actual_frames * feat_dim, p_mel);
|
||||
|
||||
memset(p_mel + f.size(), 0,
|
||||
(actual_frames - num_frames) * feat_dim * sizeof(float));
|
||||
float *p_mel = mel.GetTensorMutableData<float>();
|
||||
std::copy(f.data(), f.data() + num_frames * feat_dim, p_mel);
|
||||
|
||||
std::fill_n(p_mel + num_frames * feat_dim,
|
||||
(actual_frames - num_frames) * feat_dim, 0);
|
||||
|
||||
mel = Transpose12(model_->Allocator(), &mel);
|
||||
|
||||
try {
|
||||
@@ -156,8 +152,12 @@ class OfflineRecognizerWhisperImpl : public OfflineRecognizerImpl {
|
||||
auto r = Convert(results[0], symbol_table_);
|
||||
s->SetResult(r);
|
||||
} catch (const Ort::Exception &ex) {
|
||||
SHERPA_ONNX_LOGE("\n\nCaught exception:\n\n%s\n\nReturn an empty result",
|
||||
ex.what());
|
||||
SHERPA_ONNX_LOGE(
|
||||
"\n\nCaught exception:\n\n%s\n\nReturn an empty result. Number of "
|
||||
"input frames: %d, Current tail "
|
||||
"paddings: %d. If you see a lot of such exceptions, please consider "
|
||||
"using a larger --whisper-tail-paddings",
|
||||
ex.what(), num_frames, tail_padding_frames);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -261,6 +261,7 @@ class OfflineRecognizer(object):
|
||||
decoding_method: str = "greedy_search",
|
||||
debug: bool = False,
|
||||
provider: str = "cpu",
|
||||
tail_paddings: int = -1,
|
||||
):
|
||||
"""
|
||||
Please refer to
|
||||
@@ -305,6 +306,7 @@ class OfflineRecognizer(object):
|
||||
decoder=decoder,
|
||||
language=language,
|
||||
task=task,
|
||||
tail_paddings=tail_paddings,
|
||||
),
|
||||
tokens=tokens,
|
||||
num_threads=num_threads,
|
||||
|
||||
Reference in New Issue
Block a user