Remove the 30-second constraint from whisper. (#471)

This commit is contained in:
Fangjun Kuang
2023-12-07 17:47:08 +08:00
committed by GitHub
parent a7d69359c9
commit 3ae984f148
10 changed files with 178 additions and 78 deletions

View File

@@ -32,6 +32,14 @@ void OfflineWhisperModelConfig::Register(ParseOptions *po) {
"Valid values: transcribe, translate. "
"Note that for non-multilingual models, it supports "
"only 'transcribe'");
po->Register(
"whisper-tail-paddings", &tail_paddings,
"Suggest value: 50 for English models. 300 for multilingual models. "
"Since we have removed the 30-second constraint, we need to add some "
"tail padding frames "
"so that whisper can detect the eot token. Leave it to -1 to use 50 for "
"English models and 300 for multilingual models.");
}
bool OfflineWhisperModelConfig::Validate() const {
@@ -63,7 +71,8 @@ std::string OfflineWhisperModelConfig::ToString() const {
os << "encoder=\"" << encoder << "\", ";
os << "decoder=\"" << decoder << "\", ";
os << "language=\"" << language << "\", ";
os << "task=\"" << task << "\")";
os << "task=\"" << task << "\", ";
os << "tail_paddings=" << tail_paddings << ")";
return os.str();
}