Support spoken language identification with whisper (#694)

2024-03-24 22:57:00 +08:00
parent 3cdad9b5d1
commit 0d258dd150
36 changed files with 1173 additions and 200 deletions
--- a/sherpa-onnx/csrc/offline-whisper-model-config.cc
+++ b/sherpa-onnx/csrc/offline-whisper-model-config.cc
@@ -35,19 +35,28 @@ void OfflineWhisperModelConfig::Register(ParseOptions *po) {

  po->Register(
      "whisper-tail-paddings", &tail_paddings,
-      "Suggest value: 50 for English models. 300 for multilingual models. "
+      "Suggested value: 50 for English models. 300 for multilingual models. "
      "Since we have removed the 30-second constraint, we need to add some "
      "tail padding frames "
-      "so that whisper can detect the eot token. Leave it to -1 to use 50 for "
-      "English models and 300 for multilingual models.");
+      "so that whisper can detect the eot token. Leave it to -1 to use 1000.");
 }

 bool OfflineWhisperModelConfig::Validate() const {
+  if (encoder.empty()) {
+    SHERPA_ONNX_LOGE("Please provide --whisper-encoder");
+    return false;
+  }
+
  if (!FileExists(encoder)) {
    SHERPA_ONNX_LOGE("whisper encoder file %s does not exist", encoder.c_str());
    return false;
  }

+  if (decoder.empty()) {
+    SHERPA_ONNX_LOGE("Please provide --whisper-decoder");
+    return false;
+  }
+
  if (!FileExists(decoder)) {
    SHERPA_ONNX_LOGE("whisper decoder file %s does not exist", decoder.c_str());
    return false;