82 lines
2.6 KiB
C++
82 lines
2.6 KiB
C++
// sherpa-onnx/csrc/silero-vad-model-config.cc
|
|
//
|
|
// Copyright (c) 2023 Xiaomi Corporation
|
|
|
|
#include "sherpa-onnx/csrc/silero-vad-model-config.h"
|
|
|
|
#include "sherpa-onnx/csrc/file-utils.h"
|
|
#include "sherpa-onnx/csrc/macros.h"
|
|
|
|
namespace sherpa_onnx {
|
|
|
|
void SileroVadModelConfig::Register(ParseOptions *po) {
|
|
po->Register("silero-vad-model", &model, "Path to silero VAD ONNX model.");
|
|
|
|
po->Register("silero-vad-threshold", &threshold,
|
|
"Speech threshold. Silero VAD outputs speech probabilities for "
|
|
"each audio chunk, probabilities ABOVE this value are "
|
|
"considered as SPEECH. It is better to tune this parameter for "
|
|
"each dataset separately, but lazy "
|
|
"0.5 is pretty good for most datasets.");
|
|
|
|
po->Register(
|
|
"silero-vad-min-silence-duration", &min_silence_duration,
|
|
"In seconds. In the end of each speech chunk wait for "
|
|
"--silero-vad-min-silence-duration seconds before separating it");
|
|
|
|
po->Register("silero-vad-min-speech-duration", &min_speech_duration,
|
|
"In seconds. In the end of each silence chunk wait for "
|
|
"--silero-vad-min-speech-duration seconds before separating it");
|
|
|
|
po->Register(
|
|
"silero-vad-window-size", &window_size,
|
|
"In samples. Audio chunks of --silero-vad-window-size samples are fed "
|
|
"to the silero VAD model. WARNING! Silero VAD models were trained using "
|
|
"512, 1024, 1536 samples for 16000 sample rate and 256, 512, 768 samples "
|
|
"for 8000 sample rate. Values other than these may affect model "
|
|
"perfomance!");
|
|
}
|
|
|
|
bool SileroVadModelConfig::Validate() const {
|
|
if (model.empty()) {
|
|
SHERPA_ONNX_LOGE("Please provide --silero-vad-model");
|
|
return false;
|
|
}
|
|
|
|
if (!FileExists(model)) {
|
|
SHERPA_ONNX_LOGE("Silero vad model file %s does not exist", model.c_str());
|
|
return false;
|
|
}
|
|
|
|
if (threshold < 0.01) {
|
|
SHERPA_ONNX_LOGE(
|
|
"Please use a larger value for --silero-vad-threshold. Given: %f",
|
|
threshold);
|
|
return false;
|
|
}
|
|
|
|
if (threshold >= 1) {
|
|
SHERPA_ONNX_LOGE(
|
|
"Please use a smaller value for --silero-vad-threshold. Given: %f",
|
|
threshold);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
std::string SileroVadModelConfig::ToString() const {
|
|
std::ostringstream os;
|
|
|
|
os << "SilerVadModelConfig(";
|
|
os << "model=\"" << model << "\", ";
|
|
os << "threshold=" << threshold << ", ";
|
|
os << "min_silence_duration=" << min_silence_duration << ", ";
|
|
os << "min_speech_duration=" << min_speech_duration << ", ";
|
|
os << "window_size=" << window_size << ")";
|
|
|
|
return os.str();
|
|
}
|
|
|
|
} // namespace sherpa_onnx
|