Support silero_vad version 5 (#1064)

This commit is contained in:
Fangjun Kuang
2024-06-29 11:45:04 +08:00
committed by GitHub
parent 8c4f576f1b
commit 61c7eb3063
6 changed files with 203 additions and 50 deletions

View File

@@ -38,16 +38,20 @@ class VoiceActivityDetector::Impl {
}
int32_t window_size = model_->WindowSize();
int32_t window_shift = model_->WindowShift();
// note n is usually window_size and there is no need to use
// an extra buffer here
last_.insert(last_.end(), samples, samples + n);
int32_t k = static_cast<int32_t>(last_.size()) / window_size;
// Note: For v4, window_shift == window_size
int32_t k =
(static_cast<int32_t>(last_.size()) - window_size) / window_shift + 1;
const float *p = last_.data();
bool is_speech = false;
for (int32_t i = 0; i != k; ++i, p += window_size) {
buffer_.Push(p, window_size);
for (int32_t i = 0; i != k; ++i, p += window_shift) {
buffer_.Push(p, window_shift);
// NOTE(fangjun): Please don't use a very large n.
bool this_window_is_speech = model_->IsSpeech(p, window_size);
is_speech = is_speech || this_window_is_speech;