Limit the maximum segment length for VAD. (#990)

This commit is contained in:
Fangjun Kuang
2024-06-12 10:49:37 +08:00
committed by GitHub
parent aac86847ad
commit 208da78343
4 changed files with 33 additions and 0 deletions

View File

@@ -29,6 +29,14 @@ class VoiceActivityDetector::Impl {
#endif
void AcceptWaveform(const float *samples, int32_t n) {
if (buffer_.Size() > max_utterance_length_) {
model_->SetMinSilenceDuration(new_min_silence_duration_s_);
model_->SetThreshold(new_threshold_);
} else {
model_->SetMinSilenceDuration(config_.silero_vad.min_silence_duration);
model_->SetThreshold(config_.silero_vad.threshold);
}
int32_t window_size = model_->WindowSize();
// note n is usually window_size and there is no need to use
@@ -114,6 +122,10 @@ class VoiceActivityDetector::Impl {
CircularBuffer buffer_;
std::vector<float> last_;
// Upper bound that AcceptWaveform() compares against buffer_.Size(). While the
// buffer is larger than this, the model is switched to the two override values
// declared below; otherwise the values from config_.silero_vad are used.
// 16000 * 20 presumably means 20 seconds at a 16 kHz sample rate -- TODO
// confirm the sample rate against the model configuration.
int max_utterance_length_ = 16000 * 20; // in samples
// Value passed to model_->SetMinSilenceDuration() while buffer_.Size() exceeds
// max_utterance_length_ (replaces config_.silero_vad.min_silence_duration).
// The `_s_` suffix suggests the unit is seconds -- TODO confirm.
float new_min_silence_duration_s_ = 0.1;
// Value passed to model_->SetThreshold() while buffer_.Size() exceeds
// max_utterance_length_ (replaces config_.silero_vad.threshold).
// NOTE(review): 1.10 is above 1.0; if the model's scores are probabilities in
// [0, 1], no frame could ever exceed this threshold -- confirm this is the
// intended way to end an over-long segment.
float new_threshold_ = 1.10;
int32_t start_ = -1;
};