Resize circular buffer on overflow (#422)

This commit is contained in:
Fangjun Kuang
2023-11-13 12:07:51 +08:00
committed by GitHub
parent 9884cf71e7
commit 097d641869
5 changed files with 94 additions and 10 deletions

View File

@@ -4,6 +4,7 @@
#include "sherpa-onnx/csrc/voice-activity-detector.h"
#include <algorithm>
#include <queue>
#include <utility>
@@ -30,7 +31,7 @@ class VoiceActivityDetector::Impl {
void AcceptWaveform(const float *samples, int32_t n) {
int32_t window_size = model_->WindowSize();
// note n is usally window_size and there is no need to use
// note n is usually window_size and there is no need to use
// an extra buffer here
last_.insert(last_.end(), samples, samples + n);
int32_t k = static_cast<int32_t>(last_.size()) / window_size;
@@ -39,7 +40,7 @@ class VoiceActivityDetector::Impl {
for (int32_t i = 0; i != k; ++i, p += window_size) {
buffer_.Push(p, window_size);
is_speech = model_->IsSpeech(p, window_size);
is_speech = is_speech || model_->IsSpeech(p, window_size);
}
last_ = std::vector<float>(
@@ -48,8 +49,9 @@ class VoiceActivityDetector::Impl {
if (is_speech) {
if (start_ == -1) {
// beginning of speech
start_ = buffer_.Tail() - 2 * model_->WindowSize() -
model_->MinSpeechDurationSamples();
start_ = std::max(buffer_.Tail() - 2 * model_->WindowSize() -
model_->MinSpeechDurationSamples(),
buffer_.Head());
}
} else {
// non-speech
@@ -68,6 +70,15 @@ class VoiceActivityDetector::Impl {
buffer_.Pop(end - buffer_.Head());
}
if (start_ == -1) {
int32_t end = buffer_.Tail() - 2 * model_->WindowSize() -
model_->MinSpeechDurationSamples();
int32_t n = std::max(0, end - buffer_.Head());
if (n > 0) {
buffer_.Pop(n);
}
}
start_ = -1;
}
}