Limit the maximum segment length for VAD. (#990)
This commit is contained in:
@@ -190,6 +190,14 @@ class SileroVadModel::Impl {
|
|||||||
|
|
||||||
int32_t MinSpeechDurationSamples() const { return min_speech_samples_; }
|
int32_t MinSpeechDurationSamples() const { return min_speech_samples_; }
|
||||||
|
|
||||||
|
void SetMinSilenceDuration(float s) {
|
||||||
|
min_silence_samples_ = sample_rate_ * s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetThreshold(float threshold) {
|
||||||
|
config_.silero_vad.threshold = threshold;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void Init(void *model_data, size_t model_data_length) {
|
void Init(void *model_data, size_t model_data_length) {
|
||||||
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
|
sess_ = std::make_unique<Ort::Session>(env_, model_data, model_data_length,
|
||||||
@@ -306,4 +314,12 @@ int32_t SileroVadModel::MinSpeechDurationSamples() const {
|
|||||||
return impl_->MinSpeechDurationSamples();
|
return impl_->MinSpeechDurationSamples();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void SileroVadModel::SetMinSilenceDuration(float s) {
|
||||||
|
impl_->SetMinSilenceDuration(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void SileroVadModel::SetThreshold(float threshold) {
|
||||||
|
impl_->SetThreshold(threshold);
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|||||||
@@ -42,6 +42,9 @@ class SileroVadModel : public VadModel {
|
|||||||
int32_t MinSilenceDurationSamples() const override;
|
int32_t MinSilenceDurationSamples() const override;
|
||||||
int32_t MinSpeechDurationSamples() const override;
|
int32_t MinSpeechDurationSamples() const override;
|
||||||
|
|
||||||
|
void SetMinSilenceDuration(float s) override;
|
||||||
|
void SetThreshold(float threshold) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Impl;
|
class Impl;
|
||||||
std::unique_ptr<Impl> impl_;
|
std::unique_ptr<Impl> impl_;
|
||||||
|
|||||||
@@ -42,6 +42,8 @@ class VadModel {
|
|||||||
|
|
||||||
virtual int32_t MinSilenceDurationSamples() const = 0;
|
virtual int32_t MinSilenceDurationSamples() const = 0;
|
||||||
virtual int32_t MinSpeechDurationSamples() const = 0;
|
virtual int32_t MinSpeechDurationSamples() const = 0;
|
||||||
|
virtual void SetMinSilenceDuration(float s) = 0;
|
||||||
|
virtual void SetThreshold(float threshold) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|||||||
@@ -29,6 +29,14 @@ class VoiceActivityDetector::Impl {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
void AcceptWaveform(const float *samples, int32_t n) {
|
void AcceptWaveform(const float *samples, int32_t n) {
|
||||||
|
if (buffer_.Size() > max_utterance_length_) {
|
||||||
|
model_->SetMinSilenceDuration(new_min_silence_duration_s_);
|
||||||
|
model_->SetThreshold(new_threshold_);
|
||||||
|
} else {
|
||||||
|
model_->SetMinSilenceDuration(config_.silero_vad.min_silence_duration);
|
||||||
|
model_->SetThreshold(config_.silero_vad.threshold);
|
||||||
|
}
|
||||||
|
|
||||||
int32_t window_size = model_->WindowSize();
|
int32_t window_size = model_->WindowSize();
|
||||||
|
|
||||||
// note n is usually window_size and there is no need to use
|
// note n is usually window_size and there is no need to use
|
||||||
@@ -114,6 +122,10 @@ class VoiceActivityDetector::Impl {
|
|||||||
CircularBuffer buffer_;
|
CircularBuffer buffer_;
|
||||||
std::vector<float> last_;
|
std::vector<float> last_;
|
||||||
|
|
||||||
|
int max_utterance_length_ = 16000 * 20; // in samples
|
||||||
|
float new_min_silence_duration_s_ = 0.1;
|
||||||
|
float new_threshold_ = 1.10;
|
||||||
|
|
||||||
int32_t start_ = -1;
|
int32_t start_ = -1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user