Add APIs about max speech duration in VAD for various programming languages (#1349)
This commit is contained in:
@@ -907,6 +907,9 @@ SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector(
|
||||
vad_config.silero_vad.window_size =
|
||||
SHERPA_ONNX_OR(config->silero_vad.window_size, 512);
|
||||
|
||||
vad_config.silero_vad.max_speech_duration =
|
||||
SHERPA_ONNX_OR(config->silero_vad.max_speech_duration, 20);
|
||||
|
||||
vad_config.sample_rate = SHERPA_ONNX_OR(config->sample_rate, 16000);
|
||||
vad_config.num_threads = SHERPA_ONNX_OR(config->num_threads, 1);
|
||||
vad_config.provider = SHERPA_ONNX_OR(config->provider, "cpu");
|
||||
|
||||
@@ -746,6 +746,11 @@ SHERPA_ONNX_API typedef struct SherpaOnnxSileroVadModelConfig {
|
||||
float min_speech_duration;
|
||||
|
||||
int window_size;
|
||||
|
||||
// If a speech segment is longer than this value, then we increase
|
||||
// the threshold to 0.9. After finishing detecting the segment,
|
||||
// the threshold value is reset to its original value.
|
||||
float max_speech_duration;
|
||||
} SherpaOnnxSileroVadModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxVadModelConfig {
|
||||
|
||||
Reference in New Issue
Block a user