Add APIs about max speech duration in VAD for various programming languages (#1349)
This commit is contained in:
@@ -14,6 +14,7 @@ namespace SherpaOnnx
|
||||
MinSilenceDuration = 0.5F;
|
||||
MinSpeechDuration = 0.25F;
|
||||
WindowSize = 512;
|
||||
MaxSpeechDuration = 5.0F;
|
||||
}
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
@@ -26,5 +27,7 @@ namespace SherpaOnnx
|
||||
public float MinSpeechDuration;
|
||||
|
||||
public int WindowSize;
|
||||
|
||||
public float MaxSpeechDuration;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -771,6 +771,7 @@ type SileroVadModelConfig struct {
|
||||
MinSilenceDuration float32
|
||||
MinSpeechDuration float32
|
||||
WindowSize int
|
||||
MaxSpeechDuration float32
|
||||
}
|
||||
|
||||
type VadModelConfig struct {
|
||||
@@ -849,6 +850,7 @@ func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float3
|
||||
c.silero_vad.min_silence_duration = C.float(config.SileroVad.MinSilenceDuration)
|
||||
c.silero_vad.min_speech_duration = C.float(config.SileroVad.MinSpeechDuration)
|
||||
c.silero_vad.window_size = C.int(config.SileroVad.WindowSize)
|
||||
c.silero_vad.max_speech_duration = C.float(config.SileroVad.MaxSpeechDuration)
|
||||
|
||||
c.sample_rate = C.int(config.SampleRate)
|
||||
c.num_threads = C.int(config.NumThreads)
|
||||
|
||||
@@ -39,6 +39,9 @@ config = {
|
||||
sileroVad: {
|
||||
model: "./silero_vad.onnx",
|
||||
threshold: 0.5,
|
||||
minSilenceDuration: 0.5,
|
||||
minSpeechDuration: 0.25,
|
||||
maxSpeechDuration: 5,
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
@@ -279,6 +279,7 @@ static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_silence_duration, minSilenceDuration);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_speech_duration, minSpeechDuration);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(window_size, windowSize);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(max_speech_duration, maxSpeechDuration);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user