Add APIs for setting the max speech duration in VAD for various programming languages (#1349)

This commit is contained in:
Fangjun Kuang
2024-09-14 12:30:13 +08:00
committed by GitHub
parent 1423ddb1f0
commit e7ffcbd677
31 changed files with 88 additions and 9 deletions

View File

@@ -14,6 +14,7 @@ namespace SherpaOnnx
MinSilenceDuration = 0.5F;
MinSpeechDuration = 0.25F;
WindowSize = 512;
MaxSpeechDuration = 5.0F;
}
[MarshalAs(UnmanagedType.LPStr)]
@@ -26,5 +27,7 @@ namespace SherpaOnnx
public float MinSpeechDuration;
public int WindowSize;
public float MaxSpeechDuration;
}
}

View File

@@ -771,6 +771,7 @@ type SileroVadModelConfig struct {
MinSilenceDuration float32
MinSpeechDuration float32
WindowSize int
MaxSpeechDuration float32
}
type VadModelConfig struct {
@@ -849,6 +850,7 @@ func NewVoiceActivityDetector(config *VadModelConfig, bufferSizeInSeconds float3
c.silero_vad.min_silence_duration = C.float(config.SileroVad.MinSilenceDuration)
c.silero_vad.min_speech_duration = C.float(config.SileroVad.MinSpeechDuration)
c.silero_vad.window_size = C.int(config.SileroVad.WindowSize)
c.silero_vad.max_speech_duration = C.float(config.SileroVad.MaxSpeechDuration)
c.sample_rate = C.int(config.SampleRate)
c.num_threads = C.int(config.NumThreads)

View File

@@ -39,6 +39,9 @@ config = {
sileroVad: {
model: "./silero_vad.onnx",
threshold: 0.5,
minSilenceDuration: 0.5,
minSpeechDuration: 0.25,
maxSpeechDuration: 5,
}
}
*/

View File

@@ -279,6 +279,7 @@ static SherpaOnnxSileroVadModelConfig GetSileroVadConfig(
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_silence_duration, minSilenceDuration);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(min_speech_duration, minSpeechDuration);
SHERPA_ONNX_ASSIGN_ATTR_INT32(window_size, windowSize);
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(max_speech_duration, maxSpeechDuration);
return c;
}