Add CXX API for VAD (#2077)

This commit is contained in:
Fangjun Kuang
2025-04-01 14:51:43 +08:00
committed by GitHub
parent 6ef9aeb8d8
commit 0703bc1b86
10 changed files with 455 additions and 30 deletions

View File

@@ -558,4 +558,114 @@ int32_t OfflineSpeechDenoiser::GetSampleRate() const {
return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(p_);
}
// Builds a CircularBuffer around the handle returned by
// SherpaOnnxCreateCircularBuffer() for the requested capacity.
CircularBuffer CircularBuffer::Create(int32_t capacity) {
  return CircularBuffer(SherpaOnnxCreateCircularBuffer(capacity));
}
// Wraps an existing C handle by handing it to the MoveOnly base class.
CircularBuffer::CircularBuffer(const SherpaOnnxCircularBuffer *p)
    : MoveOnly<CircularBuffer, SherpaOnnxCircularBuffer>(p) {
}
// Releases the given C handle through SherpaOnnxDestroyCircularBuffer().
void CircularBuffer::Destroy(const SherpaOnnxCircularBuffer *p) const {
  SherpaOnnxDestroyCircularBuffer(p);
}
// Forwards `n` samples starting at `samples` to
// SherpaOnnxCircularBufferPush().
void CircularBuffer::Push(const float *samples, int32_t n) const {
  SherpaOnnxCircularBufferPush(p_, samples, n);
}
// Returns a copy of `n` samples obtained from SherpaOnnxCircularBufferGet()
// for the given `start_index`.
//
// A negative `n` yields an empty vector instead of being converted to a huge
// unsigned element count. A null pointer from the C API is likewise treated
// as "no samples" rather than being dereferenced.
std::vector<float> CircularBuffer::Get(int32_t start_index, int32_t n) const {
  if (n < 0) {
    return {};
  }

  const float *samples = SherpaOnnxCircularBufferGet(p_, start_index, n);
  if (samples == nullptr) {
    return {};
  }

  // Construct the result directly from the returned range (no zero-fill
  // followed by a copy), then release the C-side buffer.
  std::vector<float> ans(samples, samples + n);
  SherpaOnnxCircularBufferFree(samples);
  return ans;
}
// Forwards the request to SherpaOnnxCircularBufferPop() with count `n`.
void CircularBuffer::Pop(int32_t n) const { SherpaOnnxCircularBufferPop(p_, n); }
// Returns the value reported by SherpaOnnxCircularBufferSize().
int32_t CircularBuffer::Size() const {
  const int32_t size = SherpaOnnxCircularBufferSize(p_);
  return size;
}
// Returns the value reported by SherpaOnnxCircularBufferHead().
int32_t CircularBuffer::Head() const {
  const int32_t head = SherpaOnnxCircularBufferHead(p_);
  return head;
}
// Forwards to SherpaOnnxCircularBufferReset().
void CircularBuffer::Reset() const {
  SherpaOnnxCircularBufferReset(p_);
}
// Translates the C++ `config` into a zero-initialized C
// SherpaOnnxVadModelConfig and creates a detector from it via
// SherpaOnnxCreateVoiceActivityDetector().
//
// The string fields of the C struct point into storage owned by `config`
// (obtained with c_str()); no copies are made here.
VoiceActivityDetector VoiceActivityDetector::Create(
    const VadModelConfig &config, float buffer_size_in_seconds) {
  const auto &silero = config.silero_vad;

  SherpaOnnxVadModelConfig c_config;
  memset(&c_config, 0, sizeof(c_config));

  // General options.
  c_config.sample_rate = config.sample_rate;
  c_config.num_threads = config.num_threads;
  c_config.provider = config.provider.c_str();
  c_config.debug = config.debug;

  // Silero VAD options.
  c_config.silero_vad.model = silero.model.c_str();
  c_config.silero_vad.threshold = silero.threshold;
  c_config.silero_vad.min_silence_duration = silero.min_silence_duration;
  c_config.silero_vad.min_speech_duration = silero.min_speech_duration;
  c_config.silero_vad.max_speech_duration = silero.max_speech_duration;
  c_config.silero_vad.window_size = silero.window_size;

  return VoiceActivityDetector(
      SherpaOnnxCreateVoiceActivityDetector(&c_config, buffer_size_in_seconds));
}
// Wraps an existing C handle by handing it to the MoveOnly base class.
VoiceActivityDetector::VoiceActivityDetector(
    const SherpaOnnxVoiceActivityDetector *p)
    : MoveOnly<VoiceActivityDetector, SherpaOnnxVoiceActivityDetector>(p) {
}
// Releases the given C handle through
// SherpaOnnxDestroyVoiceActivityDetector().
void VoiceActivityDetector::Destroy(
    const SherpaOnnxVoiceActivityDetector *p) const {
  SherpaOnnxDestroyVoiceActivityDetector(p);
}
// Forwards `n` samples starting at `samples` to
// SherpaOnnxVoiceActivityDetectorAcceptWaveform().
void VoiceActivityDetector::AcceptWaveform(const float *samples,
                                           int32_t n) const {
  SherpaOnnxVoiceActivityDetectorAcceptWaveform(p_, samples, n);
}
// Returns true when SherpaOnnxVoiceActivityDetectorEmpty() reports a
// non-zero value.
bool VoiceActivityDetector::IsEmpty() const {
  const bool empty = SherpaOnnxVoiceActivityDetectorEmpty(p_);
  return empty;
}
// Returns true when SherpaOnnxVoiceActivityDetectorDetected() reports a
// non-zero value.
bool VoiceActivityDetector::IsDetected() const {
  const bool detected = SherpaOnnxVoiceActivityDetectorDetected(p_);
  return detected;
}
// Forwards to SherpaOnnxVoiceActivityDetectorPop().
void VoiceActivityDetector::Pop() const { SherpaOnnxVoiceActivityDetectorPop(p_); }
// Forwards to SherpaOnnxVoiceActivityDetectorClear().
void VoiceActivityDetector::Clear() const { SherpaOnnxVoiceActivityDetectorClear(p_); }
// Copies the segment returned by SherpaOnnxVoiceActivityDetectorFront() into
// a C++ SpeechSegment and releases the C-side segment.
//
// If the C API returns a null pointer, a value-initialized SpeechSegment is
// returned instead of dereferencing it.
// NOTE(review): presumably callers should check IsEmpty() before calling
// Front() - confirm against the C API contract.
SpeechSegment VoiceActivityDetector::Front() const {
  SpeechSegment segment{};

  auto *f = SherpaOnnxVoiceActivityDetectorFront(p_);
  if (f == nullptr) {
    return segment;
  }

  segment.start = f->start;

  // Only form the [samples, samples + n) range when it is valid.
  if (f->samples != nullptr && f->n > 0) {
    segment.samples.assign(f->samples, f->samples + f->n);
  }

  SherpaOnnxDestroySpeechSegment(f);
  return segment;
}
// Forwards to SherpaOnnxVoiceActivityDetectorReset().
void VoiceActivityDetector::Reset() const { SherpaOnnxVoiceActivityDetectorReset(p_); }
// Forwards to SherpaOnnxVoiceActivityDetectorFlush().
void VoiceActivityDetector::Flush() const { SherpaOnnxVoiceActivityDetectorFlush(p_); }
} // namespace sherpa_onnx::cxx