// sherpa-onnx/csrc/voice-activity-detector.h // // Copyright (c) 2023 Xiaomi Corporation #ifndef SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_ #define SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_ #include #include #if __ANDROID_API__ >= 9 #include "android/asset_manager.h" #include "android/asset_manager_jni.h" #endif #if __OHOS__ #include "rawfile/raw_file_manager.h" #endif #include "sherpa-onnx/csrc/vad-model-config.h" namespace sherpa_onnx { struct SpeechSegment { int32_t start; // in samples std::vector samples; }; class VoiceActivityDetector { public: explicit VoiceActivityDetector(const VadModelConfig &config, float buffer_size_in_seconds = 60); #if __ANDROID_API__ >= 9 VoiceActivityDetector(AAssetManager *mgr, const VadModelConfig &config, float buffer_size_in_seconds = 60); #endif #if __OHOS__ VoiceActivityDetector(NativeResourceManager *mgr, const VadModelConfig &config, float buffer_size_in_seconds = 60); #endif ~VoiceActivityDetector(); void AcceptWaveform(const float *samples, int32_t n); bool Empty() const; void Pop(); void Clear(); const SpeechSegment &Front() const; bool IsSpeechDetected() const; void Reset() const; // At the end of the utterance, you can invoke this method so that // the last speech segment can be detected. void Flush() const; const VadModelConfig &GetConfig() const; private: class Impl; std::unique_ptr impl_; }; } // namespace sherpa_onnx #endif // SHERPA_ONNX_CSRC_VOICE_ACTIVITY_DETECTOR_H_