Add Silero VAD (#313)
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
include_directories(${CMAKE_SOURCE_DIR})
|
||||
|
||||
pybind11_add_module(_sherpa_onnx
|
||||
circular-buffer.cc
|
||||
display.cc
|
||||
endpoint.cc
|
||||
features.cc
|
||||
@@ -20,6 +21,10 @@ pybind11_add_module(_sherpa_onnx
|
||||
online-stream.cc
|
||||
online-transducer-model-config.cc
|
||||
sherpa-onnx.cc
|
||||
silero-vad-model-config.cc
|
||||
vad-model-config.cc
|
||||
vad-model.cc
|
||||
voice-activity-detector.cc
|
||||
)
|
||||
|
||||
if(APPLE)
|
||||
|
||||
31
sherpa-onnx/python/csrc/circular-buffer.cc
Normal file
31
sherpa-onnx/python/csrc/circular-buffer.cc
Normal file
@@ -0,0 +1,31 @@
|
||||
// sherpa-onnx/python/csrc/circular-buffer.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/circular-buffer.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/circular-buffer.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `CircularBuffer` on module `m`, wrapping
// sherpa_onnx::CircularBuffer.
//
// Python surface:
//   CircularBuffer(capacity)  - constructor taking an int32_t
//   push(samples)             - forwards samples.data()/samples.size() to Push()
//   get(start_index, n)       - bound directly to Get()
//   pop(n)                    - bound directly to Pop()
//   reset()                   - bound directly to Reset()
//   size / head / tail        - read-only properties backed by Size(), Head()
//                               and Tail()
void PybindCircularBuffer(py::module *m) {
  // Push() is invoked with a pointer and a count, so adapt it to take a
  // std::vector<float> argument on the Python-facing side.
  auto push_samples = [](CircularBuffer &buffer,
                         const std::vector<float> &samples) {
    buffer.Push(samples.data(), samples.size());
  };

  py::class_<CircularBuffer> binding(*m, "CircularBuffer");

  binding.def(py::init<int32_t>(), py::arg("capacity"));
  binding.def("push", push_samples, py::arg("samples"));
  binding.def("get", &CircularBuffer::Get, py::arg("start_index"),
              py::arg("n"));
  binding.def("pop", &CircularBuffer::Pop, py::arg("n"));
  binding.def("reset", &CircularBuffer::Reset);

  // Exposed as attributes rather than methods.
  binding.def_property_readonly("size", &CircularBuffer::Size);
  binding.def_property_readonly("head", &CircularBuffer::Head);
  binding.def_property_readonly("tail", &CircularBuffer::Tail);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/circular-buffer.h
Normal file
16
sherpa-onnx/python/csrc/circular-buffer.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/circular-buffer.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python binding for CircularBuffer on module `m`.
void PybindCircularBuffer(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_CIRCULAR_BUFFER_H_
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
#include "sherpa-onnx/python/csrc/circular-buffer.h"
|
||||
#include "sherpa-onnx/python/csrc/display.h"
|
||||
#include "sherpa-onnx/python/csrc/endpoint.h"
|
||||
#include "sherpa-onnx/python/csrc/features.h"
|
||||
@@ -15,6 +16,9 @@
|
||||
#include "sherpa-onnx/python/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/python/csrc/online-stream.h"
|
||||
#include "sherpa-onnx/python/csrc/vad-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/vad-model.h"
|
||||
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
@@ -34,6 +38,11 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
|
||||
PybindOfflineLMConfig(&m);
|
||||
PybindOfflineModelConfig(&m);
|
||||
PybindOfflineRecognizer(&m);
|
||||
|
||||
PybindVadModelConfig(&m);
|
||||
PybindVadModel(&m);
|
||||
PybindCircularBuffer(&m);
|
||||
PybindVoiceActivityDetector(&m);
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
43
sherpa-onnx/python/csrc/silero-vad-model-config.cc
Normal file
43
sherpa-onnx/python/csrc/silero-vad-model-config.cc
Normal file
@@ -0,0 +1,43 @@
|
||||
// sherpa-onnx/python/csrc/silero-vad-model-config.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/silero-vad-model-config.h"
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/silero-vad-model-config.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `SileroVadModelConfig` on module `m`.
//
// Two constructors are exposed: a no-argument one, and a keyword-friendly one
// taking `model` plus optional `threshold` (0.5), `min_silence_duration`
// (0.5), `min_speech_duration` (0.25) and `window_size` (512). All five
// fields are also readable/writable attributes. `__str__` maps to ToString()
// and `validate` maps to Validate().
void PybindSileroVadModelConfig(py::module *m) {
  using Config = SileroVadModelConfig;

  py::class_<Config> binding(*m, "SileroVadModelConfig");

  binding.def(py::init<>());

  // Factory-style constructor: start from a default-constructed config and
  // overwrite each field with the value supplied by the caller.
  binding.def(
      py::init([](const std::string &model, float threshold,
                  float min_silence_duration, float min_speech_duration,
                  int32_t window_size) -> std::unique_ptr<Config> {
        std::unique_ptr<Config> config = std::make_unique<Config>();

        config->model = model;
        config->threshold = threshold;
        config->min_silence_duration = min_silence_duration;
        config->min_speech_duration = min_speech_duration;
        config->window_size = window_size;

        return config;
      }),
      py::arg("model"), py::arg("threshold") = 0.5,
      py::arg("min_silence_duration") = 0.5,
      py::arg("min_speech_duration") = 0.25, py::arg("window_size") = 512);

  // Plain data members exposed as mutable attributes.
  binding.def_readwrite("model", &Config::model);
  binding.def_readwrite("threshold", &Config::threshold);
  binding.def_readwrite("min_silence_duration", &Config::min_silence_duration);
  binding.def_readwrite("min_speech_duration", &Config::min_speech_duration);
  binding.def_readwrite("window_size", &Config::window_size);

  binding.def("__str__", &Config::ToString);
  binding.def("validate", &Config::Validate);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/silero-vad-model-config.h
Normal file
16
sherpa-onnx/python/csrc/silero-vad-model-config.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/silero-vad-model-config.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python binding for SileroVadModelConfig on module `m`.
void PybindSileroVadModelConfig(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_SILERO_VAD_MODEL_CONFIG_H_
|
||||
34
sherpa-onnx/python/csrc/vad-model-config.cc
Normal file
34
sherpa-onnx/python/csrc/vad-model-config.cc
Normal file
@@ -0,0 +1,34 @@
|
||||
// sherpa-onnx/python/csrc/vad-model-config.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/vad-model-config.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/vad-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/silero-vad-model-config.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `VadModelConfig` on module `m`.
//
// PybindSileroVadModelConfig(m) is invoked first, so `SileroVadModelConfig`
// is registered before the class that holds one as its `silero_vad` field.
//
// Constructors: a no-argument one, and one taking `silero_vad` plus optional
// `sample_rate` (16000), `num_threads` (1), `provider` ("cpu") and `debug`
// (false). Each field is also a readable/writable attribute. `__str__` maps
// to ToString() and `validate` maps to Validate().
void PybindVadModelConfig(py::module *m) {
  PybindSileroVadModelConfig(m);

  using Config = VadModelConfig;

  py::class_<Config> binding(*m, "VadModelConfig");

  binding.def(py::init<>());
  binding.def(py::init<const SileroVadModelConfig &, int32_t, int32_t,
                       const std::string &, bool>(),
              py::arg("silero_vad"), py::arg("sample_rate") = 16000,
              py::arg("num_threads") = 1, py::arg("provider") = "cpu",
              py::arg("debug") = false);

  // Plain data members exposed as mutable attributes.
  binding.def_readwrite("silero_vad", &Config::silero_vad);
  binding.def_readwrite("sample_rate", &Config::sample_rate);
  binding.def_readwrite("num_threads", &Config::num_threads);
  binding.def_readwrite("provider", &Config::provider);
  binding.def_readwrite("debug", &Config::debug);

  binding.def("__str__", &Config::ToString);
  binding.def("validate", &Config::Validate);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/vad-model-config.h
Normal file
16
sherpa-onnx/python/csrc/vad-model-config.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/vad-model-config.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python bindings for VadModelConfig (and, through it,
// SileroVadModelConfig) on module `m`.
void PybindVadModelConfig(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_CONFIG_H_
|
||||
29
sherpa-onnx/python/csrc/vad-model.cc
Normal file
29
sherpa-onnx/python/csrc/vad-model.cc
Normal file
@@ -0,0 +1,29 @@
|
||||
// sherpa-onnx/python/csrc/vad-model.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/vad-model.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/vad-model.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `VadModel` on module `m`.
//
// Python surface:
//   VadModel.create(config)          - static, bound to VadModel::Create
//   reset()                          - bound to Reset()
//   is_speech(samples) -> bool       - forwards samples.data()/samples.size()
//                                      to IsSpeech()
//   window_size()                    - bound to WindowSize()
//   min_silence_duration_samples()   - bound to MinSilenceDurationSamples()
//   min_speech_duration_samples()    - bound to MinSpeechDurationSamples()
//
// No constructor is bound; `create` is the only factory exposed here.
void PybindVadModel(py::module *m) {
  // IsSpeech() is invoked with a pointer and a count, so adapt it to take a
  // std::vector<float> argument on the Python-facing side.
  auto is_speech = [](VadModel &model,
                      const std::vector<float> &samples) -> bool {
    return model.IsSpeech(samples.data(), samples.size());
  };

  py::class_<VadModel> binding(*m, "VadModel");

  binding.def_static("create", &VadModel::Create, py::arg("config"));
  binding.def("reset", &VadModel::Reset);
  binding.def("is_speech", is_speech, py::arg("samples"));
  binding.def("window_size", &VadModel::WindowSize);
  binding.def("min_silence_duration_samples",
              &VadModel::MinSilenceDurationSamples);
  binding.def("min_speech_duration_samples",
              &VadModel::MinSpeechDurationSamples);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/vad-model.h
Normal file
16
sherpa-onnx/python/csrc/vad-model.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/vad-model.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python binding for VadModel on module `m`.
void PybindVadModel(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_VAD_MODEL_H_
|
||||
41
sherpa-onnx/python/csrc/voice-activity-detector.cc
Normal file
41
sherpa-onnx/python/csrc/voice-activity-detector.cc
Normal file
@@ -0,0 +1,41 @@
|
||||
// sherpa-onnx/python/csrc/voice-activity-detector.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/voice-activity-detector.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `SpeechSegment` on module `m`.
//
// No constructor is bound. The class exposes two read-only properties,
// `start` and `samples`, each returning a copy of the corresponding field.
void PybindSpeechSegment(py::module *m) {
  // Getters return by value, so Python receives copies of the fields.
  auto get_start = [](const SpeechSegment &segment) { return segment.start; };
  auto get_samples = [](const SpeechSegment &segment) {
    return segment.samples;
  };

  py::class_<SpeechSegment> binding(*m, "SpeechSegment");

  binding.def_property_readonly("start", get_start);
  binding.def_property_readonly("samples", get_samples);
}
|
||||
|
||||
// Registers the Python class `VoiceActivityDetector` on module `m`.
//
// PybindSpeechSegment(m) is invoked first, so `SpeechSegment` is registered
// before the detector class.
//
// Python surface:
//   VoiceActivityDetector(config, buffer_size_in_seconds=60)
//   accept_waveform(samples)  - forwards samples.data()/samples.size() to
//                               AcceptWaveform()
//   empty()                   - bound to Empty()
//   pop()                     - bound to Pop()
//   is_speech_detected()      - bound to IsSpeechDetected()
//   reset()                   - bound to Reset()
//   front                     - read-only property backed by Front()
void PybindVoiceActivityDetector(py::module *m) {
  PybindSpeechSegment(m);

  using Detector = VoiceActivityDetector;

  // AcceptWaveform() is invoked with a pointer and a count, so adapt it to
  // take a std::vector<float> argument on the Python-facing side.
  auto accept_waveform = [](Detector &detector,
                            const std::vector<float> &samples) {
    detector.AcceptWaveform(samples.data(), samples.size());
  };

  py::class_<Detector> binding(*m, "VoiceActivityDetector");

  binding.def(py::init<const VadModelConfig &, float>(), py::arg("config"),
              py::arg("buffer_size_in_seconds") = 60);
  binding.def("accept_waveform", accept_waveform, py::arg("samples"));
  binding.def("empty", &Detector::Empty);
  binding.def("pop", &Detector::Pop);
  binding.def("is_speech_detected", &Detector::IsSpeechDetected);
  binding.def("reset", &Detector::Reset);

  // Exposed as an attribute rather than a method.
  binding.def_property_readonly("front", &Detector::Front);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/voice-activity-detector.h
Normal file
16
sherpa-onnx/python/csrc/voice-activity-detector.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/voice-activity-detector.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python bindings for VoiceActivityDetector (and, through it,
// SpeechSegment) on module `m`.
void PybindVoiceActivityDetector(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_VOICE_ACTIVITY_DETECTOR_H_
|
||||
Reference in New Issue
Block a user