Add runtime support for wespeaker models (#516)
This commit is contained in:
@@ -30,6 +30,8 @@ pybind11_add_module(_sherpa_onnx
|
||||
online-zipformer2-ctc-model-config.cc
|
||||
sherpa-onnx.cc
|
||||
silero-vad-model-config.cc
|
||||
speaker-embedding-extractor.cc
|
||||
speaker-embedding-manager.cc
|
||||
vad-model-config.cc
|
||||
vad-model.cc
|
||||
voice-activity-detector.cc
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
// sherpa-onnx/python/csrc/online-recongizer.h
|
||||
// sherpa-onnx/python/csrc/online-recognizer.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
#include "sherpa-onnx/python/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/python/csrc/online-stream.h"
|
||||
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
|
||||
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
|
||||
#include "sherpa-onnx/python/csrc/vad-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/vad-model.h"
|
||||
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
|
||||
@@ -48,6 +50,8 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
|
||||
PybindVoiceActivityDetector(&m);
|
||||
|
||||
PybindOfflineTts(&m);
|
||||
PybindSpeakerEmbeddingExtractor(&m);
|
||||
PybindSpeakerEmbeddingManager(&m);
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
44
sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
Normal file
44
sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
Normal file
@@ -0,0 +1,44 @@
|
||||
// sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the SpeakerEmbeddingExtractorConfig class on the given module.
//
// Exposed to Python:
//  - a default constructor;
//  - a keyword constructor (model, num_threads=1, debug=False,
//    provider="cpu");
//  - read/write attributes model, num_threads, debug and provider;
//  - validate(), bound to Validate();
//  - __str__, bound to ToString().
//
// @param m  Module that receives the class. It is dereferenced
//           unconditionally, so it must not be null.
static void PybindSpeakerEmbeddingExtractorConfig(py::module *m) {
  using PyClass = SpeakerEmbeddingExtractorConfig;
  py::class_<PyClass>(*m, "SpeakerEmbeddingExtractorConfig")
      .def(py::init<>())
      // Both string arguments are forwarded by const reference so that
      // `provider` is handled the same way as `model` and is not copied an
      // extra time on its way to the C++ constructor.
      .def(py::init<const std::string &, int32_t, bool,
                    const std::string &>(),
           py::arg("model"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}
|
||||
|
||||
// Registers the speaker-embedding extractor bindings on the given module:
// first the config class, then SpeakerEmbeddingExtractor itself.
//
// Python-visible surface of SpeakerEmbeddingExtractor:
//  - __init__(config)
//  - dim            read-only property bound to Dim()
//  - create_stream  bound to CreateStream()
//  - compute        bound to Compute()
//  - is_ready       bound to IsReady()
// Everything except the `dim` property is bound with a
// py::gil_scoped_release call guard.
//
// @param m  Module that receives the classes; dereferenced unconditionally.
void PybindSpeakerEmbeddingExtractor(py::module *m) {
  // The config class is registered before the class whose constructor
  // accepts it.
  PybindSpeakerEmbeddingExtractorConfig(m);

  using PyClass = SpeakerEmbeddingExtractor;
  using ReleaseGil = py::call_guard<py::gil_scoped_release>;

  py::class_<PyClass> extractor(*m, "SpeakerEmbeddingExtractor");

  extractor.def(py::init<const SpeakerEmbeddingExtractorConfig &>(),
                py::arg("config"), ReleaseGil());
  extractor.def_property_readonly("dim", &PyClass::Dim);
  extractor.def("create_stream", &PyClass::CreateStream, ReleaseGil());
  extractor.def("compute", &PyClass::Compute, ReleaseGil());
  extractor.def("is_ready", &PyClass::IsReady, ReleaseGil());
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/speaker-embedding-extractor.h
Normal file
16
sherpa-onnx/python/csrc/speaker-embedding-extractor.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/speaker-embedding-extractor.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python bindings for SpeakerEmbeddingExtractorConfig and
// SpeakerEmbeddingExtractor on the module pointed to by `m`.
void PybindSpeakerEmbeddingExtractor(py::module *m);
|
||||
|
||||
}  // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
|
||||
50
sherpa-onnx/python/csrc/speaker-embedding-manager.cc
Normal file
50
sherpa-onnx/python/csrc/speaker-embedding-manager.cc
Normal file
@@ -0,0 +1,50 @@
|
||||
// sherpa-onnx/python/csrc/speaker-embedding-manager.cc
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the SpeakerEmbeddingManager class on the given module.
//
// Python-visible surface:
//  - __init__(dim)
//  - num_speakers                 read-only property bound to NumSpeakers()
//  - add(name, v) -> bool         forwards to Add()
//  - remove(name) -> bool         forwards to Remove()
//  - search(v, threshold) -> str  forwards to Search()
//  - verify(name, v, threshold) -> bool, forwards to Verify()
// Everything except the `num_speakers` property is bound with a
// py::gil_scoped_release call guard.
//
// Embeddings arrive as std::vector<float> and only their data pointer is
// handed to the C++ object.
// NOTE(review): the vector length is never checked here; presumably the
// manager reads `dim` floats from the pointer -- confirm, and consider
// validating the length before forwarding.
//
// @param m  Module that receives the class; dereferenced unconditionally.
void PybindSpeakerEmbeddingManager(py::module *m) {
  using PyClass = SpeakerEmbeddingManager;
  using Embedding = std::vector<float>;
  using ReleaseGil = py::call_guard<py::gil_scoped_release>;

  // Thin adapters that turn the vector argument into the raw pointer the
  // C++ member functions are called with.
  auto add_speaker = [](const PyClass &manager, const std::string &speaker,
                        const Embedding &embedding) -> bool {
    return manager.Add(speaker, embedding.data());
  };

  auto remove_speaker = [](const PyClass &manager,
                           const std::string &speaker) -> bool {
    return manager.Remove(speaker);
  };

  auto search_speaker = [](const PyClass &manager, const Embedding &embedding,
                           float min_score) -> std::string {
    return manager.Search(embedding.data(), min_score);
  };

  auto verify_speaker = [](const PyClass &manager, const std::string &speaker,
                           const Embedding &embedding,
                           float min_score) -> bool {
    return manager.Verify(speaker, embedding.data(), min_score);
  };

  py::class_<PyClass> manager_class(*m, "SpeakerEmbeddingManager");

  manager_class.def(py::init<int32_t>(), py::arg("dim"), ReleaseGil());
  manager_class.def_property_readonly("num_speakers", &PyClass::NumSpeakers);
  manager_class.def("add", add_speaker, py::arg("name"), py::arg("v"),
                    ReleaseGil());
  manager_class.def("remove", remove_speaker, py::arg("name"), ReleaseGil());
  manager_class.def("search", search_speaker, py::arg("v"),
                    py::arg("threshold"), ReleaseGil());
  manager_class.def("verify", verify_speaker, py::arg("name"), py::arg("v"),
                    py::arg("threshold"), ReleaseGil());
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/speaker-embedding-manager.h
Normal file
16
sherpa-onnx/python/csrc/speaker-embedding-manager.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/speaker-embedding-manager.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python bindings for SpeakerEmbeddingManager on the module
// pointed to by `m`.
void PybindSpeakerEmbeddingManager(py::module *m);
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
|
||||
@@ -32,6 +32,7 @@ void PybindVoiceActivityDetector(py::module *m) {
|
||||
self.AcceptWaveform(samples.data(), samples.size());
|
||||
},
|
||||
py::arg("samples"), py::call_guard<py::gil_scoped_release>())
|
||||
.def_property_readonly("config", &PyClass::GetConfig)
|
||||
.def("empty", &PyClass::Empty, py::call_guard<py::gil_scoped_release>())
|
||||
.def("pop", &PyClass::Pop, py::call_guard<py::gil_scoped_release>())
|
||||
.def("is_speech_detected", &PyClass::IsSpeechDetected,
|
||||
|
||||
@@ -8,6 +8,9 @@ from _sherpa_onnx import (
|
||||
OfflineTtsVitsModelConfig,
|
||||
OnlineStream,
|
||||
SileroVadModelConfig,
|
||||
SpeakerEmbeddingExtractor,
|
||||
SpeakerEmbeddingExtractorConfig,
|
||||
SpeakerEmbeddingManager,
|
||||
SpeechSegment,
|
||||
VadModel,
|
||||
VadModelConfig,
|
||||
|
||||
Reference in New Issue
Block a user