Add runtime support for wespeaker models (#516)

This commit is contained in:
Fangjun Kuang
2024-01-09 22:06:08 +08:00
committed by GitHub
parent 902b21894b
commit 55266918c8
27 changed files with 1291 additions and 4 deletions

View File

@@ -30,6 +30,8 @@ pybind11_add_module(_sherpa_onnx
online-zipformer2-ctc-model-config.cc
sherpa-onnx.cc
silero-vad-model-config.cc
speaker-embedding-extractor.cc
speaker-embedding-manager.cc
vad-model-config.cc
vad-model.cc
voice-activity-detector.cc

View File

@@ -1,4 +1,4 @@
// sherpa-onnx/python/csrc/online-recongizer.h
// sherpa-onnx/python/csrc/online-recognizer.h
//
// Copyright (c) 2023 Xiaomi Corporation

View File

@@ -18,6 +18,8 @@
#include "sherpa-onnx/python/csrc/online-model-config.h"
#include "sherpa-onnx/python/csrc/online-recognizer.h"
#include "sherpa-onnx/python/csrc/online-stream.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
#include "sherpa-onnx/python/csrc/vad-model-config.h"
#include "sherpa-onnx/python/csrc/vad-model.h"
#include "sherpa-onnx/python/csrc/voice-activity-detector.h"
@@ -48,6 +50,8 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
PybindVoiceActivityDetector(&m);
PybindOfflineTts(&m);
PybindSpeakerEmbeddingExtractor(&m);
PybindSpeakerEmbeddingManager(&m);
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,44 @@
// sherpa-onnx/python/csrc/speaker-embedding-extractor.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/speaker-embedding-extractor.h"
#include <string>
#include "sherpa-onnx/csrc/speaker-embedding-extractor.h"
namespace sherpa_onnx {
// Registers the SpeakerEmbeddingExtractorConfig class with a Python module.
//
// The resulting Python class offers:
//   - a no-argument constructor;
//   - a constructor taking (model, num_threads=1, debug=False,
//     provider="cpu");
//   - read/write attributes model, num_threads, debug and provider;
//   - validate(), bound to Validate();
//   - __str__, bound to ToString().
//
// @param m  Module that receives the class definition.
static void PybindSpeakerEmbeddingExtractorConfig(py::module *m) {
  using PyClass = SpeakerEmbeddingExtractorConfig;
  py::class_<PyClass>(*m, "SpeakerEmbeddingExtractorConfig")
      .def(py::init<>())
      // Both string arguments are declared as const references so that the
      // two of them are handled consistently and `provider` is not copied an
      // extra time on its way to the constructor.
      .def(py::init<const std::string &, int32_t, bool,
                    const std::string &>(),
           py::arg("model"), py::arg("num_threads") = 1,
           py::arg("debug") = false, py::arg("provider") = "cpu")
      .def_readwrite("model", &PyClass::model)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def("validate", &PyClass::Validate)
      .def("__str__", &PyClass::ToString);
}
// Exposes the speaker-embedding extractor to Python.
//
// The config class is registered first, followed by
// SpeakerEmbeddingExtractor itself, which gets a constructor taking a config,
// a read-only `dim` property, and the methods create_stream(), compute() and
// is_ready().
//
// @param m  Module that receives both class definitions.
void PybindSpeakerEmbeddingExtractor(py::module *m) {
  using Extractor = SpeakerEmbeddingExtractor;
  using Config = SpeakerEmbeddingExtractorConfig;
  // Call guard attached to the constructor and to every method binding below
  // (but not to the `dim` property) so the GIL is released during the call.
  using ReleaseGil = py::call_guard<py::gil_scoped_release>;

  PybindSpeakerEmbeddingExtractorConfig(m);

  py::class_<Extractor> cls(*m, "SpeakerEmbeddingExtractor");
  cls.def(py::init<const Config &>(), py::arg("config"), ReleaseGil());
  cls.def_property_readonly("dim", &Extractor::Dim);
  cls.def("create_stream", &Extractor::CreateStream, ReleaseGil());
  cls.def("compute", &Extractor::Compute, ReleaseGil());
  cls.def("is_ready", &Extractor::IsReady, ReleaseGil());
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/speaker-embedding-extractor.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
// Registers the Python bindings for SpeakerEmbeddingExtractorConfig and
// SpeakerEmbeddingExtractor in the module pointed to by `m`.
void PybindSpeakerEmbeddingExtractor(py::module *m);
}  // namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_H_

View File

@@ -0,0 +1,50 @@
// sherpa-onnx/python/csrc/speaker-embedding-manager.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/speaker-embedding-manager.h"
#include <string>
#include <vector>
#include "sherpa-onnx/csrc/speaker-embedding-manager.h"
namespace sherpa_onnx {
// Exposes SpeakerEmbeddingManager to Python.
//
// The Python class is constructed from `dim` and offers a read-only
// `num_speakers` property plus add(name, v), remove(name),
// search(v, threshold) and verify(name, v, threshold).
//
// NOTE(review): the wrappers below hand `data()` of the incoming vector
// straight to the manager and never look at its length. Presumably the
// vector must hold at least `dim` floats -- confirm against
// SpeakerEmbeddingManager and consider validating the size here.
//
// @param m  Module that receives the class definition.
void PybindSpeakerEmbeddingManager(py::module *m) {
  using Manager = SpeakerEmbeddingManager;
  // Call guard attached to the constructor and to every method binding below
  // (but not to the `num_speakers` property) so the GIL is released during
  // the call.
  using ReleaseGil = py::call_guard<py::gil_scoped_release>;

  py::class_<Manager> cls(*m, "SpeakerEmbeddingManager");

  cls.def(py::init<int32_t>(), py::arg("dim"), ReleaseGil());

  cls.def_property_readonly("num_speakers", &Manager::NumSpeakers);

  cls.def(
      "add",
      [](const Manager &mgr, const std::string &speaker,
         const std::vector<float> &embedding) -> bool {
        const float *p = embedding.data();
        return mgr.Add(speaker, p);
      },
      py::arg("name"), py::arg("v"), ReleaseGil());

  cls.def(
      "remove",
      [](const Manager &mgr, const std::string &speaker) -> bool {
        return mgr.Remove(speaker);
      },
      py::arg("name"), ReleaseGil());

  cls.def(
      "search",
      [](const Manager &mgr, const std::vector<float> &embedding,
         float threshold) -> std::string {
        const float *p = embedding.data();
        return mgr.Search(p, threshold);
      },
      py::arg("v"), py::arg("threshold"), ReleaseGil());

  cls.def(
      "verify",
      [](const Manager &mgr, const std::string &speaker,
         const std::vector<float> &embedding, float threshold) -> bool {
        const float *p = embedding.data();
        return mgr.Verify(speaker, p, threshold);
      },
      py::arg("name"), py::arg("v"), py::arg("threshold"), ReleaseGil());
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/speaker-embedding-manager.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
#define SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
// Registers the Python bindings for SpeakerEmbeddingManager in the module
// pointed to by `m`.
void PybindSpeakerEmbeddingManager(py::module *m);
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_SPEAKER_EMBEDDING_MANAGER_H_

View File

@@ -32,6 +32,7 @@ void PybindVoiceActivityDetector(py::module *m) {
self.AcceptWaveform(samples.data(), samples.size());
},
py::arg("samples"), py::call_guard<py::gil_scoped_release>())
.def_property_readonly("config", &PyClass::GetConfig)
.def("empty", &PyClass::Empty, py::call_guard<py::gil_scoped_release>())
.def("pop", &PyClass::Pop, py::call_guard<py::gil_scoped_release>())
.def("is_speech_detected", &PyClass::IsSpeechDetected,

View File

@@ -8,6 +8,9 @@ from _sherpa_onnx import (
OfflineTtsVitsModelConfig,
OnlineStream,
SileroVadModelConfig,
SpeakerEmbeddingExtractor,
SpeakerEmbeddingExtractorConfig,
SpeakerEmbeddingManager,
SpeechSegment,
VadModel,
VadModelConfig,