Add Python API for speech enhancement GTCRN models (#1978)

This commit is contained in:
Fangjun Kuang
2025-03-10 19:02:17 +08:00
committed by GitHub
parent 488a6e687c
commit 5d2d792b1d
12 changed files with 268 additions and 0 deletions

View File

@@ -18,6 +18,9 @@ set(srcs
offline-punctuation.cc
offline-recognizer.cc
offline-sense-voice-model-config.cc
offline-speech-denoiser-gtcrn-model-config.cc
offline-speech-denoiser-model-config.cc
offline-speech-denoiser.cc
offline-stream.cc
offline-tdnn-model-config.cc
offline-transducer-model-config.cc

View File

@@ -0,0 +1,22 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h"
#include <string>
#include "sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-model-config.h"
namespace sherpa_onnx {
// Exposes OfflineSpeechDenoiserGtcrnModelConfig to Python on module `m`.
//
// The Python class gets:
//  - a constructor with an optional `model` keyword (defaults to ""),
//  - a read/write `model` attribute,
//  - `validate()`, forwarding to Validate(),
//  - `__str__`, forwarding to ToString().
void PybindOfflineSpeechDenoiserGtcrnModelConfig(py::module *m) {
  using Config = OfflineSpeechDenoiserGtcrnModelConfig;

  py::class_<Config> cls(*m, "OfflineSpeechDenoiserGtcrnModelConfig");

  cls.def(py::init<const std::string &>(), py::arg("model") = "");
  cls.def_readwrite("model", &Config::model);
  cls.def("validate", &Config::Validate);
  cls.def("__str__", &Config::ToString);
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
// Registers the Python class "OfflineSpeechDenoiserGtcrnModelConfig" on the
// given pybind11 module.
//
// @param m  Module that receives the binding.
void PybindOfflineSpeechDenoiserGtcrnModelConfig(py::module *m);
}  // namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_GTCRN_MODEL_CONFIG_H_

View File

@@ -0,0 +1,33 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h"
#include <string>
#include "sherpa-onnx/csrc/offline-speech-denoiser-model-config.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-gtcrn-model-config.h"
namespace sherpa_onnx {
// Exposes OfflineSpeechDenoiserModelConfig to Python on module `m`.
//
// The Python class can be built either with no arguments or with the
// keywords `gtcrn`, `num_threads`, `debug` and `provider`, every one of which
// has a default. The same four names are also read/write attributes, and
// `validate()` / `__str__` forward to Validate() / ToString().
void PybindOfflineSpeechDenoiserModelConfig(py::module *m) {
  // Bind the nested GTCRN config first: an instance of it is used as a
  // default argument value in the constructor binding below.
  PybindOfflineSpeechDenoiserGtcrnModelConfig(m);

  using Config = OfflineSpeechDenoiserModelConfig;
  using GtcrnConfig = OfflineSpeechDenoiserGtcrnModelConfig;

  py::class_<Config> cls(*m, "OfflineSpeechDenoiserModelConfig");

  // Constructors.
  cls.def(py::init<>());
  cls.def(py::init<const GtcrnConfig &, int32_t, bool, const std::string &>(),
          py::arg("gtcrn") = GtcrnConfig{}, py::arg("num_threads") = 1,
          py::arg("debug") = false, py::arg("provider") = "cpu");

  // Plain data members.
  cls.def_readwrite("gtcrn", &Config::gtcrn);
  cls.def_readwrite("num_threads", &Config::num_threads);
  cls.def_readwrite("debug", &Config::debug);
  cls.def_readwrite("provider", &Config::provider);

  // Helpers.
  cls.def("validate", &Config::Validate);
  cls.def("__str__", &Config::ToString);
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
// Registers the Python class "OfflineSpeechDenoiserModelConfig" on the given
// pybind11 module. The implementation also registers the nested
// "OfflineSpeechDenoiserGtcrnModelConfig" class on the same module.
//
// @param m  Module that receives the bindings.
void PybindOfflineSpeechDenoiserModelConfig(py::module *m);
}  // namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_MODEL_CONFIG_H_

View File

@@ -0,0 +1,61 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser.cc
//
// Copyright (c) 2025 Xiaomi Corporation
#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"
#include <vector>
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser-model-config.h"
namespace sherpa_onnx {
// Exposes OfflineSpeechDenoiserConfig to Python on module `m`.
//
// The Python class can be built with no arguments or with an optional
// `model` keyword; `model` is also a read/write attribute. `validate()` and
// `__str__` forward to Validate() and ToString().
void PybindOfflineSpeechDenoiserConfig(py::module *m) {
  // Bind the model config first: an instance of it is used as a default
  // argument value in the constructor binding below.
  PybindOfflineSpeechDenoiserModelConfig(m);

  using Config = OfflineSpeechDenoiserConfig;
  using ModelConfig = OfflineSpeechDenoiserModelConfig;

  py::class_<Config> cls(*m, "OfflineSpeechDenoiserConfig");

  cls.def(py::init<>());
  cls.def(py::init<const ModelConfig &>(), py::arg("model") = ModelConfig{});
  cls.def_readwrite("model", &Config::model);
  cls.def("validate", &Config::Validate);
  cls.def("__str__", &Config::ToString);
}
// Exposes DenoisedAudio to Python on module `m` as a class with two
// read-only properties, `sample_rate` and `samples`. No constructor is bound.
void PybindDenoisedAudio(py::module *m) {
  using Audio = DenoisedAudio;

  // Getters return the members by value.
  const auto get_sample_rate = [](const Audio &self) {
    return self.sample_rate;
  };
  const auto get_samples = [](const Audio &self) { return self.samples; };

  py::class_<Audio> cls(*m, "DenoisedAudio");
  cls.def_property_readonly("sample_rate", get_sample_rate);
  cls.def_property_readonly("samples", get_samples);
}
// Exposes OfflineSpeechDenoiser to Python on module `m`, together with the
// classes it depends on (OfflineSpeechDenoiserConfig and DenoisedAudio).
//
// Python-side API of OfflineSpeechDenoiser:
//  - constructor taking `config`,
//  - `__call__(samples, sample_rate)` and `run(samples, sample_rate)`, which
//    both forward to Run(),
//  - read-only property `sample_rate`, backed by GetSampleRate().
//
// The constructor and both call entry points are bound with
// py::gil_scoped_release as their call guard.
void PybindOfflineSpeechDenoiser(py::module *m) {
  PybindOfflineSpeechDenoiserConfig(m);
  PybindDenoisedAudio(m);

  using Denoiser = OfflineSpeechDenoiser;

  // Shared implementation behind both `__call__` and `run`.
  auto denoise = [](const Denoiser &self, const std::vector<float> &samples,
                    int32_t sample_rate) {
    return self.Run(samples.data(), samples.size(), sample_rate);
  };

  py::class_<Denoiser> cls(*m, "OfflineSpeechDenoiser");

  cls.def(py::init<const OfflineSpeechDenoiserConfig &>(), py::arg("config"),
          py::call_guard<py::gil_scoped_release>());
  cls.def("__call__", denoise, py::call_guard<py::gil_scoped_release>());
  cls.def("run", denoise, py::call_guard<py::gil_scoped_release>());
  cls.def_property_readonly("sample_rate", &Denoiser::GetSampleRate);
}
} // namespace sherpa_onnx

View File

@@ -0,0 +1,16 @@
// sherpa-onnx/python/csrc/offline-speech-denoiser.h
//
// Copyright (c) 2025 Xiaomi Corporation
#ifndef SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_
#define SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
namespace sherpa_onnx {
// Registers the Python class "OfflineSpeechDenoiser" on the given pybind11
// module. The implementation also registers "OfflineSpeechDenoiserConfig"
// and "DenoisedAudio" on the same module.
//
// @param m  Module that receives the bindings.
void PybindOfflineSpeechDenoiser(py::module *m);
}  // namespace sherpa_onnx
#endif // SHERPA_ONNX_PYTHON_CSRC_OFFLINE_SPEECH_DENOISER_H_

View File

@@ -16,6 +16,7 @@
#include "sherpa-onnx/python/csrc/offline-model-config.h"
#include "sherpa-onnx/python/csrc/offline-punctuation.h"
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
#include "sherpa-onnx/python/csrc/offline-speech-denoiser.h"
#include "sherpa-onnx/python/csrc/offline-stream.h"
#include "sherpa-onnx/python/csrc/online-ctc-fst-decoder-config.h"
#include "sherpa-onnx/python/csrc/online-lm-config.h"
@@ -87,6 +88,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
#endif
PybindAlsa(&m);
PybindOfflineSpeechDenoiser(&m);
}
} // namespace sherpa_onnx