Configurable low_freq high_freq, dithering (#664)

This commit is contained in:
Karel Vesely
2024-03-22 14:41:44 +01:00
committed by GitHub
parent 2fc1201924
commit eaec4c83c2
10 changed files with 96 additions and 15 deletions

View File

@@ -11,10 +11,17 @@ namespace sherpa_onnx {
// Registers the Python class "FeatureExtractorConfig" on module `m`.
//
// The binding exposes:
//   - a two-argument constructor (sampling_rate, feature_dim),
//   - a five-argument constructor that additionally accepts low_freq,
//     high_freq and dither,
//   - read/write attributes for every configuration field,
//   - __str__, forwarded to FeatureExtractorConfig::ToString.
//
// All constructor arguments carry defaults, so they may be passed by keyword
// from Python.
static void PybindFeatureExtractorConfig(py::module *m) {
  using PyClass = FeatureExtractorConfig;
  py::class_<PyClass>(*m, "FeatureExtractorConfig")
      .def(py::init<int32_t, int32_t>(), py::arg("sampling_rate") = 16000,
           py::arg("feature_dim") = 80)
      .def(py::init<int32_t, int32_t, float, float, float>(),
           py::arg("sampling_rate") = 16000,
           py::arg("feature_dim") = 80,
           py::arg("low_freq") = 20.0f,
           py::arg("high_freq") = -400.0f,
           py::arg("dither") = 0.0f)
      .def_readwrite("sampling_rate", &PyClass::sampling_rate)
      .def_readwrite("feature_dim", &PyClass::feature_dim)
      .def_readwrite("low_freq", &PyClass::low_freq)
      .def_readwrite("high_freq", &PyClass::high_freq)
      // "dither" must map to its own member. Binding it to high_freq would
      // make Python reads/writes of `dither` silently alias `high_freq`.
      .def_readwrite("dither", &PyClass::dither)
      .def("__str__", &PyClass::ToString);
}

View File

@@ -41,6 +41,9 @@ class OnlineRecognizer(object):
num_threads: int = 2,
sample_rate: float = 16000,
feature_dim: int = 80,
low_freq: float = 20.0,
high_freq: float = -400.0,
dither: float = 0.0,
enable_endpoint_detection: bool = False,
rule1_min_trailing_silence: float = 2.4,
rule2_min_trailing_silence: float = 1.2,
@@ -80,6 +83,16 @@ class OnlineRecognizer(object):
Sample rate of the training data used to train the model.
feature_dim:
Dimension of the feature used to train the model.
low_freq:
Low cutoff frequency for mel bins in feature extraction.
high_freq:
High cutoff frequency for mel bins in feature extraction
(if <= 0, offset from Nyquist)
dither:
Dithering constant (0.0 means no dither).
By default the audio samples are in range [-1,+1],
so dithering constant 0.00003 is a good value,
equivalent to the default 1.0 from kaldi
enable_endpoint_detection:
True to enable endpoint detection. False to disable endpoint
detection.
@@ -140,6 +153,9 @@ class OnlineRecognizer(object):
feat_config = FeatureExtractorConfig(
sampling_rate=sample_rate,
feature_dim=feature_dim,
low_freq=low_freq,
high_freq=high_freq,
dither=dither,
)
endpoint_config = EndpointConfig(