Configurable low_freq high_freq, dithering (#664)

2024-03-22 14:41:44 +01:00
parent 2fc1201924
commit eaec4c83c2
10 changed files with 96 additions and 15 deletions
--- a/sherpa-onnx/python/csrc/features.cc
+++ b/sherpa-onnx/python/csrc/features.cc
@@ -11,10 +11,17 @@ namespace sherpa_onnx {
 static void PybindFeatureExtractorConfig(py::module *m) {
  using PyClass = FeatureExtractorConfig;
  py::class_<PyClass>(*m, "FeatureExtractorConfig")
-      .def(py::init<int32_t, int32_t>(), py::arg("sampling_rate") = 16000,
-           py::arg("feature_dim") = 80)
+      .def(py::init<int32_t, int32_t, float, float, float>(),
+           py::arg("sampling_rate") = 16000,
+           py::arg("feature_dim") = 80,
+           py::arg("low_freq") = 20.0f,
+           py::arg("high_freq") = -400.0f,
+           py::arg("dither") = 0.0f)
      .def_readwrite("sampling_rate", &PyClass::sampling_rate)
      .def_readwrite("feature_dim", &PyClass::feature_dim)
+      .def_readwrite("low_freq", &PyClass::low_freq)
+      .def_readwrite("high_freq", &PyClass::high_freq)
+      .def_readwrite("dither", &PyClass::dither)
      .def("__str__", &PyClass::ToString);
 }

--- a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
+++ b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
@@ -41,6 +41,9 @@ class OnlineRecognizer(object):
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
+        low_freq: float = 20.0,
+        high_freq: float = -400.0,
+        dither: float = 0.0,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
@@ -80,6 +83,16 @@ class OnlineRecognizer(object):
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
+          low_freq:
+            Low cutoff frequency for mel bins in feature extraction.
+          high_freq:
+            High cutoff frequency for mel bins in feature extraction
+            (if <= 0, offset from Nyquist)
+          dither:
+            Dithering constant (0.0 means no dither).
+            By default the audio samples are in range [-1,+1],
+            so dithering constant 0.00003 is a good value,
+            equivalent to the default 1.0 from kaldi
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
@@ -140,6 +153,9 @@ class OnlineRecognizer(object):
        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
+            low_freq=low_freq,
+            high_freq=high_freq,
+            dither=dither,
        )

        endpoint_config = EndpointConfig(