Configurable low_freq high_freq, dithering (#664)

2024-03-22 14:41:44 +01:00
parent 2fc1201924
commit eaec4c83c2
10 changed files with 96 additions and 15 deletions
--- a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
+++ b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
@@ -41,6 +41,9 @@ class OnlineRecognizer(object):
        num_threads: int = 2,
        sample_rate: float = 16000,
        feature_dim: int = 80,
+        low_freq: float = 20.0,
+        high_freq: float = -400.0,
+        dither: float = 0.0,
        enable_endpoint_detection: bool = False,
        rule1_min_trailing_silence: float = 2.4,
        rule2_min_trailing_silence: float = 1.2,
@@ -80,6 +83,16 @@ class OnlineRecognizer(object):
            Sample rate of the training data used to train the model.
          feature_dim:
            Dimension of the feature used to train the model.
+          low_freq:
+            Low cutoff frequency for mel bins in feature extraction.
+          high_freq:
+            High cutoff frequency for mel bins in feature extraction.
+            If <= 0, it is an offset from the Nyquist frequency.
+          dither:
+            Dithering constant (0.0 means no dither).
+            By default the audio samples are in the range [-1, +1],
+            so a dithering constant of 0.00003 is a good value; it is
+            equivalent to the default of 1.0 in kaldi.
          enable_endpoint_detection:
            True to enable endpoint detection. False to disable endpoint
            detection.
@@ -140,6 +153,9 @@ class OnlineRecognizer(object):
        feat_config = FeatureExtractorConfig(
            sampling_rate=sample_rate,
            feature_dim=feature_dim,
+            low_freq=low_freq,
+            high_freq=high_freq,
+            dither=dither,
        )

        endpoint_config = EndpointConfig(