Ebranchformer (#1951)

* Add the ebranchformer encoder.
* Extend the FeatureExtractorConfig surfaced to Python:
  - so that ebranchformer feature extraction can be configured from Python;
  - the GlobCmvn is not needed, as it is a module in the OnnxEncoder.
* Clean up the code.
* Integrate remarks from Fangjun.
2025-03-04 12:41:09 +01:00
parent 209eaaae1d
commit 7740dbfb96
8 changed files with 609 additions and 5 deletions
--- a/sherpa-onnx/python/csrc/features.cc
+++ b/sherpa-onnx/python/csrc/features.cc
@@ -11,15 +11,21 @@ namespace sherpa_onnx {
 static void PybindFeatureExtractorConfig(py::module *m) {
  using PyClass = FeatureExtractorConfig;
  py::class_<PyClass>(*m, "FeatureExtractorConfig")
-      .def(py::init<int32_t, int32_t, float, float, float>(),
-           py::arg("sampling_rate") = 16000, py::arg("feature_dim") = 80,
-           py::arg("low_freq") = 20.0f, py::arg("high_freq") = -400.0f,
-           py::arg("dither") = 0.0f)
+      .def(py::init<int32_t, int32_t, float, float, float, bool, bool>(),  // the two trailing bools are normalize_samples and snip_edges, in that order
+           py::arg("sampling_rate") = 16000,
+           py::arg("feature_dim") = 80,
+           py::arg("low_freq") = 20.0f,
+           py::arg("high_freq") = -400.0f,
+           py::arg("dither") = 0.0f,
+           py::arg("normalize_samples") = true,  // new keyword argument; Python-side default is true
+           py::arg("snip_edges") = false)  // new keyword argument; Python-side default is false
      .def_readwrite("sampling_rate", &PyClass::sampling_rate)
      .def_readwrite("feature_dim", &PyClass::feature_dim)
      .def_readwrite("low_freq", &PyClass::low_freq)
      .def_readwrite("high_freq", &PyClass::high_freq)
      .def_readwrite("dither", &PyClass::dither)
+      .def_readwrite("normalize_samples", &PyClass::normalize_samples)  // also exposed as a read/write attribute
+      .def_readwrite("snip_edges", &PyClass::snip_edges)  // also exposed as a read/write attribute
      .def("__str__", &PyClass::ToString);
 }

--- a/sherpa-onnx/python/csrc/online-stream.cc
+++ b/sherpa-onnx/python/csrc/online-stream.cc
@@ -22,6 +22,23 @@ Args:
    to the range [-1, 1].
 )";

+
+constexpr const char *kGetFramesUsage = R"(
+Get n frames starting from the given frame index.
+(hint: intended for debugging, for comparing FBANK features across pipelines)
+
+Args:
+  frame_index:
+    The starting frame index.
+  n:
+    Number of frames to get.
+Return:
+  A 1-D vector holding the (n, feature_dim) feature matrix
+  flattened in row-major order.
+  Restore the 2-D shape in Python with:
+    `features = np.reshape(arr, (n, feature_dim))`
+)";  // Python docstring text for the "get_frames" binding
+
 void PybindOnlineStream(py::module *m) {
  using PyClass = OnlineStream;
  py::class_<PyClass>(*m, "OnlineStream")
@@ -34,6 +51,9 @@ void PybindOnlineStream(py::module *m) {
          py::arg("sample_rate"), py::arg("waveform"), kAcceptWaveformUsage,
          py::call_guard<py::gil_scoped_release>())
      .def("input_finished", &PyClass::InputFinished,
+           py::call_guard<py::gil_scoped_release>())
+      .def("get_frames", &PyClass::GetFrames,
+           py::arg("frame_index"), py::arg("n"), kGetFramesUsage,
           py::call_guard<py::gil_scoped_release>());
 }