Reduce model initialization time for online speech recognition (#215)

* Reduce model initialization time for online speech recognition
* Fixed Styling

---------

Co-authored-by: w11wo <wilsowong961@gmail.com>
2023-07-14 20:20:10 +07:00
parent fe0630fe1f
commit 5a6b55c5a7
7 changed files with 69 additions and 8 deletions
--- a/sherpa-onnx/python/csrc/online-transducer-model-config.cc
+++ b/sherpa-onnx/python/csrc/online-transducer-model-config.cc
@@ -15,11 +15,11 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
  py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
      .def(py::init<const std::string &, const std::string &,
                    const std::string &, const std::string &, int32_t, bool,
-                    const std::string &>(),
+                    const std::string &, const std::string &>(),
           py::arg("encoder_filename"), py::arg("decoder_filename"),
           py::arg("joiner_filename"), py::arg("tokens"),
           py::arg("num_threads"), py::arg("debug") = false,
-           py::arg("provider") = "cpu")
+           py::arg("provider") = "cpu", py::arg("model_type") = "")
      .def_readwrite("encoder_filename", &PyClass::encoder_filename)
      .def_readwrite("decoder_filename", &PyClass::decoder_filename)
      .def_readwrite("joiner_filename", &PyClass::joiner_filename)
@@ -27,6 +27,7 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
+      .def_readwrite("model_type", &PyClass::model_type)
      .def("__str__", &PyClass::ToString);
 }

--- a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
+++ b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
@@ -41,6 +41,7 @@ class OnlineRecognizer(object):
        max_active_paths: int = 4,
        context_score: float = 1.5,
        provider: str = "cpu",
+        model_type: str = "",
    ):
        """
        Please refer to
@@ -90,6 +91,9 @@ class OnlineRecognizer(object):
            the maximum number of active paths during beam search.
          provider:
            onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
+          model_type:
+            Online transducer model type. Valid values: conformer, lstm, zipformer,
+            zipformer2. Any other value (including "") loads the model twice.
        """
        _assert_file_exists(tokens)
        _assert_file_exists(encoder)
@@ -105,6 +109,7 @@ class OnlineRecognizer(object):
            tokens=tokens,
            num_threads=num_threads,
            provider=provider,
+            model_type=model_type,
        )

        feat_config = FeatureExtractorConfig(