Reduce model initialization time for online speech recognition (#215)

* Reduce model initialization time for online speech recognition

* Fixed Styling

---------

Co-authored-by: w11wo <wilsowong961@gmail.com>
This commit is contained in:
Wilson Wongso
2023-07-14 20:20:10 +07:00
committed by GitHub
parent fe0630fe1f
commit 5a6b55c5a7
7 changed files with 69 additions and 8 deletions

View File

@@ -15,11 +15,11 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
.def(py::init<const std::string &, const std::string &,
const std::string &, const std::string &, int32_t, bool,
const std::string &>(),
const std::string &, const std::string &>(),
py::arg("encoder_filename"), py::arg("decoder_filename"),
py::arg("joiner_filename"), py::arg("tokens"),
py::arg("num_threads"), py::arg("debug") = false,
py::arg("provider") = "cpu")
py::arg("provider") = "cpu", py::arg("model_type") = "")
.def_readwrite("encoder_filename", &PyClass::encoder_filename)
.def_readwrite("decoder_filename", &PyClass::decoder_filename)
.def_readwrite("joiner_filename", &PyClass::joiner_filename)
@@ -27,6 +27,7 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
.def_readwrite("num_threads", &PyClass::num_threads)
.def_readwrite("debug", &PyClass::debug)
.def_readwrite("provider", &PyClass::provider)
.def_readwrite("model_type", &PyClass::model_type)
.def("__str__", &PyClass::ToString);
}

View File

@@ -41,6 +41,7 @@ class OnlineRecognizer(object):
max_active_paths: int = 4,
context_score: float = 1.5,
provider: str = "cpu",
model_type: str = "",
):
"""
Please refer to
@@ -90,6 +91,9 @@ class OnlineRecognizer(object):
the maximum number of active paths during beam search.
provider:
onnxruntime execution providers. Valid values are: cpu, cuda, coreml.
model_type:
Online transducer model type. Valid values are: conformer, lstm, zipformer,
zipformer2. Any other value, including the default empty string, loads the model twice.
"""
_assert_file_exists(tokens)
_assert_file_exists(encoder)
@@ -105,6 +109,7 @@ class OnlineRecognizer(object):
tokens=tokens,
num_threads=num_threads,
provider=provider,
model_type=model_type,
)
feat_config = FeatureExtractorConfig(