Refactor online recognizer (#250)

* Refactor online recognizer. Make it easier to support other streaming models. Note that it is a breaking change for the Python API. `sherpa_onnx.OnlineRecognizer()` used before should be replaced by `sherpa_onnx.OnlineRecognizer.from_transducer()`.
2023-08-09 20:27:31 +08:00
parent 6061318e3f
commit 79c2ce5dd4
40 changed files with 670 additions and 480 deletions
--- a/sherpa-onnx/python/csrc/CMakeLists.txt
+++ b/sherpa-onnx/python/csrc/CMakeLists.txt
@@ -13,6 +13,7 @@ pybind11_add_module(_sherpa_onnx
  offline-transducer-model-config.cc
  offline-whisper-model-config.cc
  online-lm-config.cc
+  online-model-config.cc
  online-recognizer.cc
  online-stream.cc
  online-transducer-model-config.cc
--- a/sherpa-onnx/python/csrc/online-model-config.cc
+++ b/sherpa-onnx/python/csrc/online-model-config.cc
@@ -0,0 +1,35 @@
+// sherpa-onnx/python/csrc/online-model-config.cc
+//
+// Copyright (c)  2023 by manyeyes
+
+#include "sherpa-onnx/python/csrc/online-model-config.h"
+
+#include <string>
+#include <vector>
+
+#include "sherpa-onnx/csrc/online-model-config.h"
+#include "sherpa-onnx/csrc/online-transducer-model-config.h"
+#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"
+
+namespace sherpa_onnx {
+
+void PybindOnlineModelConfig(py::module *m) {
+  // Registers OnlineModelConfig (ctor, fields, __str__) on Python module *m.
+  PybindOnlineTransducerModelConfig(m);  // bound first; used as a default below
+  using PyClass = OnlineModelConfig;
+  py::class_<PyClass>(*m, "OnlineModelConfig")
+      .def(py::init<const OnlineTransducerModelConfig &, const std::string &,
+                    int32_t, bool, const std::string &, const std::string &>(),
+           py::arg("transducer") = OnlineTransducerModelConfig(),
+           py::arg("tokens"), py::arg("num_threads"), py::arg("debug") = false,
+           py::arg("provider") = "cpu", py::arg("model_type") = "")
+      .def_readwrite("transducer", &PyClass::transducer)
+      .def_readwrite("tokens", &PyClass::tokens)
+      .def_readwrite("num_threads", &PyClass::num_threads)
+      .def_readwrite("debug", &PyClass::debug)
+      .def_readwrite("provider", &PyClass::provider)
+      .def_readwrite("model_type", &PyClass::model_type)
+      .def("__str__", &PyClass::ToString);
+}
+
+}  // namespace sherpa_onnx
--- a/sherpa-onnx/python/csrc/online-model-config.h
+++ b/sherpa-onnx/python/csrc/online-model-config.h
@@ -0,0 +1,16 @@
+// sherpa-onnx/python/csrc/online-model-config.h
+//
+// Copyright (c)  2023 by manyeyes
+
+#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
+#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
+
+#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
+
+namespace sherpa_onnx {
+// Registers OnlineModelConfig (and OnlineTransducerModelConfig) on module *m.
+void PybindOnlineModelConfig(py::module *m);
+
+}  // namespace sherpa_onnx
+
+#endif  // SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
--- a/sherpa-onnx/python/csrc/online-recognizer.cc
+++ b/sherpa-onnx/python/csrc/online-recognizer.cc
@@ -27,10 +27,9 @@ static void PybindOnlineRecognizerResult(py::module *m) {
 static void PybindOnlineRecognizerConfig(py::module *m) {
  using PyClass = OnlineRecognizerConfig;
  py::class_<PyClass>(*m, "OnlineRecognizerConfig")
-      .def(py::init<const FeatureExtractorConfig &,
-                    const OnlineTransducerModelConfig &, const OnlineLMConfig &,
-                    const EndpointConfig &, bool, const std::string &, int32_t,
-                    float>(),
+      .def(py::init<const FeatureExtractorConfig &, const OnlineModelConfig &,
+                    const OnlineLMConfig &, const EndpointConfig &, bool,
+                    const std::string &, int32_t, float>(),
           py::arg("feat_config"), py::arg("model_config"),
           py::arg("lm_config") = OnlineLMConfig(), py::arg("endpoint_config"),
           py::arg("enable_endpoint"), py::arg("decoding_method"),
--- a/sherpa-onnx/python/csrc/online-transducer-model-config.cc
+++ b/sherpa-onnx/python/csrc/online-transducer-model-config.cc
@@ -14,20 +14,11 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
  using PyClass = OnlineTransducerModelConfig;
  py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
      .def(py::init<const std::string &, const std::string &,
-                    const std::string &, const std::string &, int32_t, bool,
-                    const std::string &, const std::string &>(),
-           py::arg("encoder_filename"), py::arg("decoder_filename"),
-           py::arg("joiner_filename"), py::arg("tokens"),
-           py::arg("num_threads"), py::arg("debug") = false,
-           py::arg("provider") = "cpu", py::arg("model_type") = "")
-      .def_readwrite("encoder_filename", &PyClass::encoder_filename)
-      .def_readwrite("decoder_filename", &PyClass::decoder_filename)
-      .def_readwrite("joiner_filename", &PyClass::joiner_filename)
-      .def_readwrite("tokens", &PyClass::tokens)
-      .def_readwrite("num_threads", &PyClass::num_threads)
-      .def_readwrite("debug", &PyClass::debug)
-      .def_readwrite("provider", &PyClass::provider)
-      .def_readwrite("model_type", &PyClass::model_type)
+                    const std::string &>(),
+           py::arg("encoder"), py::arg("decoder"), py::arg("joiner"))
+      .def_readwrite("encoder", &PyClass::encoder)
+      .def_readwrite("decoder", &PyClass::decoder)
+      .def_readwrite("joiner", &PyClass::joiner)
      .def("__str__", &PyClass::ToString);
 }

--- a/sherpa-onnx/python/csrc/sherpa-onnx.cc
+++ b/sherpa-onnx/python/csrc/sherpa-onnx.cc
@@ -12,9 +12,9 @@
 #include "sherpa-onnx/python/csrc/offline-recognizer.h"
 #include "sherpa-onnx/python/csrc/offline-stream.h"
 #include "sherpa-onnx/python/csrc/online-lm-config.h"
+#include "sherpa-onnx/python/csrc/online-model-config.h"
 #include "sherpa-onnx/python/csrc/online-recognizer.h"
 #include "sherpa-onnx/python/csrc/online-stream.h"
-#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"

 namespace sherpa_onnx {

@@ -22,7 +22,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
  m.doc() = "pybind11 binding of sherpa-onnx";

  PybindFeatures(&m);
-  PybindOnlineTransducerModelConfig(&m);
+  PybindOnlineModelConfig(&m);
  PybindOnlineLMConfig(&m);
  PybindOnlineStream(&m);
  PybindEndpoint(&m);
--- a/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
+++ b/sherpa-onnx/python/sherpa_onnx/online_recognizer.py
@@ -5,6 +5,7 @@ from typing import List, Optional
 from _sherpa_onnx import (
    EndpointConfig,
    FeatureExtractorConfig,
+    OnlineModelConfig,
    OnlineRecognizer as _Recognizer,
    OnlineRecognizerConfig,
    OnlineStream,
@@ -24,8 +25,9 @@ class OnlineRecognizer(object):
     - https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/online-decode-files.py
    """

-    def __init__(
-        self,
+    @classmethod
+    def from_transducer(
+        cls,
        tokens: str,
        encoder: str,
        decoder: str,
@@ -95,6 +97,7 @@ class OnlineRecognizer(object):
            Online transducer model type. Valid values are: conformer, lstm,
            zipformer, zipformer2. All other values lead to loading the model twice.
        """
+        self = cls.__new__(cls)
        _assert_file_exists(tokens)
        _assert_file_exists(encoder)
        _assert_file_exists(decoder)
@@ -102,10 +105,14 @@ class OnlineRecognizer(object):

        assert num_threads > 0, num_threads

-        model_config = OnlineTransducerModelConfig(
-            encoder_filename=encoder,
-            decoder_filename=decoder,
-            joiner_filename=joiner,
+        transducer_config = OnlineTransducerModelConfig(
+            encoder=encoder,
+            decoder=decoder,
+            joiner=joiner,
+        )
+
+        model_config = OnlineModelConfig(
+            transducer=transducer_config,
            tokens=tokens,
            num_threads=num_threads,
            provider=provider,
@@ -135,6 +142,7 @@ class OnlineRecognizer(object):

        self.recognizer = _Recognizer(recognizer_config)
        self.config = recognizer_config
+        return self

    def create_stream(self, contexts_list: Optional[List[List[int]]] = None):
        if contexts_list is None:
--- a/sherpa-onnx/python/tests/test_online_recognizer.py
+++ b/sherpa-onnx/python/tests/test_online_recognizer.py
@@ -65,7 +65,7 @@ class TestOnlineRecognizer(unittest.TestCase):
                return

            for decoding_method in ["greedy_search", "modified_beam_search"]:
-                recognizer = sherpa_onnx.OnlineRecognizer(
+                recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
                    encoder=encoder,
                    decoder=decoder,
                    joiner=joiner,
@@ -109,7 +109,7 @@ class TestOnlineRecognizer(unittest.TestCase):
                return

            for decoding_method in ["greedy_search", "modified_beam_search"]:
-                recognizer = sherpa_onnx.OnlineRecognizer(
+                recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
                    encoder=encoder,
                    decoder=decoder,
                    joiner=joiner,
--- a/sherpa-onnx/python/tests/test_online_transducer_model_config.py
+++ b/sherpa-onnx/python/tests/test_online_transducer_model_config.py
@@ -14,19 +14,13 @@ import _sherpa_onnx
 class TestOnlineTransducerModelConfig(unittest.TestCase):
    def test_constructor(self):
        config = _sherpa_onnx.OnlineTransducerModelConfig(
-            encoder_filename="encoder.onnx",
-            decoder_filename="decoder.onnx",
-            joiner_filename="joiner.onnx",
-            tokens="tokens.txt",
-            num_threads=8,
-            debug=True,
+            encoder="encoder.onnx",
+            decoder="decoder.onnx",
+            joiner="joiner.onnx",
        )
-        assert config.encoder_filename == "encoder.onnx", config.encoder_filename
-        assert config.decoder_filename == "decoder.onnx", config.decoder_filename
-        assert config.joiner_filename == "joiner.onnx", config.joiner_filename
-        assert config.tokens == "tokens.txt", config.tokens
-        assert config.num_threads == 8, config.num_threads
-        assert config.debug is True, config.debug
+        assert config.encoder == "encoder.onnx", config.encoder
+        assert config.decoder == "decoder.onnx", config.decoder
+        assert config.joiner == "joiner.onnx", config.joiner
        print(config)