Reduce model initialization time for online speech recognition (#215)

* Reduce model initialization time for online speech recognition
* Fixed Styling

---------

Co-authored-by: w11wo <wilsowong961@gmail.com>
2023-07-14 20:20:10 +07:00
parent fe0630fe1f
commit 5a6b55c5a7
7 changed files with 69 additions and 8 deletions
--- a/sherpa-onnx/csrc/online-transducer-model-config.h
+++ b/sherpa-onnx/csrc/online-transducer-model-config.h
@@ -19,19 +19,33 @@ struct OnlineTransducerModelConfig {
  bool debug = false;
  std::string provider = "cpu";

+  // When this field is set to one of the valid values below, the model is
+  // loaded only once instead of twice, which reduces initialization time.
+  //
+  // Valid values:
+  //  - conformer
+  //  - lstm
+  //  - zipformer
+  //  - zipformer2
+  //
+  // All other values are invalid and lead to loading the model twice.
+  std::string model_type;
+
  OnlineTransducerModelConfig() = default;
  OnlineTransducerModelConfig(const std::string &encoder_filename,
                              const std::string &decoder_filename,
                              const std::string &joiner_filename,
                              const std::string &tokens, int32_t num_threads,
-                              bool debug, const std::string &provider)
+                              bool debug, const std::string &provider,
+                              const std::string &model_type)
      : encoder_filename(encoder_filename),
        decoder_filename(decoder_filename),
        joiner_filename(joiner_filename),
        tokens(tokens),
        num_threads(num_threads),
        debug(debug),
-        provider(provider) {}
+        provider(provider),
+        model_type(model_type) {}

  void Register(ParseOptions *po);
  bool Validate() const;