Refactor online recognizer (#250)
* Refactor the online recognizer to make it easier to support other streaming models. Note that this is a breaking change for the Python API: code that previously called `sherpa_onnx.OnlineRecognizer()` must now call `sherpa_onnx.OnlineRecognizer.from_transducer()` instead.
This commit is contained in:
@@ -13,6 +13,7 @@ pybind11_add_module(_sherpa_onnx
|
||||
offline-transducer-model-config.cc
|
||||
offline-whisper-model-config.cc
|
||||
online-lm-config.cc
|
||||
online-model-config.cc
|
||||
online-recognizer.cc
|
||||
online-stream.cc
|
||||
online-transducer-model-config.cc
|
||||
|
||||
35
sherpa-onnx/python/csrc/online-model-config.cc
Normal file
35
sherpa-onnx/python/csrc/online-model-config.cc
Normal file
@@ -0,0 +1,35 @@
|
||||
// sherpa-onnx/python/csrc/online-model-config.cc
|
||||
//
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
|
||||
#include "sherpa-onnx/python/csrc/online-model-config.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "sherpa-onnx/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/csrc/online-transducer-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Registers the Python class `OnlineModelConfig` on the module `m`.
//
// `OnlineTransducerModelConfig` is registered first because it is the type of
// the `transducer` constructor argument and attribute bound below, and an
// instance of it is used as that argument's default value.
//
// @param m  The pybind11 module that receives the bindings.
void PybindOnlineModelConfig(py::module *m) {
  PybindOnlineTransducerModelConfig(m);

  using PyClass = OnlineModelConfig;
  py::class_<PyClass>(*m, "OnlineModelConfig")
      // `tokens` is taken by const reference, like the other string
      // arguments (`provider`, `model_type`); the binding never mutates it.
      .def(py::init<const OnlineTransducerModelConfig &, const std::string &,
                    int32_t, bool, const std::string &, const std::string &>(),
           py::arg("transducer") = OnlineTransducerModelConfig(),
           py::arg("tokens"), py::arg("num_threads"), py::arg("debug") = false,
           py::arg("provider") = "cpu", py::arg("model_type") = "")
      // Expose every field as a read/write Python attribute.
      .def_readwrite("transducer", &PyClass::transducer)
      .def_readwrite("tokens", &PyClass::tokens)
      .def_readwrite("num_threads", &PyClass::num_threads)
      .def_readwrite("debug", &PyClass::debug)
      .def_readwrite("provider", &PyClass::provider)
      .def_readwrite("model_type", &PyClass::model_type)
      // str(config) in Python forwards to the C++ ToString().
      .def("__str__", &PyClass::ToString);
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
16
sherpa-onnx/python/csrc/online-model-config.h
Normal file
16
sherpa-onnx/python/csrc/online-model-config.h
Normal file
@@ -0,0 +1,16 @@
|
||||
// sherpa-onnx/python/csrc/online-model-config.h
|
||||
//
|
||||
// Copyright (c) 2023 by manyeyes
|
||||
|
||||
#ifndef SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
|
||||
#define SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
|
||||
|
||||
#include "sherpa-onnx/python/csrc/sherpa-onnx.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
// Adds the Python binding of OnlineModelConfig to the module `m`.
void PybindOnlineModelConfig(py::module *m);
|
||||
|
||||
}
|
||||
|
||||
#endif // SHERPA_ONNX_PYTHON_CSRC_ONLINE_MODEL_CONFIG_H_
|
||||
@@ -27,10 +27,9 @@ static void PybindOnlineRecognizerResult(py::module *m) {
|
||||
static void PybindOnlineRecognizerConfig(py::module *m) {
|
||||
using PyClass = OnlineRecognizerConfig;
|
||||
py::class_<PyClass>(*m, "OnlineRecognizerConfig")
|
||||
.def(py::init<const FeatureExtractorConfig &,
|
||||
const OnlineTransducerModelConfig &, const OnlineLMConfig &,
|
||||
const EndpointConfig &, bool, const std::string &, int32_t,
|
||||
float>(),
|
||||
.def(py::init<const FeatureExtractorConfig &, const OnlineModelConfig &,
|
||||
const OnlineLMConfig &, const EndpointConfig &, bool,
|
||||
const std::string &, int32_t, float>(),
|
||||
py::arg("feat_config"), py::arg("model_config"),
|
||||
py::arg("lm_config") = OnlineLMConfig(), py::arg("endpoint_config"),
|
||||
py::arg("enable_endpoint"), py::arg("decoding_method"),
|
||||
|
||||
@@ -14,20 +14,11 @@ void PybindOnlineTransducerModelConfig(py::module *m) {
|
||||
using PyClass = OnlineTransducerModelConfig;
|
||||
py::class_<PyClass>(*m, "OnlineTransducerModelConfig")
|
||||
.def(py::init<const std::string &, const std::string &,
|
||||
const std::string &, const std::string &, int32_t, bool,
|
||||
const std::string &, const std::string &>(),
|
||||
py::arg("encoder_filename"), py::arg("decoder_filename"),
|
||||
py::arg("joiner_filename"), py::arg("tokens"),
|
||||
py::arg("num_threads"), py::arg("debug") = false,
|
||||
py::arg("provider") = "cpu", py::arg("model_type") = "")
|
||||
.def_readwrite("encoder_filename", &PyClass::encoder_filename)
|
||||
.def_readwrite("decoder_filename", &PyClass::decoder_filename)
|
||||
.def_readwrite("joiner_filename", &PyClass::joiner_filename)
|
||||
.def_readwrite("tokens", &PyClass::tokens)
|
||||
.def_readwrite("num_threads", &PyClass::num_threads)
|
||||
.def_readwrite("debug", &PyClass::debug)
|
||||
.def_readwrite("provider", &PyClass::provider)
|
||||
.def_readwrite("model_type", &PyClass::model_type)
|
||||
const std::string &>(),
|
||||
py::arg("encoder"), py::arg("decoder"), py::arg("joiner"))
|
||||
.def_readwrite("encoder", &PyClass::encoder)
|
||||
.def_readwrite("decoder", &PyClass::decoder)
|
||||
.def_readwrite("joiner", &PyClass::joiner)
|
||||
.def("__str__", &PyClass::ToString);
|
||||
}
|
||||
|
||||
|
||||
@@ -12,9 +12,9 @@
|
||||
#include "sherpa-onnx/python/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/python/csrc/offline-stream.h"
|
||||
#include "sherpa-onnx/python/csrc/online-lm-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-model-config.h"
|
||||
#include "sherpa-onnx/python/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/python/csrc/online-stream.h"
|
||||
#include "sherpa-onnx/python/csrc/online-transducer-model-config.h"
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
@@ -22,7 +22,7 @@ PYBIND11_MODULE(_sherpa_onnx, m) {
|
||||
m.doc() = "pybind11 binding of sherpa-onnx";
|
||||
|
||||
PybindFeatures(&m);
|
||||
PybindOnlineTransducerModelConfig(&m);
|
||||
PybindOnlineModelConfig(&m);
|
||||
PybindOnlineLMConfig(&m);
|
||||
PybindOnlineStream(&m);
|
||||
PybindEndpoint(&m);
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import List, Optional
|
||||
from _sherpa_onnx import (
|
||||
EndpointConfig,
|
||||
FeatureExtractorConfig,
|
||||
OnlineModelConfig,
|
||||
OnlineRecognizer as _Recognizer,
|
||||
OnlineRecognizerConfig,
|
||||
OnlineStream,
|
||||
@@ -24,8 +25,9 @@ class OnlineRecognizer(object):
|
||||
- https://github.com/k2-fsa/sherpa-onnx/blob/master/python-api-examples/online-decode-files.py
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
@classmethod
|
||||
def from_transducer(
|
||||
cls,
|
||||
tokens: str,
|
||||
encoder: str,
|
||||
decoder: str,
|
||||
@@ -95,6 +97,7 @@ class OnlineRecognizer(object):
|
||||
Online transducer model type. Valid values are: conformer, lstm,
|
||||
zipformer, zipformer2. All other values lead to loading the model twice.
|
||||
"""
|
||||
self = cls.__new__(cls)
|
||||
_assert_file_exists(tokens)
|
||||
_assert_file_exists(encoder)
|
||||
_assert_file_exists(decoder)
|
||||
@@ -102,10 +105,14 @@ class OnlineRecognizer(object):
|
||||
|
||||
assert num_threads > 0, num_threads
|
||||
|
||||
model_config = OnlineTransducerModelConfig(
|
||||
encoder_filename=encoder,
|
||||
decoder_filename=decoder,
|
||||
joiner_filename=joiner,
|
||||
transducer_config = OnlineTransducerModelConfig(
|
||||
encoder=encoder,
|
||||
decoder=decoder,
|
||||
joiner=joiner,
|
||||
)
|
||||
|
||||
model_config = OnlineModelConfig(
|
||||
transducer=transducer_config,
|
||||
tokens=tokens,
|
||||
num_threads=num_threads,
|
||||
provider=provider,
|
||||
@@ -135,6 +142,7 @@ class OnlineRecognizer(object):
|
||||
|
||||
self.recognizer = _Recognizer(recognizer_config)
|
||||
self.config = recognizer_config
|
||||
return self
|
||||
|
||||
def create_stream(self, contexts_list: Optional[List[List[int]]] = None):
|
||||
if contexts_list is None:
|
||||
|
||||
@@ -65,7 +65,7 @@ class TestOnlineRecognizer(unittest.TestCase):
|
||||
return
|
||||
|
||||
for decoding_method in ["greedy_search", "modified_beam_search"]:
|
||||
recognizer = sherpa_onnx.OnlineRecognizer(
|
||||
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
||||
encoder=encoder,
|
||||
decoder=decoder,
|
||||
joiner=joiner,
|
||||
@@ -109,7 +109,7 @@ class TestOnlineRecognizer(unittest.TestCase):
|
||||
return
|
||||
|
||||
for decoding_method in ["greedy_search", "modified_beam_search"]:
|
||||
recognizer = sherpa_onnx.OnlineRecognizer(
|
||||
recognizer = sherpa_onnx.OnlineRecognizer.from_transducer(
|
||||
encoder=encoder,
|
||||
decoder=decoder,
|
||||
joiner=joiner,
|
||||
|
||||
@@ -14,19 +14,13 @@ import _sherpa_onnx
|
||||
class TestOnlineTransducerModelConfig(unittest.TestCase):
|
||||
def test_constructor(self):
|
||||
config = _sherpa_onnx.OnlineTransducerModelConfig(
|
||||
encoder_filename="encoder.onnx",
|
||||
decoder_filename="decoder.onnx",
|
||||
joiner_filename="joiner.onnx",
|
||||
tokens="tokens.txt",
|
||||
num_threads=8,
|
||||
debug=True,
|
||||
encoder="encoder.onnx",
|
||||
decoder="decoder.onnx",
|
||||
joiner="joiner.onnx",
|
||||
)
|
||||
assert config.encoder_filename == "encoder.onnx", config.encoder_filename
|
||||
assert config.decoder_filename == "decoder.onnx", config.decoder_filename
|
||||
assert config.joiner_filename == "joiner.onnx", config.joiner_filename
|
||||
assert config.tokens == "tokens.txt", config.tokens
|
||||
assert config.num_threads == 8, config.num_threads
|
||||
assert config.debug is True, config.debug
|
||||
assert config.encoder == "encoder.onnx", config.encoder
|
||||
assert config.decoder == "decoder.onnx", config.decoder
|
||||
assert config.joiner == "joiner.onnx", config.joiner
|
||||
print(config)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user