Add non-streaming ASR (#92)

This commit is contained in:
Fangjun Kuang
2023-03-26 08:53:42 +08:00
committed by GitHub
parent 6f92bc7362
commit 5572246253
48 changed files with 1526 additions and 150 deletions

View File

@@ -11,12 +11,10 @@ namespace sherpa_onnx {
// Registers FeatureExtractorConfig on the pybind11 module `m` under the
// Python name "FeatureExtractorConfig": constructors with keyword arguments,
// read/write attributes for the config fields, and __str__.
//
// NOTE(review): this text looks like a diff hunk whose +/- markers were
// lost (the hunk header declares 12 old and 10 new lines). The
// three-argument constructor and the max_feature_vectors attribute below are
// presumably the removed side, and the two-argument constructor the added
// side -- confirm against the real file before relying on either.
static void PybindFeatureExtractorConfig(py::module *m) {
using PyClass = FeatureExtractorConfig;
py::class_<PyClass>(*m, "FeatureExtractorConfig")
// Constructor taking (sampling_rate, feature_dim, max_feature_vectors);
// keyword defaults are 16000, 80 and -1.
.def(py::init<int32_t, int32_t, int32_t>(),
py::arg("sampling_rate") = 16000, py::arg("feature_dim") = 80,
py::arg("max_feature_vectors") = -1)
// Constructor taking (sampling_rate, feature_dim); keyword defaults are
// 16000 and 80.
.def(py::init<int32_t, int32_t>(), py::arg("sampling_rate") = 16000,
py::arg("feature_dim") = 80)
// Expose the struct members as readable and writable Python attributes.
.def_readwrite("sampling_rate", &PyClass::sampling_rate)
.def_readwrite("feature_dim", &PyClass::feature_dim)
.def_readwrite("max_feature_vectors", &PyClass::max_feature_vectors)
// str(config) in Python forwards to PyClass::ToString.
.def("__str__", &PyClass::ToString);
}

View File

@@ -34,7 +34,6 @@ class OnlineRecognizer(object):
rule3_min_utterance_length: int = 20,
decoding_method: str = "greedy_search",
max_active_paths: int = 4,
max_feature_vectors: int = -1,
):
"""
Please refer to
@@ -82,9 +81,6 @@ class OnlineRecognizer(object):
max_active_paths:
Use only when decoding_method is modified_beam_search. It specifies
the maximum number of active paths during beam search.
max_feature_vectors:
Number of feature vectors to cache. -1 means to cache all feature
frames that have been processed.
"""
_assert_file_exists(tokens)
_assert_file_exists(encoder)
@@ -104,7 +100,6 @@ class OnlineRecognizer(object):
feat_config = FeatureExtractorConfig(
sampling_rate=sample_rate,
feature_dim=feature_dim,
max_feature_vectors=max_feature_vectors,
)
endpoint_config = EndpointConfig(

View File

@@ -8,18 +8,18 @@
import unittest
import sherpa_onnx
import _sherpa_onnx
# Unit tests for the FeatureExtractorConfig binding: construct a config and
# check that sampling_rate and feature_dim read back as expected.
#
# NOTE(review): each test assigns `config` twice, once through `sherpa_onnx`
# and once through `_sherpa_onnx`. This looks like the old and new side of a
# diff whose +/- markers were lost; as written, the second assignment simply
# overwrites the first. Confirm which line the real file keeps.
class TestFeatureExtractorConfig(unittest.TestCase):
# No-argument construction must yield sampling_rate 16000 and feature_dim 80.
def test_default_constructor(self):
config = sherpa_onnx.FeatureExtractorConfig()
config = _sherpa_onnx.FeatureExtractorConfig()
# The second operand of each assert is the message shown on failure.
assert config.sampling_rate == 16000, config.sampling_rate
assert config.feature_dim == 80, config.feature_dim
print(config)
# Keyword arguments must be stored on the attributes of the same name.
def test_constructor(self):
config = sherpa_onnx.FeatureExtractorConfig(sampling_rate=8000, feature_dim=40)
config = _sherpa_onnx.FeatureExtractorConfig(sampling_rate=8000, feature_dim=40)
assert config.sampling_rate == 8000, config.sampling_rate
assert config.feature_dim == 40, config.feature_dim
print(config)

View File

@@ -8,21 +8,23 @@
import unittest
import sherpa_onnx
import _sherpa_onnx
# Unit test for the OnlineTransducerModelConfig binding: build a config from
# keyword arguments and check that every attribute reads back unchanged.
#
# NOTE(review): the two consecutive `config = ...(` lines below each open a
# constructor call, so the parentheses are unbalanced as shown. They look
# like the old (`sherpa_onnx`) and new (`_sherpa_onnx`) side of a diff whose
# +/- markers were lost -- confirm which one the real file keeps.
class TestOnlineTransducerModelConfig(unittest.TestCase):
def test_constructor(self):
config = sherpa_onnx.OnlineTransducerModelConfig(
config = _sherpa_onnx.OnlineTransducerModelConfig(
encoder_filename="encoder.onnx",
decoder_filename="decoder.onnx",
joiner_filename="joiner.onnx",
tokens="tokens.txt",
num_threads=8,
debug=True,
)
# Each keyword passed above must be readable from the attribute of the
# same name; the second operand of each assert is the failure message.
assert config.encoder_filename == "encoder.onnx", config.encoder_filename
assert config.decoder_filename == "decoder.onnx", config.decoder_filename
assert config.joiner_filename == "joiner.onnx", config.joiner_filename
assert config.tokens == "tokens.txt", config.tokens
assert config.num_threads == 8, config.num_threads
assert config.debug is True, config.debug
print(config)