Add LODR support to online and offline recognizers (#2026)

This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
This commit is contained in:
Askars Salimbajevs
2025-07-09 11:23:46 +03:00
committed by GitHub
parent 6122a678f5
commit f0960342ad
21 changed files with 613 additions and 14 deletions

View File

@@ -13,13 +13,19 @@ namespace sherpa_onnx {
// Registers the Python class "OfflineLMConfig" on module `m`, wrapping the C++
// OfflineLMConfig type: one keyword-argument constructor, read/write access to
// each config field, and `__str__` forwarded to OfflineLMConfig::ToString.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers,
// so a superseded line and its replacement appear back to back (the two
// `.def(py::init<...` openings and the two `lm_num_threads` argument lines).
// Presumably only the later, longer variant of each pair exists in the real
// file -- confirm against the repository before compiling this text.
void PybindOfflineLMConfig(py::module *m) {
using PyClass = OfflineLMConfig;
py::class_<PyClass>(*m, "OfflineLMConfig")
// Presumably the pre-change constructor signature:
// (model, scale, lm_num_threads, lm_provider).
.def(py::init<const std::string &, float, int32_t, const std::string &>(),
// Constructor signature extended with three trailing LODR parameters:
// (..., lodr_fst, lodr_scale, lodr_backoff_id).
.def(py::init<const std::string &, float, int32_t, const std::string &,
const std::string &, float, int32_t>(),
// `model` has no default, so Python callers must always supply it.
py::arg("model"), py::arg("scale") = 0.5f,
py::arg("lm_num_threads") = 1, py::arg("lm_provider") = "cpu")
py::arg("lm_num_threads") = 1, py::arg("lm_provider") = "cpu",
// LODR keyword arguments default to "", 0.0f and -1.
// NOTE(review): presumably these defaults mean "LODR disabled" -- the
// consuming code is not visible here, confirm.
py::arg("lodr_fst") = "", py::arg("lodr_scale") = 0.0f,
py::arg("lodr_backoff_id") = -1)
// Expose every field as a mutable Python attribute.
.def_readwrite("model", &PyClass::model)
.def_readwrite("scale", &PyClass::scale)
.def_readwrite("lm_provider", &PyClass::lm_provider)
.def_readwrite("lm_num_threads", &PyClass::lm_num_threads)
.def_readwrite("lodr_fst", &PyClass::lodr_fst)
.def_readwrite("lodr_scale", &PyClass::lodr_scale)
.def_readwrite("lodr_backoff_id", &PyClass::lodr_backoff_id)
// str(config) in Python calls OfflineLMConfig::ToString.
.def("__str__", &PyClass::ToString);
}

View File

@@ -14,15 +14,21 @@ void PybindOnlineLMConfig(py::module *m) {
// Registers the Python class "OnlineLMConfig" on module `m`, wrapping the C++
// OnlineLMConfig type: one keyword-argument constructor (every argument has a
// default), read/write access to each config field, and `__str__` forwarded
// to OnlineLMConfig::ToString.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers;
// the function signature is only visible in the hunk header above, and a
// superseded line sits next to its replacement (the two continuations of
// `py::init<...>` and the two `shallow_fusion` argument lines). Presumably
// only the later variant of each pair exists in the real file -- confirm
// against the repository before compiling this text.
using PyClass = OnlineLMConfig;
py::class_<PyClass>(*m, "OnlineLMConfig")
.def(py::init<const std::string &, float, int32_t,
// Presumably the pre-change tail of the signature: (..., lm_provider,
// shallow_fusion).
const std::string &, bool>(),
// Tail of the signature extended with three trailing LODR parameters:
// (..., lm_provider, shallow_fusion, lodr_fst, lodr_scale, lodr_backoff_id).
// NOTE(review): the last template argument is `int` here, while the offline
// binding's py::init uses `int32_t` for lodr_backoff_id; consider `int32_t`
// for consistency.
const std::string &, bool, const std::string &,
float, int>(),
py::arg("model") = "", py::arg("scale") = 0.5f,
py::arg("lm_num_threads") = 1, py::arg("lm_provider") = "cpu",
py::arg("shallow_fusion") = true)
// LODR keyword arguments default to "", 0.0f and -1.
// NOTE(review): presumably these defaults mean "LODR disabled" -- the
// consuming code is not visible here, confirm.
py::arg("shallow_fusion") = true, py::arg("lodr_fst") = "",
py::arg("lodr_scale") = 0.0f, py::arg("lodr_backoff_id") = -1)
// Expose every field as a mutable Python attribute.
.def_readwrite("model", &PyClass::model)
.def_readwrite("scale", &PyClass::scale)
.def_readwrite("lm_provider", &PyClass::lm_provider)
.def_readwrite("lm_num_threads", &PyClass::lm_num_threads)
.def_readwrite("shallow_fusion", &PyClass::shallow_fusion)
.def_readwrite("lodr_fst", &PyClass::lodr_fst)
.def_readwrite("lodr_scale", &PyClass::lodr_scale)
.def_readwrite("lodr_backoff_id", &PyClass::lodr_backoff_id)
// str(config) in Python calls OnlineLMConfig::ToString.
.def("__str__", &PyClass::ToString);
}