Add LODR support to online and offline recognizers (#2026)

This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for both LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
This commit is contained in:
Askars Salimbajevs
2025-07-09 11:23:46 +03:00
committed by GitHub
parent 6122a678f5
commit f0960342ad
21 changed files with 613 additions and 14 deletions

View File

@@ -20,6 +20,10 @@ void OnlineLMConfig::Register(ParseOptions *po) {
"Specify a provider to LM model use: cpu, cuda, coreml");
po->Register("lm-shallow-fusion", &shallow_fusion,
"Boolean whether to use shallow fusion or rescore.");
po->Register("lodr-fst", &lodr_fst, "Path to LODR FST model.");
po->Register("lodr-scale", &lodr_scale, "LODR scale.");
po->Register("lodr-backoff-id", &lodr_backoff_id,
"ID of the backoff in the LODR FST. -1 means autodetect");
}
bool OnlineLMConfig::Validate() const {
@@ -28,6 +32,11 @@ bool OnlineLMConfig::Validate() const {
return false;
}
if (!lodr_fst.empty() && !FileExists(lodr_fst)) {
SHERPA_ONNX_LOGE("'%s' does not exist", lodr_fst.c_str());
return false;
}
return true;
}
@@ -37,6 +46,9 @@ std::string OnlineLMConfig::ToString() const {
os << "OnlineLMConfig(";
os << "model=\"" << model << "\", ";
os << "scale=" << scale << ", ";
os << "lodr_scale=" << lodr_scale << ", ";
os << "lodr_fst=\"" << lodr_fst << "\", ";
os << "lodr_backoff_id=" << lodr_backoff_id << ", ";
os << "shallow_fusion=" << (shallow_fusion ? "True" : "False") << ")";
return os.str();