Add LODR support to online and offline recognizers (#2026)

This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
This commit is contained in:
Askars Salimbajevs
2025-07-09 11:23:46 +03:00
committed by GitHub
parent 6122a678f5
commit f0960342ad
21 changed files with 613 additions and 14 deletions

View File

@@ -18,6 +18,10 @@ void OfflineLMConfig::Register(ParseOptions *po) {
"Number of threads to run the neural network of LM model");
po->Register("lm-provider", &lm_provider,
"Specify a provider to LM model use: cpu, cuda, coreml");
po->Register("lodr-fst", &lodr_fst, "Path to LODR FST model.");
po->Register("lodr-scale", &lodr_scale, "LODR scale.");
po->Register("lodr-backoff-id", &lodr_backoff_id,
"ID of the backoff in the LODR FST. -1 means autodetect");
}
bool OfflineLMConfig::Validate() const {
@@ -26,6 +30,11 @@ bool OfflineLMConfig::Validate() const {
return false;
}
if (!lodr_fst.empty() && !FileExists(lodr_fst)) {
SHERPA_ONNX_LOGE("'%s' does not exist", lodr_fst.c_str());
return false;
}
return true;
}
@@ -34,7 +43,10 @@ std::string OfflineLMConfig::ToString() const {
os << "OfflineLMConfig(";
os << "model=\"" << model << "\", ";
os << "scale=" << scale << ")";
os << "scale=" << scale << ", ";
os << "lodr_scale=" << lodr_scale << ", ";
os << "lodr_fst=\"" << lodr_fst << "\", ";
os << "lodr_backoff_id=" << lodr_backoff_id << ")";
return os.str();
}