Add LODR support to online and offline recognizers (#2026)

This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.
- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
2025-07-09 11:23:46 +03:00
parent 6122a678f5
commit f0960342ad
21 changed files with 613 additions and 14 deletions
--- a/sherpa-onnx/csrc/offline-lm.cc
+++ b/sherpa-onnx/csrc/offline-lm.cc
@@ -17,6 +17,7 @@
 #include "rawfile/raw_file_manager.h"
 #endif

+#include "sherpa-onnx/csrc/lodr-fst.h"
 #include "sherpa-onnx/csrc/offline-rnn-lm.h"

 namespace sherpa_onnx {
@@ -74,11 +75,17 @@ void OfflineLM::ComputeLMScore(float scale, int32_t context_size,
  }
  auto negative_loglike = Rescore(std::move(x), std::move(x_lens));
  const float *p_nll = negative_loglike.GetTensorData<float>();
+  // We scale LODR scale with LM scale to replicate Icefall code
+  auto lodr_scale = config_.lodr_scale * scale;
  for (auto &h : *hyps) {
    for (auto &t : h) {
      // Use -scale here since we want to change negative loglike to loglike.
      t.second.lm_log_prob = -scale * (*p_nll);
      ++p_nll;
+      // apply LODR to hyp score
+      if (lodr_fst_ != nullptr) {
+        lodr_fst_->ComputeScore(lodr_scale, &t.second, context_size);
+      }
    }
  }
 }