This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/sherpa-onnx/csrc/offline-lm.cc
Askars Salimbajevs f0960342ad Add LODR support to online and offline recognizers (#2026)
This PR integrates LODR (Low-Order Density Ratio) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescoring.

- Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id.
- Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths.
- Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
2025-07-09 16:23:46 +08:00

104 lines
3.1 KiB
C++

// sherpa-onnx/csrc/offline-lm.cc
//
// Copyright (c) 2023 Xiaomi Corporation
#include "sherpa-onnx/csrc/offline-lm.h"
#include <algorithm>
#include <utility>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#if __OHOS__
#include "rawfile/raw_file_manager.h"
#endif
#include "sherpa-onnx/csrc/lodr-fst.h"
#include "sherpa-onnx/csrc/offline-rnn-lm.h"
namespace sherpa_onnx {
// Factory: builds the language model described by `config`.
//
// The returned object is always an OfflineRnnLM, handed back through the
// OfflineLM base-class pointer.
std::unique_ptr<OfflineLM> OfflineLM::Create(const OfflineLMConfig &config) {
  std::unique_ptr<OfflineLM> lm = std::make_unique<OfflineRnnLM>(config);
  return lm;
}
// Factory overload taking a platform resource manager.
//
// `mgr` is forwarded untouched to the OfflineRnnLM constructor together with
// `config`; the result is returned through the OfflineLM base-class pointer.
template <typename Manager>
std::unique_ptr<OfflineLM> OfflineLM::Create(Manager *mgr,
                                             const OfflineLMConfig &config) {
  std::unique_ptr<OfflineLM> lm = std::make_unique<OfflineRnnLM>(mgr, config);
  return lm;
}
// Rescores every hypothesis in `hyps` with the language model and writes the
// scaled LM log-probability into each hypothesis' `lm_log_prob`.
//
// All hypotheses are packed into one zero-padded batch of shape
// (num_hyps, max_token_seq) plus a (num_hyps,) tensor of true lengths, and the
// batch is scored by a single call to Rescore().
//
// @param scale         Weight applied to the LM score. It also multiplies
//                      `config_.lodr_scale` to obtain the LODR weight.
// @param context_size  Number of leading entries of each token sequence that
//                      are skipped before the sequence is fed to the LM (each
//                      sequence is prepended with `context_size` blanks).
// @param hyps          In/out. Every contained hypothesis gets `lm_log_prob`
//                      overwritten; when `lodr_fst_` is set, the hypothesis is
//                      additionally passed to `lodr_fst_->ComputeScore()`.
//
// NOTE(review): assumes `hyps` is non-null and that every `ys` holds at least
// `context_size` tokens - neither is checked here; confirm against callers.
void OfflineLM::ComputeLMScore(float scale, int32_t context_size,
                               std::vector<Hypotheses> *hyps) {
  // First pass: count the hypotheses and find the longest token sequence so
  // that we know how much space to allocate. We subtract context_size since
  // each token sequence is prepended with context_size blanks.
  int32_t max_token_seq = 0;
  int32_t num_hyps = 0;

  for (const auto &h : *hyps) {
    num_hyps += h.Size();

    for (const auto &t : h) {
      const int32_t len =
          static_cast<int32_t>(t.second.ys.size()) - context_size;
      max_token_seq = std::max(max_token_seq, len);
    }
  }

  if (num_hyps == 0) {
    // Nothing to score: skip the tensor allocation and the LM run entirely
    // instead of pushing an empty batch through the model.
    return;
  }

  Ort::AllocatorWithDefaultOptions allocator;

  std::array<int64_t, 2> x_shape{num_hyps, max_token_seq};
  Ort::Value x = Ort::Value::CreateTensor<int64_t>(allocator, x_shape.data(),
                                                   x_shape.size());

  std::array<int64_t, 1> x_lens_shape{num_hyps};
  Ort::Value x_lens = Ort::Value::CreateTensor<int64_t>(
      allocator, x_lens_shape.data(), x_lens_shape.size());

  // Zero the whole batch first; rows shorter than max_token_seq therefore stay
  // padded with 0. The element count is computed in 64 bits so the product
  // cannot overflow int32_t.
  int64_t *p = x.GetTensorMutableData<int64_t>();
  const int64_t num_elements = static_cast<int64_t>(num_hyps) * max_token_seq;
  std::fill(p, p + num_elements, 0);

  int64_t *p_lens = x_lens.GetTensorMutableData<int64_t>();

  // Second pass: copy each token sequence (minus its context prefix) into its
  // own row and record the unpadded length.
  for (const auto &h : *hyps) {
    for (const auto &t : h) {
      const auto &ys = t.second.ys;
      const int32_t len = static_cast<int32_t>(ys.size()) - context_size;

      std::copy(ys.begin() + context_size, ys.end(), p);
      *p_lens = len;

      p += max_token_seq;
      ++p_lens;
    }
  }

  auto negative_loglike = Rescore(std::move(x), std::move(x_lens));
  const float *p_nll = negative_loglike.GetTensorData<float>();

  // We scale LODR scale with LM scale to replicate Icefall code
  const auto lodr_scale = config_.lodr_scale * scale;

  // Third pass: same iteration order as the packing loop above, so the i-th
  // score belongs to the i-th packed hypothesis.
  for (auto &h : *hyps) {
    for (auto &t : h) {
      // Use -scale here since we want to change negative loglike to loglike.
      t.second.lm_log_prob = -scale * (*p_nll);
      ++p_nll;

      // apply LODR to hyp score
      if (lodr_fst_ != nullptr) {
        lodr_fst_->ComputeScore(lodr_scale, &t.second, context_size);
      }
    }
  }
}
// Explicit instantiation of the Manager-templated OfflineLM::Create() for the
// Android asset manager. The template is defined in this .cc file, so each
// manager type that is used must be instantiated here. Compiled only when
// __ANDROID_API__ >= 9.
#if __ANDROID_API__ >= 9
template std::unique_ptr<OfflineLM> OfflineLM::Create(
AAssetManager *mgr, const OfflineLMConfig &config);
#endif
// Explicit instantiation of the Manager-templated OfflineLM::Create() for the
// NativeResourceManager type. Compiled only when __OHOS__ is set.
#if __OHOS__
template std::unique_ptr<OfflineLM> OfflineLM::Create(
NativeResourceManager *mgr, const OfflineLMConfig &config);
#endif
} // namespace sherpa_onnx