Add LODR support to online and offline recognizers (#2026)

This PR integrates LODR (Level-Ordered Deterministic Rescoring) support from Icefall into both online and offline recognizers, enabling LODR for LM shallow fusion and LM rescore. - Extended OnlineLMConfig and OfflineLMConfig to include lodr_fst, lodr_scale, and lodr_backoff_id. - Implemented LodrFst and LodrStateCost classes and wired them into RNN LM scoring in both online and offline code paths. - Updated Python bindings, CLI entry points, examples, and CI test scripts to accept and exercise the new LODR options.
2025-07-09 11:23:46 +03:00
parent 6122a678f5
commit f0960342ad
21 changed files with 613 additions and 14 deletions
--- a/.github/scripts/test-offline-transducer.sh
+++ b/.github/scripts/test-offline-transducer.sh
@@ -281,7 +281,39 @@ time $EXE \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

-rm -rf $repo
+lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Start testing with LM and bi-gram LODR"
+# TODO: find test examples that change with the LODR
+time $EXE \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --num-threads=2 \
+  --decoding_method="modified_beam_search" \
+  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5  \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/8k.wav
+
+rm -rf $repo $lm_repo $bigramlm_repo

 log "------------------------------------------------------------"
 log "Run Paraformer (Chinese)"
--- a/.github/scripts/test-online-transducer.sh
+++ b/.github/scripts/test-online-transducer.sh
@@ -174,7 +174,60 @@ for wave in ${waves[@]}; do
  $wave
 done

-rm -rf $repo
+lm_repo_url=https://huggingface.co/vsd-vector/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "with-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Start testing LODR"
+
+waves=(
+$repo/test_wavs/0.wav
+$repo/test_wavs/1.wav
+$repo/test_wavs/8k.wav
+)
+
+for wave in ${waves[@]}; do
+  time $EXE \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --num-threads=2 \
+  --decoding_method="modified_beam_search" \
+  --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5  \
+  $wave
+done
+
+for wave in ${waves[@]}; do
+  time $EXE \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --num-threads=2 \
+  --decoding_method="modified_beam_search" \
+  --lm=$lm_repo/with-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5  \
+  --lm-shallow-fusion=true \
+  $wave
+done
+
+rm -rf $repo $bigramlm_repo $lm_repo

 log "------------------------------------------------------------"
 log "Run streaming Zipformer transducer (Bilingual, Chinese + English)"
--- a/.github/scripts/test-python.sh
+++ b/.github/scripts/test-python.sh
@@ -562,9 +562,39 @@ python3 ./python-api-examples/offline-decode-files.py \
  $repo/test_wavs/1.wav \
  $repo/test_wavs/8k.wav

+lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
+log "Download pre-trained RNN-LM model from ${lm_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
+lm_repo=$(basename $lm_repo_url)
+pushd $lm_repo
+git lfs pull --include "exp/no-state-epoch-99-avg-1.onnx"
+popd
+
+bigram_repo_url=https://huggingface.co/vsd-vector/librispeech_bigram_sherpa-onnx-zipformer-large-en-2023-06-26
+log "Download bi-gram LM from ${bigram_repo_url}"
+GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
+bigramlm_repo=$(basename $bigram_repo_url)
+pushd $bigramlm_repo
+git lfs pull --include "2gram.fst"
+popd
+
+log "Perform offline decoding with RNN-LM and LODR"
+python3 ./python-api-examples/offline-decode-files.py \
+  --tokens=$repo/tokens.txt \
+  --encoder=$repo/encoder-epoch-99-avg-1.onnx \
+  --decoder=$repo/decoder-epoch-99-avg-1.onnx \
+  --joiner=$repo/joiner-epoch-99-avg-1.onnx \
+  --decoding-method=modified_beam_search \
+  --lm=$lm_repo/exp/no-state-epoch-99-avg-1.onnx \
+  --lodr-fst=$bigramlm_repo/2gram.fst \
+  --lodr-scale=-0.5 \
+  $repo/test_wavs/0.wav \
+  $repo/test_wavs/1.wav \
+  $repo/test_wavs/8k.wav
+
 python3 sherpa-onnx/python/tests/test_offline_recognizer.py --verbose

-rm -rf $repo
+rm -rf $repo $lm_repo $bigramlm_repo

 log "Test non-streaming paraformer models"