Track token scores (#571)

* add export of per-token scores (ys, lm, context) - for best path of the modified-beam-search decoding of transducer * refactoring JSON export of OnlineRecognitionResult, extending pybind11 API of OnlineRecognitionResult * export per-token scores also for greedy-search (online-transducer) - export un-scaled lm_probs (modified-beam search, online-transducer) - polishing * fill lm_probs/context_scores only if LM/ContextGraph is present (make Result smaller)
2024-02-28 23:28:45 +01:00
parent 85d59b5840
commit 38c072dcb2
11 changed files with 155 additions and 49 deletions
--- a/sherpa-onnx/csrc/hypothesis.h
+++ b/sherpa-onnx/csrc/hypothesis.h
@@ -29,9 +29,21 @@ struct Hypothesis {
  std::vector<int32_t> timestamps;

  // The acoustic probability for each token in ys.
-  // Only used for keyword spotting task.
+  // Used for keyword spotting task.
+  // For transducer modified beam-search and greedy-search,
+  // this is filled with log_posterior scores.
  std::vector<float> ys_probs;

+  // lm_probs[i] contains the lm score for each token in ys.
+  // Used only in transducer modified beam-search.
+  // Elements filled only if LM is used.
+  std::vector<float> lm_probs;
+
+  // context_scores[i] contains the context-graph score for each token in ys.
+  // Used only in transducer modified beam-search.
+  // Elements filled only if `ContextGraph` is used.
+  std::vector<float> context_scores;
+
  // The total score of ys in log space.
  // It contains only acoustic scores
  double log_prob = 0;