/**
 * Copyright (c)  2023  Xiaomi Corporation
 * Copyright (c)  2023  Pingfeng Luo
 *
 */
|
|
|
|
|
|
|
|
|
|
#ifndef SHERPA_ONNX_CSRC_HYPOTHESIS_H_
|
|
|
|
|
#define SHERPA_ONNX_CSRC_HYPOTHESIS_H_
|
|
|
|
|
|
|
|
|
|
#include <sstream>
|
|
|
|
|
#include <string>
|
|
|
|
|
#include <unordered_map>
|
|
|
|
|
#include <utility>
|
|
|
|
|
#include <vector>
|
2025-07-09 11:23:46 +03:00
|
|
|
#include <memory>
|
2023-03-01 15:32:54 +08:00
|
|
|
|
2023-05-05 21:23:54 +08:00
|
|
|
#include "onnxruntime_cxx_api.h" // NOLINT
|
2023-06-16 14:26:36 +08:00
|
|
|
#include "sherpa-onnx/csrc/context-graph.h"
|
2025-07-09 11:23:46 +03:00
|
|
|
#include "sherpa-onnx/csrc/lodr-fst.h"
|
2023-03-01 15:32:54 +08:00
|
|
|
#include "sherpa-onnx/csrc/math.h"
|
2023-05-05 21:23:54 +08:00
|
|
|
#include "sherpa-onnx/csrc/onnx-utils.h"
|
2023-03-01 15:32:54 +08:00
|
|
|
|
|
|
|
|
namespace sherpa_onnx {
|
|
|
|
|
|
|
|
|
|
struct Hypothesis {
|
|
|
|
|
// The predicted tokens so far. Newly predicated tokens are appended.
|
2023-03-03 12:10:59 +08:00
|
|
|
std::vector<int64_t> ys;
|
2023-03-01 15:32:54 +08:00
|
|
|
|
|
|
|
|
// timestamps[i] contains the frame number after subsampling
|
|
|
|
|
// on which ys[i] is decoded.
|
|
|
|
|
std::vector<int32_t> timestamps;
|
|
|
|
|
|
2024-01-20 22:52:41 +08:00
|
|
|
// The acoustic probability for each token in ys.
|
2024-02-28 23:28:45 +01:00
|
|
|
// Used for keyword spotting task.
|
|
|
|
|
// For transducer mofified beam-search and greedy-search,
|
|
|
|
|
// this is filled with log_posterior scores.
|
2024-01-20 22:52:41 +08:00
|
|
|
std::vector<float> ys_probs;
|
|
|
|
|
|
2024-02-28 23:28:45 +01:00
|
|
|
// lm_probs[i] contains the lm score for each token in ys.
|
|
|
|
|
// Used only in transducer mofified beam-search.
|
|
|
|
|
// Elements filled only if LM is used.
|
|
|
|
|
std::vector<float> lm_probs;
|
|
|
|
|
|
|
|
|
|
// context_scores[i] contains the context-graph score for each token in ys.
|
|
|
|
|
// Used only in transducer mofified beam-search.
|
|
|
|
|
// Elements filled only if `ContextGraph` is used.
|
|
|
|
|
std::vector<float> context_scores;
|
|
|
|
|
|
2023-03-01 15:32:54 +08:00
|
|
|
// The total score of ys in log space.
|
2023-04-23 17:15:18 +08:00
|
|
|
// It contains only acoustic scores
|
2023-03-01 15:32:54 +08:00
|
|
|
double log_prob = 0;
|
|
|
|
|
|
2023-04-23 17:15:18 +08:00
|
|
|
// LM log prob if any.
|
|
|
|
|
double lm_log_prob = 0;
|
|
|
|
|
|
2024-09-06 05:01:25 +03:00
|
|
|
// the nn lm score for next token given the current ys,
|
|
|
|
|
// when using shallow fusion
|
2023-05-10 22:30:57 +08:00
|
|
|
CopyableOrtValue nn_lm_scores;
|
2024-09-06 05:01:25 +03:00
|
|
|
|
|
|
|
|
// cur scored tokens by RNN LM, when rescoring
|
|
|
|
|
int32_t cur_scored_pos = 0;
|
|
|
|
|
|
2023-05-10 22:30:57 +08:00
|
|
|
// the nn lm states
|
2023-05-05 21:23:54 +08:00
|
|
|
std::vector<CopyableOrtValue> nn_lm_states;
|
|
|
|
|
|
2025-07-09 11:23:46 +03:00
|
|
|
// the LODR states
|
|
|
|
|
std::shared_ptr<LodrStateCost> lodr_state;
|
|
|
|
|
|
2023-06-16 14:26:36 +08:00
|
|
|
const ContextState *context_state;
|
|
|
|
|
|
|
|
|
|
// TODO(fangjun): Make it configurable
|
|
|
|
|
// the minimum of tokens in a chunk for streaming RNN LM
|
|
|
|
|
int32_t lm_rescore_min_chunk = 2; // a const
|
|
|
|
|
|
2023-03-01 15:32:54 +08:00
|
|
|
int32_t num_trailing_blanks = 0;
|
|
|
|
|
|
|
|
|
|
Hypothesis() = default;
|
2023-06-16 14:26:36 +08:00
|
|
|
Hypothesis(const std::vector<int64_t> &ys, double log_prob,
|
|
|
|
|
const ContextState *context_state = nullptr)
|
|
|
|
|
: ys(ys), log_prob(log_prob), context_state(context_state) {}
|
2023-03-01 15:32:54 +08:00
|
|
|
|
2023-04-23 17:15:18 +08:00
|
|
|
double TotalLogProb() const { return log_prob + lm_log_prob; }
|
|
|
|
|
|
2023-03-01 15:32:54 +08:00
|
|
|
// If two Hypotheses have the same `Key`, then they contain
|
|
|
|
|
// the same token sequence.
|
|
|
|
|
std::string Key() const {
|
|
|
|
|
// TODO(fangjun): Use a hash function?
|
|
|
|
|
std::ostringstream os;
|
2023-07-27 23:19:49 -07:00
|
|
|
std::string sep;
|
2023-03-01 15:32:54 +08:00
|
|
|
for (auto i : ys) {
|
2023-07-27 23:19:49 -07:00
|
|
|
os << sep << i;
|
2023-03-01 15:32:54 +08:00
|
|
|
sep = "-";
|
|
|
|
|
}
|
|
|
|
|
return os.str();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// For debugging
|
|
|
|
|
std::string ToString() const {
|
|
|
|
|
std::ostringstream os;
|
|
|
|
|
os << "(" << Key() << ", " << log_prob << ")";
|
|
|
|
|
return os.str();
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
class Hypotheses {
|
|
|
|
|
public:
|
|
|
|
|
Hypotheses() = default;
|
|
|
|
|
|
|
|
|
|
explicit Hypotheses(std::vector<Hypothesis> hyps) {
|
|
|
|
|
for (auto &h : hyps) {
|
|
|
|
|
hyps_dict_[h.Key()] = std::move(h);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
explicit Hypotheses(std::unordered_map<std::string, Hypothesis> hyps_dict)
|
|
|
|
|
: hyps_dict_(std::move(hyps_dict)) {}
|
|
|
|
|
|
|
|
|
|
// Add hyp to this object. If it already exists, its log_prob
|
|
|
|
|
// is updated with the given hyp using log-sum-exp.
|
|
|
|
|
void Add(Hypothesis hyp);
|
|
|
|
|
|
|
|
|
|
// Get the hyp that has the largest log_prob.
|
|
|
|
|
// If length_norm is true, hyp's log_prob is divided by
|
|
|
|
|
// len(hyp.ys) before comparison.
|
|
|
|
|
Hypothesis GetMostProbable(bool length_norm) const;
|
|
|
|
|
|
|
|
|
|
// Get the k hyps that have the largest log_prob.
|
|
|
|
|
// If length_norm is true, hyp's log_prob is divided by
|
|
|
|
|
// len(hyp.ys) before comparison.
|
|
|
|
|
std::vector<Hypothesis> GetTopK(int32_t k, bool length_norm) const;
|
|
|
|
|
|
|
|
|
|
int32_t Size() const { return hyps_dict_.size(); }
|
|
|
|
|
|
|
|
|
|
std::string ToString() const {
|
|
|
|
|
std::ostringstream os;
|
|
|
|
|
for (const auto &p : hyps_dict_) {
|
|
|
|
|
os << p.second.ToString() << "\n";
|
|
|
|
|
}
|
|
|
|
|
return os.str();
|
|
|
|
|
}
|
|
|
|
|
|
2024-05-31 13:17:01 +08:00
|
|
|
auto begin() const { return hyps_dict_.begin(); }
|
|
|
|
|
auto end() const { return hyps_dict_.end(); }
|
2023-03-01 15:32:54 +08:00
|
|
|
|
2023-04-23 17:15:18 +08:00
|
|
|
auto begin() { return hyps_dict_.begin(); }
|
|
|
|
|
auto end() { return hyps_dict_.end(); }
|
|
|
|
|
|
2023-03-01 15:32:54 +08:00
|
|
|
void Clear() { hyps_dict_.clear(); }
|
|
|
|
|
|
|
|
|
|
// Return a list of hyps contained in this object.
|
|
|
|
|
std::vector<Hypothesis> Vec() const {
|
|
|
|
|
std::vector<Hypothesis> ans;
|
|
|
|
|
ans.reserve(hyps_dict_.size());
|
|
|
|
|
for (const auto &p : hyps_dict_) {
|
|
|
|
|
ans.push_back(p.second);
|
|
|
|
|
}
|
|
|
|
|
return ans;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
|
|
|
|
using Map = std ::unordered_map<std::string, Hypothesis>;
|
|
|
|
|
Map hyps_dict_;
|
|
|
|
|
};
|
|
|
|
|
|
2023-04-26 11:41:04 +08:00
|
|
|
// Compute row splits over a list of Hypotheses sets — presumably
// ans[0] == 0 and ans[i+1] - ans[i] == hyps[i].Size(), so the result
// maps each stream to its range in a flattened batch.
// TODO(review): confirm against the definition in the .cc file.
const std::vector<int32_t> GetHypsRowSplits(
    const std::vector<Hypotheses> &hyps);
|
|
|
|
|
|
2023-03-01 15:32:54 +08:00
|
|
|
} // namespace sherpa_onnx
|
|
|
|
|
|
|
|
|
|
#endif // SHERPA_ONNX_CSRC_HYPOTHESIS_H_
|