add modified beam search (#69)

2023-03-01 15:32:54 +08:00
parent e0b76655c8
commit 5326d0f81f
19 changed files with 614 additions and 87 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,4 @@ decode-file
 tokens.txt
 *.onnx
 log.txt
 tags
--- a/sherpa-onnx/csrc/CMakeLists.txt
+++ b/sherpa-onnx/csrc/CMakeLists.txt
@@ -5,11 +5,13 @@ set(sources
  endpoint.cc
  features.cc
  file-utils.cc
  hypothesis.cc
  online-lstm-transducer-model.cc
  online-recognizer.cc
  online-stream.cc
  online-transducer-greedy-search-decoder.cc
  online-transducer-model-config.cc
  online-transducer-modified-beam-search-decoder.cc
  online-transducer-model.cc
  online-zipformer-transducer-model.cc
  onnx-utils.cc
--- a/sherpa-onnx/csrc/hypothesis.cc
+++ b/sherpa-onnx/csrc/hypothesis.cc
@@ -0,0 +1,65 @@
 /**
 * Copyright (c)  2023  Xiaomi Corporation
 *
 */
 #include "sherpa-onnx/csrc/hypothesis.h"
 #include <algorithm>
 #include <utility>
 namespace sherpa_onnx {
 // Inserts `hyp`, indexed by hyp.Key(). If an entry with the same key is
 // already stored, only its log_prob is updated: the two scores are combined
 // with LogAdd and every other field of the stored entry is left untouched.
 void Hypotheses::Add(Hypothesis hyp) {
  auto key = hyp.Key();
  const auto pos = hyps_dict_.find(key);
  if (pos != hyps_dict_.end()) {
    // Same token sequence reached through another path: merge the scores.
    pos->second.log_prob =
        LogAdd<double>()(pos->second.log_prob, hyp.log_prob);
    return;
  }
  hyps_dict_.emplace(std::move(key), std::move(hyp));
 }
 // Returns a copy of the highest-scoring hypothesis.
 //
 // @param length_norm If true, hypotheses are ranked by log_prob divided by
 //                    ys.size(); otherwise by log_prob alone.
 // @return The best hypothesis, or a default-constructed Hypothesis when
 //         this object contains no hypotheses.
 Hypothesis Hypotheses::GetMostProbable(bool length_norm) const {
  if (hyps_dict_.empty()) {
    // std::max_element returns end() for an empty range and dereferencing
    // that iterator is undefined behavior.
    return {};
  }
  const auto score = [length_norm](const Hypothesis &h) -> double {
    return length_norm ? h.log_prob / h.ys.size() : h.log_prob;
  };
  return std::max_element(hyps_dict_.begin(), hyps_dict_.end(),
                          [&score](const auto &left, const auto &right) {
                            return score(left.second) < score(right.second);
                          })
      ->second;
 }
 std::vector<Hypothesis> Hypotheses::GetTopK(int32_t k, bool length_norm) const {
  k = std::max(k, 1);
  k = std::min(k, Size());
  std::vector<Hypothesis> all_hyps = Vec();
  if (length_norm == false) {
    std::partial_sort(
        all_hyps.begin(), all_hyps.begin() + k, all_hyps.end(),
        [](const auto &a, const auto &b) { return a.log_prob > b.log_prob; });
  } else {
    // for length_norm is true
    std::partial_sort(all_hyps.begin(), all_hyps.begin() + k, all_hyps.end(),
                      [](const auto &a, const auto &b) {
                        return a.log_prob / a.ys.size() >
                               b.log_prob / b.ys.size();
                      });
  }
  return {all_hyps.begin(), all_hyps.begin() + k};
 }
 }  // namespace sherpa_onnx
--- a/sherpa-onnx/csrc/hypothesis.h
+++ b/sherpa-onnx/csrc/hypothesis.h
@@ -0,0 +1,117 @@
 /**
 * Copyright (c)  2023  Xiaomi Corporation
 *
 */
 #ifndef SHERPA_ONNX_CSRC_HYPOTHESIS_H_
 #define SHERPA_ONNX_CSRC_HYPOTHESIS_H_
 #include <sstream>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 #include "sherpa-onnx/csrc/math.h"
 namespace sherpa_onnx {
 // One decoding path: the tokens emitted so far together with their score.
 struct Hypothesis {
  // Tokens predicted so far. Newly predicted tokens are appended at the end.
  std::vector<int32_t> ys;
  // timestamps[i] is the frame number (after subsampling) on which ys[i]
  // was decoded.
  std::vector<int32_t> timestamps;
  // Total score of ys, in log space.
  double log_prob = 0;
  int32_t num_trailing_blanks = 0;
  Hypothesis() = default;
  Hypothesis(const std::vector<int32_t> &ys, double log_prob)
      : ys(ys), log_prob(log_prob) {}
  // Returns a string that identifies the token sequence: two hypotheses with
  // the same key contain the same tokens. Every token is written in decimal
  // and followed by "-", e.g. {1, 2} gives "1-2-".
  std::string Key() const {
    // TODO(fangjun): Use a hash function?
    std::ostringstream key;
    for (const int32_t token : ys) {
      key << token << "-";
    }
    return key.str();
  }
  // For debugging: "(<key>, <log_prob>)".
  std::string ToString() const {
    std::ostringstream text;
    text << "(" << Key() << ", " << log_prob << ")";
    return text.str();
  }
 };
 class Hypotheses {
 public:
  Hypotheses() = default;
  explicit Hypotheses(std::vector<Hypothesis> hyps) {
    for (auto &h : hyps) {
      hyps_dict_[h.Key()] = std::move(h);
    }
  }
  explicit Hypotheses(std::unordered_map<std::string, Hypothesis> hyps_dict)
      : hyps_dict_(std::move(hyps_dict)) {}
  // Add hyp to this object. If it already exists, its log_prob
  // is updated with the given hyp using log-sum-exp.
  void Add(Hypothesis hyp);
  // Get the hyp that has the largest log_prob.
  // If length_norm is true, hyp's log_prob is divided by
  // len(hyp.ys) before comparison.
  Hypothesis GetMostProbable(bool length_norm) const;
  // Get the k hyps that have the largest log_prob.
  // If length_norm is true, hyp's log_prob is divided by
  // len(hyp.ys) before comparison.
  std::vector<Hypothesis> GetTopK(int32_t k, bool length_norm) const;
  int32_t Size() const { return hyps_dict_.size(); }
  std::string ToString() const {
    std::ostringstream os;
    for (const auto &p : hyps_dict_) {
      os << p.second.ToString() << "\n";
    }
    return os.str();
  }
  const auto begin() const { return hyps_dict_.begin(); }
  const auto end() const { return hyps_dict_.end(); }
  void Clear() { hyps_dict_.clear(); }
 private:
  // Return a list of hyps contained in this object.
  std::vector<Hypothesis> Vec() const {
    std::vector<Hypothesis> ans;
    ans.reserve(hyps_dict_.size());
    for (const auto &p : hyps_dict_) {
      ans.push_back(p.second);
    }
    return ans;
  }
 private:
  using Map = std ::unordered_map<std::string, Hypothesis>;
  Map hyps_dict_;
 };
 }  // namespace sherpa_onnx
 #endif  // SHERPA_ONNX_CSRC_HYPOTHESIS_H_
--- a/sherpa-onnx/csrc/math.h
+++ b/sherpa-onnx/csrc/math.h
@@ -0,0 +1,107 @@
 /**
 * Copyright (c)  2022  Xiaomi Corporation (authors: Daniel Povey)
 * Copyright (c)  2023                     (Pingfeng Luo)
 *
 */
 // This file is copied from k2/csrc/utils.h
 #ifndef SHERPA_ONNX_CSRC_MATH_H_
 #define SHERPA_ONNX_CSRC_MATH_H_
 #include <algorithm>
 #include <cassert>
 #include <cmath>
 #include <numeric>
 #include <vector>
 namespace sherpa_onnx {
 // Threshold used by LogAdd<float>: logf(FLT_EPSILON). When the difference
 // between the two operands is below it, exp(diff) is smaller than
 // FLT_EPSILON and the correction term is dropped.
 #define SHERPA_ONNX_MIN_LOG_DIFF_FLOAT -15.9423847198486328125f
 // Threshold used by LogAdd<double>: log(DBL_EPSILON)
 #define SHERPA_ONNX_MIN_LOG_DIFF_DOUBLE \
  -36.0436533891171535515240975655615329742431640625
 // LogAdd<T>()(x, y) computes log(exp(x) + exp(y)) without leaving log
 // space. Only the float and double specializations are defined.
 template <typename T>
 struct LogAdd;
 template <>
 struct LogAdd<double> {
  // Returns log(exp(x) + exp(y)); when the smaller operand is more than
  // |log(DBL_EPSILON)| below the larger one, the larger one is returned
  // unchanged.
  double operator()(double x, double y) const {
    double diff;
    if (x < y) {
      diff = x - y;
      x = y;
    } else {
      diff = y - x;
    }
    // diff is negative.  x is now the larger one.
    if (diff >= SHERPA_ONNX_MIN_LOG_DIFF_DOUBLE) {
      return x + std::log1p(std::exp(diff));
    }
    return x;  // return the larger one.
  }
 };
 template <>
 struct LogAdd<float> {
  // Returns log(exp(x) + exp(y)); when the smaller operand is more than
  // |logf(FLT_EPSILON)| below the larger one, the larger one is returned
  // unchanged.
  float operator()(float x, float y) const {
    float diff;
    if (x < y) {
      diff = x - y;
      x = y;
    } else {
      diff = y - x;
    }
    // diff is negative.  x is now the larger one.
    // Compare against the float threshold; the double threshold belongs to
    // the double specialization only.
    if (diff >= SHERPA_ONNX_MIN_LOG_DIFF_FLOAT) {
      return x + std::log1p(std::exp(diff));
    }
    return x;  // return the larger one.
  }
 };
 // Replaces input[0 .. input_len) in place with its log-softmax:
 //   input[i] <- input[i] - log(sum_j exp(input[j]))
 // The maximum is subtracted before exponentiating to avoid overflow.
 //
 // @param input     Non-null pointer to at least input_len values.
 // @param input_len Number of values; nothing is done when it is <= 0.
 template <class T>
 void LogSoftmax(T *input, int32_t input_len) {
  assert(input);
  if (input_len <= 0) {
    // An empty range has no maximum; reading *input would be out of bounds.
    return;
  }
  const T max_val = *std::max_element(input, input + input_len);
  T sum = 0;
  for (int32_t i = 0; i < input_len; ++i) {
    // std:: qualified so that the overload matching T is selected.
    sum += std::exp(input[i] - max_val);
  }
  const T offset = max_val + std::log(sum);
  for (int32_t i = 0; i < input_len; ++i) {
    input[i] -= offset;
  }
 }
 // Returns the indexes of the `topk` largest values of vec[0 .. size),
 // ordered from the largest value to the smallest. The relative order of
 // indexes whose values compare equal is unspecified.
 //
 // @param vec  Pointer to `size` values.
 // @param size Number of values in vec.
 // @param topk Number of indexes wanted; capped at `size`.
 // @return min(size, topk) indexes, or an empty vector when size or topk
 //         is not positive.
 template <class T>
 std::vector<int32_t> TopkIndex(const T *vec, int32_t size, int32_t topk) {
  if (size <= 0 || topk <= 0) {
    return {};
  }
  std::vector<int32_t> vec_index(size);
  std::iota(vec_index.begin(), vec_index.end(), 0);
  const int32_t k_num = std::min<int32_t>(size, topk);
  // Only the first k_num positions are needed, so a full sort is avoided.
  std::partial_sort(vec_index.begin(), vec_index.begin() + k_num,
                    vec_index.end(),
                    [vec](int32_t index_1, int32_t index_2) {
                      return vec[index_1] > vec[index_2];
                    });
  vec_index.resize(k_num);
  return vec_index;
 }
 }  // namespace sherpa_onnx
 #endif  // SHERPA_ONNX_CSRC_MATH_H_
--- a/sherpa-onnx/csrc/online-lstm-transducer-model.cc
+++ b/sherpa-onnx/csrc/online-lstm-transducer-model.cc
@@ -247,24 +247,6 @@ OnlineLstmTransducerModel::RunEncoder(Ort::Value features,
  return {std::move(encoder_out[0]), std::move(next_states)};
 }
 // Builds an int64 tensor of shape (results.size(), context_size_) whose
 // row i holds the last context_size_ entries of results[i].tokens.
 // Every tokens array is expected to hold at least context_size_ entries.
 Ort::Value OnlineLstmTransducerModel::BuildDecoderInput(
    const std::vector<OnlineTransducerDecoderResult> &results) {
  const auto num_results = static_cast<int32_t>(results.size());
  std::array<int64_t, 2> dims{num_results, context_size_};
  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int64_t>(allocator_, dims.data(), dims.size());
  int64_t *out = decoder_input.GetTensorMutableData<int64_t>();
  for (const auto &result : results) {
    const int64_t *tail_end = result.tokens.data() + result.tokens.size();
    // std::copy returns the position just past the row it has written.
    out = std::copy(tail_end - context_size_, tail_end, out);
  }
  return decoder_input;
 }
 Ort::Value OnlineLstmTransducerModel::RunDecoder(Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
      {}, decoder_input_names_ptr_.data(), &decoder_input, 1,
--- a/sherpa-onnx/csrc/online-lstm-transducer-model.h
+++ b/sherpa-onnx/csrc/online-lstm-transducer-model.h
@@ -40,9 +40,6 @@ class OnlineLstmTransducerModel : public OnlineTransducerModel {
  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states) override;
  Ort::Value BuildDecoderInput(
      const std::vector<OnlineTransducerDecoderResult> &results) override;
  Ort::Value RunDecoder(Ort::Value decoder_input) override;
  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;
--- a/sherpa-onnx/csrc/online-recognizer.cc
+++ b/sherpa-onnx/csrc/online-recognizer.cc
@@ -1,6 +1,7 @@
 // sherpa-onnx/csrc/online-recognizer.cc
 //
 // Copyright (c)  2023  Xiaomi Corporation
 // Copyright (c)  2023  Pingfeng Luo
 #include "sherpa-onnx/csrc/online-recognizer.h"
@@ -16,6 +17,7 @@
 #include "sherpa-onnx/csrc/online-transducer-decoder.h"
 #include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h"
 #include "sherpa-onnx/csrc/online-transducer-model.h"
 #include "sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h"
 #include "sherpa-onnx/csrc/symbol-table.h"
 namespace sherpa_onnx {
@@ -39,6 +41,11 @@ void OnlineRecognizerConfig::Register(ParseOptions *po) {
  po->Register("enable-endpoint", &enable_endpoint,
               "True to enable endpoint detection. False to disable it.");
  po->Register("max-active-paths", &max_active_paths,
               "beam size used in modified beam search.");
  // The option is spelled "decoding-method" to match the decoding_method
  // field it fills in.
  po->Register("decoding-method", &decoding_method,
               "decoding method, "
               "now support greedy_search and modified_beam_search.");
 }
 bool OnlineRecognizerConfig::Validate() const {
@@ -52,7 +59,9 @@ std::string OnlineRecognizerConfig::ToString() const {
  os << "feat_config=" << feat_config.ToString() << ", ";
  os << "model_config=" << model_config.ToString() << ", ";
  os << "endpoint_config=" << endpoint_config.ToString() << ", ";
-  os << "enable_endpoint=" << (enable_endpoint ? "True" : "False") << ")";
+  os << "enable_endpoint=" << (enable_endpoint ? "True" : "False") << ",";
  os << "max_active_paths=" << max_active_paths << ",";
  os << "decoding_method=\"" << decoding_method << "\")";
  return os.str();
 }
@@ -64,8 +73,17 @@ class OnlineRecognizer::Impl {
        model_(OnlineTransducerModel::Create(config.model_config)),
        sym_(config.model_config.tokens),
        endpoint_(config_.endpoint_config) {
-    decoder_ =
+    if (config.decoding_method == "modified_beam_search") {
-        std::make_unique<OnlineTransducerGreedySearchDecoder>(model_.get());
+      decoder_ = std::make_unique<OnlineTransducerModifiedBeamSearchDecoder>(
          model_.get(), config_.max_active_paths);
    } else if (config.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OnlineTransducerGreedySearchDecoder>(model_.get());
    } else {
      fprintf(stderr, "Unsupported decoding method: %s\n",
              config.decoding_method.c_str());
      exit(-1);
    }
  }
 #if __ANDROID_API__ >= 9
@@ -74,8 +92,17 @@ class OnlineRecognizer::Impl {
        model_(OnlineTransducerModel::Create(mgr, config.model_config)),
        sym_(mgr, config.model_config.tokens),
        endpoint_(config_.endpoint_config) {
-    decoder_ =
+    if (config.decoding_method == "modified_beam_search") {
-        std::make_unique<OnlineTransducerGreedySearchDecoder>(model_.get());
+      decoder_ = std::make_unique<OnlineTransducerModifiedBeamSearchDecoder>(
          model_.get(), config_.max_active_paths);
    } else if (config.decoding_method == "greedy_search") {
      decoder_ =
          std::make_unique<OnlineTransducerGreedySearchDecoder>(model_.get());
    } else {
      fprintf(stderr, "Unsupported decoding method: %s\n",
              config.decoding_method.c_str());
      exit(-1);
    }
  }
 #endif
--- a/sherpa-onnx/csrc/online-recognizer.h
+++ b/sherpa-onnx/csrc/online-recognizer.h
@@ -32,7 +32,11 @@ struct OnlineRecognizerConfig {
  FeatureExtractorConfig feat_config;
  OnlineTransducerModelConfig model_config;
  EndpointConfig endpoint_config;
-  bool enable_endpoint;
+  bool enable_endpoint = true;
  int32_t max_active_paths = 4;
  std::string decoding_method = "modified_beam_search";
  // now support modified_beam_search and greedy_search
  OnlineRecognizerConfig() = default;
--- a/sherpa-onnx/csrc/online-transducer-decoder.h
+++ b/sherpa-onnx/csrc/online-transducer-decoder.h
@@ -8,6 +8,7 @@
 #include <vector>
 #include "onnxruntime_cxx_api.h"  // NOLINT
 #include "sherpa-onnx/csrc/hypothesis.h"
 namespace sherpa_onnx {
@@ -17,6 +18,9 @@ struct OnlineTransducerDecoderResult {
  /// number of trailing blank frames decoded so far
  int32_t num_trailing_blanks = 0;
  // used only in modified beam_search
  Hypotheses hyps;
 };
 class OnlineTransducerDecoder {
--- a/sherpa-onnx/csrc/online-transducer-greedy-search-decoder.cc
+++ b/sherpa-onnx/csrc/online-transducer-greedy-search-decoder.cc
@@ -4,8 +4,6 @@
 #include "sherpa-onnx/csrc/online-transducer-greedy-search-decoder.h"
 #include <assert.h>
 #include <algorithm>
 #include <utility>
 #include <vector>
@@ -15,39 +13,6 @@
 namespace sherpa_onnx {
 // Copies frame `t` of every batch item out of `encoder_out`.
 //
 // `encoder_out` is read as a contiguous float tensor whose shape is
 // (batch_size, num_frames, encoder_out_dim). The returned tensor has shape
 // (batch_size, encoder_out_dim) and is allocated from `allocator`.
 static Ort::Value GetFrame(OrtAllocator *allocator, Ort::Value *encoder_out,
                           int32_t t) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out->GetTensorTypeAndShapeInfo().GetShape();
  auto batch_size = encoder_out_shape[0];
  auto num_frames = encoder_out_shape[1];
  assert(t >= 0 && t < num_frames);
  auto encoder_out_dim = encoder_out_shape[2];
  // Distance, in floats, between the same frame of two consecutive items.
  auto offset = num_frames * encoder_out_dim;
  std::array<int64_t, 2> shape{batch_size, encoder_out_dim};
  Ort::Value ans =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *dst = ans.GetTensorMutableData<float>();
  const float *src = encoder_out->GetTensorData<float>() + t * encoder_out_dim;
  for (int64_t i = 0; i != batch_size; ++i) {
    dst = std::copy(src, src + encoder_out_dim, dst);
    src += offset;
  }
  return ans;
 }
 OnlineTransducerDecoderResult
 OnlineTransducerGreedySearchDecoder::GetEmptyResult() const {
  int32_t context_size = model_->ContextSize();
@@ -90,7 +55,8 @@ void OnlineTransducerGreedySearchDecoder::Decode(
  Ort::Value decoder_out = model_->RunDecoder(std::move(decoder_input));
  for (int32_t t = 0; t != num_frames; ++t) {
-    Ort::Value cur_encoder_out = GetFrame(model_->Allocator(), &encoder_out, t);
+    Ort::Value cur_encoder_out =
        GetEncoderOutFrame(model_->Allocator(), &encoder_out, t);
    Ort::Value logit = model_->RunJoiner(
        std::move(cur_encoder_out), Clone(model_->Allocator(), &decoder_out));
--- a/sherpa-onnx/csrc/online-transducer-model.cc
+++ b/sherpa-onnx/csrc/online-transducer-model.cc
@@ -1,6 +1,7 @@
 // sherpa-onnx/csrc/online-transducer-model.cc
 //
 // Copyright (c)  2023  Xiaomi Corporation
 // Copyright (c)  2023  Pingfeng Luo
 #include "sherpa-onnx/csrc/online-transducer-model.h"
 #if __ANDROID_API__ >= 9
@@ -8,6 +9,7 @@
 #include "android/asset_manager_jni.h"
 #endif
 #include <algorithm>
 #include <memory>
 #include <sstream>
 #include <string>
@@ -75,6 +77,40 @@ std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
  return nullptr;
 }
 // Builds an int64 tensor of shape (results.size(), ContextSize()) whose
 // row i holds the last ContextSize() entries of results[i].tokens.
 // Every tokens array is expected to hold at least ContextSize() entries.
 Ort::Value OnlineTransducerModel::BuildDecoderInput(
    const std::vector<OnlineTransducerDecoderResult> &results) {
  const auto num_results = static_cast<int32_t>(results.size());
  const int32_t context_size = ContextSize();
  std::array<int64_t, 2> dims{num_results, context_size};
  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int64_t>(Allocator(), dims.data(), dims.size());
  int64_t *out = decoder_input.GetTensorMutableData<int64_t>();
  for (const auto &result : results) {
    const int64_t *tail_end = result.tokens.data() + result.tokens.size();
    // std::copy returns the position just past the row it has written.
    out = std::copy(tail_end - context_size, tail_end, out);
  }
  return decoder_input;
 }
 // Builds an int64 tensor of shape (hyps.size(), ContextSize()) whose row i
 // holds the last ContextSize() tokens of hyps[i].ys, widened from int32.
 // Every hypothesis is expected to hold at least ContextSize() tokens.
 Ort::Value OnlineTransducerModel::BuildDecoderInput(
    const std::vector<Hypothesis> &hyps) {
  const auto num_hyps = static_cast<int32_t>(hyps.size());
  const int32_t context_size = ContextSize();
  std::array<int64_t, 2> dims{num_hyps, context_size};
  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int64_t>(Allocator(), dims.data(), dims.size());
  int64_t *out = decoder_input.GetTensorMutableData<int64_t>();
  for (const auto &hyp : hyps) {
    const auto tail_end = hyp.ys.end();
    // std::copy returns the position just past the row it has written.
    out = std::copy(tail_end - context_size, tail_end, out);
  }
  return decoder_input;
 }
 #if __ANDROID_API__ >= 9
 std::unique_ptr<OnlineTransducerModel> OnlineTransducerModel::Create(
    AAssetManager *mgr, const OnlineTransducerModelConfig &config) {
--- a/sherpa-onnx/csrc/online-transducer-model.h
+++ b/sherpa-onnx/csrc/online-transducer-model.h
@@ -14,6 +14,8 @@
 #endif
 #include "onnxruntime_cxx_api.h"  // NOLINT
 #include "sherpa-onnx/csrc/hypothesis.h"
 #include "sherpa-onnx/csrc/online-transducer-decoder.h"
 #include "sherpa-onnx/csrc/online-transducer-model-config.h"
 namespace sherpa_onnx {
@@ -71,9 +73,6 @@ class OnlineTransducerModel {
      Ort::Value features,
      std::vector<Ort::Value> states) = 0;  // NOLINT
  virtual Ort::Value BuildDecoderInput(
      const std::vector<OnlineTransducerDecoderResult> &results) = 0;
  /** Run the decoder network.
   *
   * Caution: We assume there are no recurrent connections in the decoder and
@@ -125,7 +124,13 @@ class OnlineTransducerModel {
  virtual int32_t VocabSize() const = 0;
  virtual int32_t SubsamplingFactor() const { return 4; }
  virtual OrtAllocator *Allocator() = 0;
  Ort::Value BuildDecoderInput(
      const std::vector<OnlineTransducerDecoderResult> &results);
  Ort::Value BuildDecoderInput(const std::vector<Hypothesis> &hyps);
 };
 }  // namespace sherpa_onnx
--- a/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
+++ b/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
@@ -0,0 +1,154 @@
 // sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.cc
 //
 // Copyright (c)  2023  Pingfeng Luo
 // Copyright (c)  2023  Xiaomi Corporation
 #include "sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h"
 #include <algorithm>
 #include <utility>
 #include <vector>
 #include "sherpa-onnx/csrc/onnx-utils.h"
 namespace sherpa_onnx {
 // Duplicates the rows of a 2-D float tensor so that every hypothesis gets
 // its own copy of the row of the stream it belongs to.
 //
 // hyps_num_split holds cumulative hypothesis counts: stream b owns
 // hyps_num_split[b + 1] - hyps_num_split[b] hypotheses, and the last entry
 // is the total, which becomes the number of rows of the returned tensor.
 // Row b of cur_encoder_out is written once per hypothesis of stream b.
 static Ort::Value Repeat(OrtAllocator *allocator, Ort::Value *cur_encoder_out,
                         const std::vector<int32_t> &hyps_num_split) {
  const std::vector<int64_t> in_shape =
      cur_encoder_out->GetTensorTypeAndShapeInfo().GetShape();
  const int64_t row_len = in_shape[1];
  std::array<int64_t, 2> out_shape{hyps_num_split.back(), row_len};
  Ort::Value ans = Ort::Value::CreateTensor<float>(allocator, out_shape.data(),
                                                   out_shape.size());
  const float *row = cur_encoder_out->GetTensorData<float>();
  float *dst = ans.GetTensorMutableData<float>();
  const int32_t num_streams = static_cast<int32_t>(hyps_num_split.size()) - 1;
  for (int32_t b = 0; b != num_streams; ++b, row += row_len) {
    const int32_t copies = hyps_num_split[b + 1] - hyps_num_split[b];
    for (int32_t done = 0; done != copies; ++done) {
      dst = std::copy(row, row + row_len, dst);
    }
  }
  return ans;
 }
 // Applies the two-argument LogSoftmax to each of the h consecutive rows of
 // width w that start at `in`, in place.
 static void LogSoftmax(float *in, int32_t w, int32_t h) {
  float *row = in;
  for (int32_t r = 0; r != h; ++r, row += w) {
    LogSoftmax(row, w);
  }
 }
 // Returns the starting state of a stream: a result whose hyps contain one
 // hypothesis made of ContextSize() blank tokens with log_prob 0.
 OnlineTransducerDecoderResult
 OnlineTransducerModifiedBeamSearchDecoder::GetEmptyResult() const {
  const int32_t context_size = model_->ContextSize();
  const int32_t blank_id = 0;  // always 0
  std::vector<Hypothesis> initial;
  initial.emplace_back(std::vector<int32_t>(context_size, blank_id), 0);
  OnlineTransducerDecoderResult result;
  result.hyps = Hypotheses(std::move(initial));
  return result;
 }
 // Fills r->tokens and r->num_trailing_blanks from the best hypothesis in
 // r->hyps (chosen with length normalization). The first ContextSize()
 // tokens of that hypothesis are left out of r->tokens.
 void OnlineTransducerModifiedBeamSearchDecoder::StripLeadingBlanks(
    OnlineTransducerDecoderResult *r) const {
  const int32_t num_context = model_->ContextSize();
  const Hypothesis best = r->hyps.GetMostProbable(true);
  r->tokens =
      std::vector<int64_t>(best.ys.begin() + num_context, best.ys.end());
  r->num_trailing_blanks = best.num_trailing_blanks;
 }
 // Runs modified beam search over every frame of `encoder_out` and updates
 // the hypotheses stored in `*result`.
 //
 // @param encoder_out Encoder output; dimension 0 is read as the batch size
 //                    and dimension 1 as the number of frames.
 // @param result      One entry per batch item. On entry its `hyps` are the
 //                    hypotheses to continue from; on return they hold the
 //                    hypotheses kept after the last frame. Other fields of
 //                    the entries are not modified here.
 //
 // The process is terminated with exit(-1) when the batch size of
 // `encoder_out` differs from result->size().
 void OnlineTransducerModifiedBeamSearchDecoder::Decode(
    Ort::Value encoder_out,
    std::vector<OnlineTransducerDecoderResult> *result) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out.GetTensorTypeAndShapeInfo().GetShape();
  if (encoder_out_shape[0] != result->size()) {
    fprintf(stderr,
            "Size mismatch! encoder_out.size(0) %d, result.size(0): %d\n",
            static_cast<int32_t>(encoder_out_shape[0]),
            static_cast<int32_t>(result->size()));
    exit(-1);
  }
  int32_t batch_size = static_cast<int32_t>(encoder_out_shape[0]);
  int32_t num_frames = static_cast<int32_t>(encoder_out_shape[1]);
  int32_t vocab_size = model_->VocabSize();
  // cur[b] holds the active hypotheses of stream b. They are moved out of
  // `result` here and moved back after the last frame.
  std::vector<Hypotheses> cur;
  for (auto &r : *result) {
    cur.push_back(std::move(r.hyps));
  }
  // Flat list of the hypotheses of all streams for the current frame.
  std::vector<Hypothesis> prev;
  for (int32_t t = 0; t != num_frames; ++t) {
    // Due to merging paths with identical token sequences,
    // not all utterances have "num_active_paths" paths.
    // hyps_num_split therefore records cumulative counts: the hypotheses of
    // stream b are prev[hyps_num_split[b]] .. prev[hyps_num_split[b + 1] - 1].
    int32_t hyps_num_acc = 0;
    std::vector<int32_t> hyps_num_split;
    hyps_num_split.push_back(0);
    prev.clear();
    for (auto &hyps : cur) {
      for (auto &h : hyps) {
        // NOTE(review): Hypotheses::begin()/end() are const members, so `h`
        // refers to a const element and this std::move results in a copy.
        prev.push_back(std::move(h.second));
        hyps_num_acc++;
      }
      hyps_num_split.push_back(hyps_num_acc);
    }
    cur.clear();
    cur.reserve(batch_size);
    // One decoder row per hypothesis.
    Ort::Value decoder_input = model_->BuildDecoderInput(prev);
    Ort::Value decoder_out = model_->RunDecoder(std::move(decoder_input));
    // Frame t of each stream, repeated once per hypothesis of that stream so
    // that its rows line up with the rows of decoder_out.
    Ort::Value cur_encoder_out =
        GetEncoderOutFrame(model_->Allocator(), &encoder_out, t);
    cur_encoder_out =
        Repeat(model_->Allocator(), &cur_encoder_out, hyps_num_split);
    Ort::Value logit = model_->RunJoiner(
        std::move(cur_encoder_out), Clone(model_->Allocator(), &decoder_out));
    // p_logit always points at the first row of the stream being processed;
    // each row is read as vocab_size floats.
    float *p_logit = logit.GetTensorMutableData<float>();
    for (int32_t b = 0; b < batch_size; ++b) {
      int32_t start = hyps_num_split[b];
      int32_t end = hyps_num_split[b + 1];
      // Normalize each hypothesis row to log-probabilities, in place.
      LogSoftmax(p_logit, vocab_size, (end - start));
      // Pick the best (hypothesis, token) pairs over all rows of this stream.
      auto topk =
          TopkIndex(p_logit, vocab_size * (end - start), max_active_paths_);
      Hypotheses hyps;
      for (auto i : topk) {
        // i indexes the flattened (end - start) x vocab_size block.
        int32_t hyp_index = i / vocab_size + start;
        int32_t new_token = i % vocab_size;
        Hypothesis new_hyp = prev[hyp_index];
        // Token 0 is handled as blank: it does not extend the sequence and
        // only increases the trailing-blank count.
        if (new_token != 0) {
          new_hyp.ys.push_back(new_token);
          new_hyp.num_trailing_blanks = 0;
        } else {
          ++new_hyp.num_trailing_blanks;
        }
        new_hyp.log_prob += p_logit[i];
        // Add() merges hypotheses that end up with the same token sequence.
        hyps.Add(std::move(new_hyp));
      }
      cur.push_back(std::move(hyps));
      // Advance to the rows of the next stream.
      p_logit += vocab_size * (end - start);
    }
  }
  // Hand the final hypotheses back to the caller.
  for (int32_t b = 0; b != batch_size; ++b) {
    (*result)[b].hyps = std::move(cur[b]);
  }
 }
 }  // namespace sherpa_onnx
--- a/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h
+++ b/sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h
@@ -0,0 +1,37 @@
 // sherpa-onnx/csrc/online-transducer-modified-beam-search-decoder.h
 //
 // Copyright (c)  2023  Pingfeng Luo
 // Copyright (c)  2023  Xiaomi Corporation
 #ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
 #define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
 #include <vector>
 #include "sherpa-onnx/csrc/online-transducer-decoder.h"
 #include "sherpa-onnx/csrc/online-transducer-model.h"
 namespace sherpa_onnx {
 // Transducer decoder that uses modified beam search.
 class OnlineTransducerModifiedBeamSearchDecoder
    : public OnlineTransducerDecoder {
 public:
  // @param model            Model used for decoding. It is not owned by this
  //                         object and must stay valid while the decoder is
  //                         in use.
  // @param max_active_paths Value stored in max_active_paths_.
  OnlineTransducerModifiedBeamSearchDecoder(OnlineTransducerModel *model,
                                            int32_t max_active_paths)
      : model_(model), max_active_paths_(max_active_paths) {}
  // Returns the result object a stream starts decoding from.
  OnlineTransducerDecoderResult GetEmptyResult() const override;
  // Post-processes `r` after decoding; see the definition for details.
  void StripLeadingBlanks(OnlineTransducerDecoderResult *r) const override;
  // Decodes `encoder_out` and updates `*result`, which has one entry per
  // batch item.
  void Decode(Ort::Value encoder_out,
              std::vector<OnlineTransducerDecoderResult> *result) override;
 private:
  OnlineTransducerModel *model_;  // Not owned
  int32_t max_active_paths_;
 };
 }  // namespace sherpa_onnx
 #endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
--- a/sherpa-onnx/csrc/online-zipformer-transducer-model.cc
+++ b/sherpa-onnx/csrc/online-zipformer-transducer-model.cc
@@ -461,24 +461,6 @@ OnlineZipformerTransducerModel::RunEncoder(Ort::Value features,
  return {std::move(encoder_out[0]), std::move(next_states)};
 }
 // Builds an int64 tensor of shape (results.size(), context_size_) whose
 // row i holds the last context_size_ entries of results[i].tokens.
 // Every tokens array is expected to hold at least context_size_ entries.
 Ort::Value OnlineZipformerTransducerModel::BuildDecoderInput(
    const std::vector<OnlineTransducerDecoderResult> &results) {
  const auto num_results = static_cast<int32_t>(results.size());
  std::array<int64_t, 2> dims{num_results, context_size_};
  Ort::Value decoder_input =
      Ort::Value::CreateTensor<int64_t>(allocator_, dims.data(), dims.size());
  int64_t *out = decoder_input.GetTensorMutableData<int64_t>();
  for (const auto &result : results) {
    const int64_t *tail_end = result.tokens.data() + result.tokens.size();
    // std::copy returns the position just past the row it has written.
    out = std::copy(tail_end - context_size_, tail_end, out);
  }
  return decoder_input;
 }
 Ort::Value OnlineZipformerTransducerModel::RunDecoder(
    Ort::Value decoder_input) {
  auto decoder_out = decoder_sess_->Run(
--- a/sherpa-onnx/csrc/online-zipformer-transducer-model.h
+++ b/sherpa-onnx/csrc/online-zipformer-transducer-model.h
@@ -41,9 +41,6 @@ class OnlineZipformerTransducerModel : public OnlineTransducerModel {
  std::pair<Ort::Value, std::vector<Ort::Value>> RunEncoder(
      Ort::Value features, std::vector<Ort::Value> states) override;
  Ort::Value BuildDecoderInput(
      const std::vector<OnlineTransducerDecoderResult> &results) override;
  Ort::Value RunDecoder(Ort::Value decoder_input) override;
  Ort::Value RunJoiner(Ort::Value encoder_out, Ort::Value decoder_out) override;
--- a/sherpa-onnx/csrc/onnx-utils.cc
+++ b/sherpa-onnx/csrc/onnx-utils.cc
@@ -44,6 +44,38 @@ void GetOutputNames(Ort::Session *sess, std::vector<std::string> *output_names,
  }
 }
 // Copies frame `t` of every batch item out of `encoder_out`.
 //
 // `encoder_out` is read as a contiguous float tensor whose shape is
 // (batch_size, num_frames, encoder_out_dim). The returned tensor has shape
 // (batch_size, encoder_out_dim) and is allocated from `allocator`.
 Ort::Value GetEncoderOutFrame(OrtAllocator *allocator, Ort::Value *encoder_out,
                              int32_t t) {
  std::vector<int64_t> encoder_out_shape =
      encoder_out->GetTensorTypeAndShapeInfo().GetShape();
  auto batch_size = encoder_out_shape[0];
  auto num_frames = encoder_out_shape[1];
  assert(t >= 0 && t < num_frames);
  auto encoder_out_dim = encoder_out_shape[2];
  // Distance, in floats, between the same frame of two consecutive items.
  auto offset = num_frames * encoder_out_dim;
  std::array<int64_t, 2> shape{batch_size, encoder_out_dim};
  Ort::Value ans =
      Ort::Value::CreateTensor<float>(allocator, shape.data(), shape.size());
  float *dst = ans.GetTensorMutableData<float>();
  const float *src = encoder_out->GetTensorData<float>() + t * encoder_out_dim;
  for (int64_t i = 0; i != batch_size; ++i) {
    dst = std::copy(src, src + encoder_out_dim, dst);
    src += offset;
  }
  return ans;
 }
 void PrintModelMetadata(std::ostream &os, const Ort::ModelMetadata &meta_data) {
  Ort::AllocatorWithDefaultOptions allocator;
  std::vector<Ort::AllocatedStringPtr> v =
--- a/sherpa-onnx/csrc/onnx-utils.h
+++ b/sherpa-onnx/csrc/onnx-utils.h
@@ -10,6 +10,7 @@
 #include <locale>
 #endif
 #include <cassert>
 #include <ostream>
 #include <string>
 #include <vector>
@@ -57,6 +58,17 @@ void GetInputNames(Ort::Session *sess, std::vector<std::string> *input_names,
 void GetOutputNames(Ort::Session *sess, std::vector<std::string> *output_names,
                    std::vector<const char *> *output_names_ptr);
 /**
 * Get the output frame of Encoder
 *
 * @param allocator allocator of onnxruntime
 * @param encoder_out encoder out tensor
 * @param t frame_index
 *
 */
 Ort::Value GetEncoderOutFrame(OrtAllocator *allocator, Ort::Value *encoder_out,
                              int32_t t);
 void PrintModelMetadata(std::ostream &os,
                        const Ort::ModelMetadata &meta_data);  // NOLINT