enginex_bi_series-sherpa-onnx/sherpa-onnx/csrc/online-transducer-decoder.h

// sherpa-onnx/csrc/online-transducer-decoder.h
//
// Copyright (c)  2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_

#include <cstdint>
#include <cstdlib>
#include <vector>

#include "onnxruntime_cxx_api.h"  // NOLINT
#include "sherpa-onnx/csrc/hypothesis.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

/// Decoding state and output accumulated for a single stream by an
/// online transducer decoder.
struct OnlineTransducerDecoderResult {
  /// How many frames (counted after subsampling) have been decoded up to now.
  int32_t frame_offset = 0;

  /// Token IDs decoded up to now.
  std::vector<int64_t> tokens;

  /// How many trailing blank frames have been decoded up to now.
  int32_t num_trailing_blanks = 0;

  /// timestamps[i] is the output frame index at which tokens[i] was decoded.
  std::vector<int32_t> timestamps;

  /// Per-token scores (ys / LM / context).
  /// NOTE(review): presumably parallel to `tokens`, with `lm_probs` and
  /// `context_scores` left empty when no LM / ContextGraph is in use --
  /// confirm against the decoder implementations.
  std::vector<float> ys_probs;
  std::vector<float> lm_probs;
  std::vector<float> context_scores;

  /// decoder_out is cached here so that it is available for endpointing.
  Ort::Value decoder_out;

  /// Only used by modified beam search.
  Hypotheses hyps;

  /// Build an empty result.
  ///
  /// `decoder_out` is explicitly constructed from nullptr; the integer
  /// members take their in-class defaults (0) and the vectors start empty.
  OnlineTransducerDecoderResult() : decoder_out{nullptr}, hyps{} {}

  /// Copy operations (defined out of line).
  OnlineTransducerDecoderResult(const OnlineTransducerDecoderResult &other);
  OnlineTransducerDecoderResult &operator=(
      const OnlineTransducerDecoderResult &other);

  /// Move operations (defined out of line); declared non-throwing.
  OnlineTransducerDecoderResult(OnlineTransducerDecoderResult &&other) noexcept;
  OnlineTransducerDecoderResult &operator=(
      OnlineTransducerDecoderResult &&other) noexcept;
};

class OnlineStream;

/** Abstract interface implemented by the streaming transducer decoders.
 *
 * Subclasses must implement `GetEmptyResult()` and the two-argument
 * `Decode()`. The remaining virtual methods have default implementations
 * that are either no-ops or report that the overload is unsupported.
 */
class OnlineTransducerDecoder {
 public:
  virtual ~OnlineTransducerDecoder() = default;

  /** Return an empty result.
   *
   * To simplify the decoding code, we add `context_size` blanks
   * to the beginning of the decoding result, which will be
   * stripped by calling `StripLeadingBlanks()`.
   */
  virtual OnlineTransducerDecoderResult GetEmptyResult() const = 0;

  /** Strip blanks added by `GetEmptyResult()`.
   *
   * The default implementation does nothing.
   *
   * @param r It is changed in-place.
   */
  virtual void StripLeadingBlanks(OnlineTransducerDecoderResult * /*r*/) const {
  }

  /** Run transducer beam search given the output from the encoder model.
   *
   * @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
   * @param result  It is modified in-place.
   *
   * @note There is no need to pass encoder_out_length here since for the
   * online decoding case, each utterance has the same number of frames
   * and there are no paddings.
   */
  virtual void Decode(Ort::Value encoder_out,
                      std::vector<OnlineTransducerDecoderResult> *result) = 0;

  /** Run transducer beam search given the output from the encoder model.
   *
   * Note: Currently this interface is for contextual-biasing feature which
   *       needs a ContextGraph owned by the OnlineStream.
   *
   * The default implementation logs an error and terminates the process
   * with exit code -1; decoders that support this overload must override it.
   *
   * @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
   * @param ss  A list of OnlineStreams.
   * @param result  It is modified in-place.
   *
   * @note There is no need to pass encoder_out_length here since for the
   * online decoding case, each utterance has the same number of frames
   * and there are no paddings.
   */
  virtual void Decode(Ort::Value /*encoder_out*/, OnlineStream ** /*ss*/,
                      std::vector<OnlineTransducerDecoderResult> * /*result*/) {
    SHERPA_ONNX_LOGE(
        "This interface is for OnlineTransducerModifiedBeamSearchDecoder.");
    std::exit(-1);
  }

  /** Used for endpointing. We need to keep decoder_out after reset.
   *
   * The default implementation does nothing.
   *
   * @param result  The result whose cached decoder_out may be updated.
   */
  virtual void UpdateDecoderOut(OnlineTransducerDecoderResult * /*result*/) {}
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
Add Python API (#31) 2023-02-19 19:36:03 +08:00			`// sherpa-onnx/csrc/online-transducer-decoder.h`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`//`
			`// Copyright (c) 2023 Xiaomi Corporation`

			`#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_`
			`#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_`

			`#include <vector>`

			`#include "onnxruntime_cxx_api.h" // NOLINT`
add modified beam search (#69) 2023-03-01 15:32:54 +08:00			`#include "sherpa-onnx/csrc/hypothesis.h"`
Support contextual-biasing for streaming model (#184) * Support contextual-biasing for streaming model * The whole pipeline runs normally * Fix comments 2023-06-30 16:46:24 +08:00			`#include "sherpa-onnx/csrc/macros.h"`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00
			`namespace sherpa_onnx {`

			`struct OnlineTransducerDecoderResult {`
Add timestamps for streaming ASR. (#123) 2023-04-19 16:02:37 +08:00			`/// Number of frames after subsampling we have decoded so far`
			`int32_t frame_offset = 0;`

Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`/// The decoded token IDs so far`
			`std::vector<int64_t> tokens;`
Add endpointing (#54) 2023-02-22 15:35:55 +08:00
			`/// number of trailing blank frames decoded so far`
			`int32_t num_trailing_blanks = 0;`
add modified beam search (#69) 2023-03-01 15:32:54 +08:00
Add timestamps for streaming ASR. (#123) 2023-04-19 16:02:37 +08:00			`/// timestamps[i] contains the output frame index where tokens[i] is decoded.`
			`std::vector<int32_t> timestamps;`

Track token scores (#571) * add export of per-token scores (ys, lm, context) - for best path of the modified-beam-search decoding of transducer * refactoring JSON export of OnlineRecognitionResult, extending pybind11 API of OnlineRecognitionResult * export per-token scores also for greedy-search (online-transducer) - export un-scaled lm_probs (modified-beam search, online-transducer) - polishing * fill lm_probs/context_scores only if LM/ContextGraph is present (make Result smaller) 2024-02-28 23:28:45 +01:00			`std::vector<float> ys_probs;`
			`std::vector<float> lm_probs;`
			`std::vector<float> context_scores;`

Code refactoring (#74) * Don't reset model state and feature extractor on endpointing * support passing decoding_method from commandline * Add modified_beam_search to Python API * fix C API example * Fix style issues 2023-03-03 12:10:59 +08:00			`// Cache decoder_out for endpointing`
			`Ort::Value decoder_out;`

add modified beam search (#69) 2023-03-01 15:32:54 +08:00			`// used only in modified beam_search`
			`Hypotheses hyps;`
Code refactoring (#74) * Don't reset model state and feature extractor on endpointing * support passing decoding_method from commandline * Add modified_beam_search to Python API * fix C API example * Fix style issues 2023-03-03 12:10:59 +08:00
			`OnlineTransducerDecoderResult()`
			`: tokens{}, num_trailing_blanks(0), decoder_out{nullptr}, hyps{} {}`

			`OnlineTransducerDecoderResult(const OnlineTransducerDecoderResult &other);`

			`OnlineTransducerDecoderResult &operator=(`
			`const OnlineTransducerDecoderResult &other);`

Support clang-tidy (#1034) 2024-06-19 20:51:57 +08:00			`OnlineTransducerDecoderResult(OnlineTransducerDecoderResult &&other) noexcept;`
Code refactoring (#74) * Don't reset model state and feature extractor on endpointing * support passing decoding_method from commandline * Add modified_beam_search to Python API * fix C API example * Fix style issues 2023-03-03 12:10:59 +08:00
			`OnlineTransducerDecoderResult &operator=(`
Support clang-tidy (#1034) 2024-06-19 20:51:57 +08:00			`OnlineTransducerDecoderResult &&other) noexcept;`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`};`

Support contextual-biasing for streaming model (#184) * Support contextual-biasing for streaming model * The whole pipeline runs normally * Fix comments 2023-06-30 16:46:24 +08:00			`class OnlineStream;`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`class OnlineTransducerDecoder {`
			`public:`
			`virtual ~OnlineTransducerDecoder() = default;`

			`/* Return an empty result.`
			`*`
			* To simplify the decoding code, we add `context_size` blanks
			`* to the beginning of the decoding result, which will be`
			* stripped by calling `StripPrecedingBlanks()`.
			`*/`
add online-recognizer (#29) 2023-02-19 12:45:38 +08:00			`virtual OnlineTransducerDecoderResult GetEmptyResult() const = 0;`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00
			/** Strip blanks added by `GetEmptyResult()`.
			`*`
			`* @param r It is changed in-place.`
			`*/`
add online-recognizer (#29) 2023-02-19 12:45:38 +08:00			`virtual void StripLeadingBlanks(OnlineTransducerDecoderResult * /r/) const {`
			`}`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00
			`/** Run transducer beam search given the output from the encoder model.`
			`*`
			`* @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)`
			`* @param result It is modified in-place.`
			`*`
			`* @note There is no need to pass encoder_out_length here since for the`
			`* online decoding case, each utterance has the same number of frames`
			`* and there are no paddings.`
			`*/`
			`virtual void Decode(Ort::Value encoder_out,`
			`std::vector<OnlineTransducerDecoderResult> *result) = 0;`
Code refactoring (#74) * Don't reset model state and feature extractor on endpointing * support passing decoding_method from commandline * Add modified_beam_search to Python API * fix C API example * Fix style issues 2023-03-03 12:10:59 +08:00
Support contextual-biasing for streaming model (#184) * Support contextual-biasing for streaming model * The whole pipeline runs normally * Fix comments 2023-06-30 16:46:24 +08:00			`/** Run transducer beam search given the output from the encoder model.`
			`*`
			`* Note: Currently this interface is for contextual-biasing feature which`
			`* needs a ContextGraph owned by the OnlineStream.`
			`*`
			`* @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)`
			`* @param ss A list of OnlineStreams.`
			`* @param result It is modified in-place.`
			`*`
			`* @note There is no need to pass encoder_out_length here since for the`
			`* online decoding case, each utterance has the same number of frames`
			`* and there are no paddings.`
			`*/`
Add address sanitizer and undefined behavior sanitizer (#951) 2024-05-31 13:17:01 +08:00			`virtual void Decode(Ort::Value /encoder_out/, OnlineStream ** /ss/,`
			`std::vector<OnlineTransducerDecoderResult> * /result/) {`
Support contextual-biasing for streaming model (#184) * Support contextual-biasing for streaming model * The whole pipeline runs normally * Fix comments 2023-06-30 16:46:24 +08:00			`SHERPA_ONNX_LOGE(`
			`"This interface is for OnlineTransducerModifiedBeamSearchDecoder.");`
			`exit(-1);`
			`}`

Code refactoring (#74) * Don't reset model state and feature extractor on endpointing * support passing decoding_method from commandline * Add modified_beam_search to Python API * fix C API example * Fix style issues 2023-03-03 12:10:59 +08:00			`// used for endpointing. We need to keep decoder_out after reset`
Add address sanitizer and undefined behavior sanitizer (#951) 2024-05-31 13:17:01 +08:00			`virtual void UpdateDecoderOut(OnlineTransducerDecoderResult * /result/) {}`
Add online transducer decoder (#27) 2023-02-19 10:39:07 +08:00			`};`

			`} // namespace sherpa_onnx`

			`#endif // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_`