Add online transducer decoder (#27)

This commit is contained in:
Fangjun Kuang
2023-02-19 10:39:07 +08:00
committed by GitHub
parent 710edaa6f9
commit 0f6f58d1d3
13 changed files with 229 additions and 124 deletions

View File

@@ -0,0 +1,52 @@
// sherpa/csrc/online-transducer-decoder.h
//
// Copyright (c) 2023 Xiaomi Corporation
#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
#include <vector>
#include "onnxruntime_cxx_api.h" // NOLINT
namespace sherpa_onnx {
struct OnlineTransducerDecoderResult {
/// The decoded token IDs so far
std::vector<int64_t> tokens;
};
class OnlineTransducerDecoder {
public:
virtual ~OnlineTransducerDecoder() = default;
/* Return an empty result.
*
* To simplify the decoding code, we add `context_size` blanks
* to the beginning of the decoding result, which will be
* stripped by calling `StripPrecedingBlanks()`.
*/
virtual OnlineTransducerDecoderResult GetEmptyResult() = 0;
/** Strip blanks added by `GetEmptyResult()`.
*
* @param r It is changed in-place.
*/
virtual void StripLeadingBlanks(OnlineTransducerDecoderResult * /*r*/) {}
/** Run transducer beam search given the output from the encoder model.
*
* @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
* @param result It is modified in-place.
*
* @note There is no need to pass encoder_out_length here since for the
* online decoding case, each utterance has the same number of frames
* and there are no paddings.
*/
virtual void Decode(Ort::Value encoder_out,
std::vector<OnlineTransducerDecoderResult> *result) = 0;
};
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_