Add online transducer decoder (#27)
This commit is contained in:
52
sherpa-onnx/csrc/online-transducer-decoder.h
Normal file
52
sherpa-onnx/csrc/online-transducer-decoder.h
Normal file
@@ -0,0 +1,52 @@
|
||||
// sherpa/csrc/online-transducer-decoder.h
|
||||
//
|
||||
// Copyright (c) 2023 Xiaomi Corporation
|
||||
|
||||
#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
|
||||
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "onnxruntime_cxx_api.h" // NOLINT
|
||||
|
||||
namespace sherpa_onnx {
|
||||
|
||||
struct OnlineTransducerDecoderResult {
|
||||
/// The decoded token IDs so far
|
||||
std::vector<int64_t> tokens;
|
||||
};
|
||||
|
||||
class OnlineTransducerDecoder {
|
||||
public:
|
||||
virtual ~OnlineTransducerDecoder() = default;
|
||||
|
||||
/* Return an empty result.
|
||||
*
|
||||
* To simplify the decoding code, we add `context_size` blanks
|
||||
* to the beginning of the decoding result, which will be
|
||||
* stripped by calling `StripPrecedingBlanks()`.
|
||||
*/
|
||||
virtual OnlineTransducerDecoderResult GetEmptyResult() = 0;
|
||||
|
||||
/** Strip blanks added by `GetEmptyResult()`.
|
||||
*
|
||||
* @param r It is changed in-place.
|
||||
*/
|
||||
virtual void StripLeadingBlanks(OnlineTransducerDecoderResult * /*r*/) {}
|
||||
|
||||
/** Run transducer beam search given the output from the encoder model.
|
||||
*
|
||||
* @param encoder_out A 3-D tensor of shape (N, T, joiner_dim)
|
||||
* @param result It is modified in-place.
|
||||
*
|
||||
* @note There is no need to pass encoder_out_length here since for the
|
||||
* online decoding case, each utterance has the same number of frames
|
||||
* and there are no paddings.
|
||||
*/
|
||||
virtual void Decode(Ort::Value encoder_out,
|
||||
std::vector<OnlineTransducerDecoderResult> *result) = 0;
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
#endif // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_DECODER_H_
|
||||
Reference in New Issue
Block a user