* Adding temperature scaling on Joiner logits:
- T hard-coded to 2.0
- so far best result NCE 0.122 (still not so high)
- the BPE scores were rescaled by 0.2 (but then incorrect words also
get high confidence; visually reasonable histograms are obtained with a 0.5 scale)
- BPE->WORD score merging done by min(.) function
(tried also prob-product, and also arithmetic, geometric, harmonic mean)
- without temperature scaling (i.e. scale 1.0), the best NCE was 0.032 (here product merging was best)
Results seem consistent with: https://arxiv.org/abs/2110.15222
Everything was tuned on a very small set of 100 sentences with 813 words and 10.2% WER, using a Czech model.
I also experimented with blank posteriors mixed into the BPE confidences,
but no NCE improvement found, so not pushing that.
Temperature scaling was also added to the greedy search confidences.
* making `temperature_scale` configurable from outside
61 lines
2.1 KiB
C++
61 lines
2.1 KiB
C++
// sherpa-onnx/csrc/online-transducer-modified_beam-search-decoder.h
|
|
//
|
|
// Copyright (c) 2023 Pingfeng Luo
|
|
// Copyright (c) 2023 Xiaomi Corporation
|
|
|
|
#ifndef SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
|
|
#define SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
|
|
|
|
#include <vector>
|
|
|
|
#include "sherpa-onnx/csrc/online-lm.h"
|
|
#include "sherpa-onnx/csrc/online-stream.h"
|
|
#include "sherpa-onnx/csrc/online-transducer-decoder.h"
|
|
#include "sherpa-onnx/csrc/online-transducer-model.h"
|
|
|
|
namespace sherpa_onnx {
|
|
|
|
class OnlineTransducerModifiedBeamSearchDecoder
|
|
: public OnlineTransducerDecoder {
|
|
public:
|
|
OnlineTransducerModifiedBeamSearchDecoder(OnlineTransducerModel *model,
|
|
OnlineLM *lm,
|
|
int32_t max_active_paths,
|
|
float lm_scale, int32_t unk_id,
|
|
float blank_penalty,
|
|
float temperature_scale)
|
|
: model_(model),
|
|
lm_(lm),
|
|
max_active_paths_(max_active_paths),
|
|
lm_scale_(lm_scale),
|
|
unk_id_(unk_id),
|
|
blank_penalty_(blank_penalty),
|
|
temperature_scale_(temperature_scale) {}
|
|
|
|
OnlineTransducerDecoderResult GetEmptyResult() const override;
|
|
|
|
void StripLeadingBlanks(OnlineTransducerDecoderResult *r) const override;
|
|
|
|
void Decode(Ort::Value encoder_out,
|
|
std::vector<OnlineTransducerDecoderResult> *result) override;
|
|
|
|
void Decode(Ort::Value encoder_out, OnlineStream **ss,
|
|
std::vector<OnlineTransducerDecoderResult> *result) override;
|
|
|
|
void UpdateDecoderOut(OnlineTransducerDecoderResult *result) override;
|
|
|
|
private:
|
|
OnlineTransducerModel *model_; // Not owned
|
|
OnlineLM *lm_; // Not owned
|
|
|
|
int32_t max_active_paths_;
|
|
float lm_scale_; // used only when lm_ is not nullptr
|
|
int32_t unk_id_;
|
|
float blank_penalty_;
|
|
float temperature_scale_;
|
|
};
|
|
|
|
} // namespace sherpa_onnx
|
|
|
|
#endif // SHERPA_ONNX_CSRC_ONLINE_TRANSDUCER_MODIFIED_BEAM_SEARCH_DECODER_H_
|