2023-04-23 17:15:18 +08:00
|
|
|
// sherpa-onnx/csrc/online-lm.h
|
|
|
|
|
//
|
|
|
|
|
// Copyright (c) 2023 Xiaomi Corporation
|
|
|
|
|
|
|
|
|
|
#ifndef SHERPA_ONNX_CSRC_ONLINE_LM_H_
|
|
|
|
|
#define SHERPA_ONNX_CSRC_ONLINE_LM_H_
|
|
|
|
|
|
|
|
|
|
#include <memory>
|
|
|
|
|
#include <utility>
|
|
|
|
|
#include <vector>
|
|
|
|
|
|
|
|
|
|
#include "onnxruntime_cxx_api.h" // NOLINT
|
|
|
|
|
#include "sherpa-onnx/csrc/hypothesis.h"
|
2023-06-12 15:51:27 +08:00
|
|
|
#include "sherpa-onnx/csrc/online-lm-config.h"
|
2023-04-23 17:15:18 +08:00
|
|
|
|
|
|
|
|
namespace sherpa_onnx {
|
|
|
|
|
|
|
|
|
|
class OnlineLM {
|
|
|
|
|
public:
|
|
|
|
|
virtual ~OnlineLM() = default;
|
|
|
|
|
|
2023-06-12 15:51:27 +08:00
|
|
|
static std::unique_ptr<OnlineLM> Create(const OnlineLMConfig &config);
|
2023-04-23 17:15:18 +08:00
|
|
|
|
2024-09-06 05:01:25 +03:00
|
|
|
// init states for classic rescore
|
|
|
|
|
virtual std::vector<Ort::Value> GetInitStates() = 0;
|
2023-04-23 17:15:18 +08:00
|
|
|
|
2024-09-06 05:01:25 +03:00
|
|
|
// init states for shallow fusion
|
|
|
|
|
virtual std::pair<Ort::Value, std::vector<Ort::Value>> GetInitStatesSF() = 0;
|
|
|
|
|
|
|
|
|
|
/** ScoreToken a batch of sentences (shallow fusion).
|
2023-04-23 17:15:18 +08:00
|
|
|
*
|
2023-05-10 22:30:57 +08:00
|
|
|
* @param x A 2-D tensor of shape (N, 1) with data type int64.
|
2023-04-23 17:15:18 +08:00
|
|
|
* @param states It contains the states for the LM model
|
2024-09-06 05:01:25 +03:00
|
|
|
* @return Return a pair containing
|
2023-05-10 22:30:57 +08:00
|
|
|
* - log_prob of NN LM
|
2023-04-23 17:15:18 +08:00
|
|
|
* - updated states
|
|
|
|
|
*
|
|
|
|
|
*/
|
2023-05-10 22:30:57 +08:00
|
|
|
virtual std::pair<Ort::Value, std::vector<Ort::Value>> ScoreToken(
|
|
|
|
|
Ort::Value x, std::vector<Ort::Value> states) = 0;
|
|
|
|
|
|
2024-09-06 05:01:25 +03:00
|
|
|
/** This function updates hyp.lm_log_prob of hyps (classic rescore).
|
|
|
|
|
*
|
|
|
|
|
* @param scale LM score
|
|
|
|
|
* @param context_size Context size of the transducer decoder model
|
|
|
|
|
* @param hyps It is changed in-place.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
virtual void ComputeLMScore(float scale, int32_t context_size,
|
|
|
|
|
std::vector<Hypotheses> *hyps) = 0;
|
|
|
|
|
|
|
|
|
|
/** This function updates lm_log_prob and nn_lm_scores of hyp (shallow fusion).
|
2023-05-10 22:30:57 +08:00
|
|
|
*
|
|
|
|
|
* @param scale LM score
|
|
|
|
|
* @param hyps It is changed in-place.
|
|
|
|
|
*
|
|
|
|
|
*/
|
2024-09-06 05:01:25 +03:00
|
|
|
virtual void ComputeLMScoreSF(float scale, Hypothesis *hyp) = 0;
|
2023-04-23 17:15:18 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
} // namespace sherpa_onnx
|
|
|
|
|
|
|
|
|
|
#endif // SHERPA_ONNX_CSRC_ONLINE_LM_H_
|