Add inverse text normalization for online ASR (#1020)

This commit is contained in:
Fangjun Kuang
2024-06-17 18:39:23 +08:00
committed by GitHub
parent 6e09933d99
commit 349d957da2
12 changed files with 390 additions and 32 deletions

View File

@@ -9,6 +9,12 @@
#include <string>
#include <vector>
#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/online-recognizer.h"
#include "sherpa-onnx/csrc/online-stream.h"
@@ -17,10 +23,15 @@ namespace sherpa_onnx {
// Abstract base for online recognizer implementations; it declares pure
// virtual methods further down, so only subclasses can be instantiated.
// NOTE(review): only part of the class is visible in this diff view.
class OnlineRecognizerImpl {
public:
// Constructs the base part of a recognizer from `config`.
// NOTE(review): presumably stores `config` and loads the inverse text
// normalization rules it refers to -- confirm against the .cc file.
explicit OnlineRecognizerImpl(const OnlineRecognizerConfig &config);
// Factory: returns an owning pointer to a recognizer built from `config`.
// NOTE(review): presumably selects a concrete subclass based on the
// model settings in `config` -- confirm against the .cc file.
static std::unique_ptr<OnlineRecognizerImpl> Create(
const OnlineRecognizerConfig &config);
// The overloads below are compiled only when __ANDROID_API__ >= 9. They take
// an additional AAssetManager pointer.
// NOTE(review): presumably `mgr` is used to read the files named in `config`
// from the APK assets instead of the file system -- confirm.
#if __ANDROID_API__ >= 9
OnlineRecognizerImpl(AAssetManager *mgr,
const OnlineRecognizerConfig &config);
static std::unique_ptr<OnlineRecognizerImpl> Create(
AAssetManager *mgr, const OnlineRecognizerConfig &config);
#endif
@@ -50,6 +61,15 @@ class OnlineRecognizerImpl {
// Pure virtual; must be provided by every subclass.
// NOTE(review): presumably returns true once an endpoint has been detected
// on stream `s` -- exact semantics are defined by the subclasses.
virtual bool IsEndpoint(OnlineStream *s) const = 0;
// Pure virtual; must be provided by every subclass.
// NOTE(review): presumably resets the decoding state held by stream `s`
// -- confirm against the subclasses.
virtual void Reset(OnlineStream *s) const = 0;
// Takes `text` by value and returns a string.
// NOTE(review): presumably passes `text` through each normalizer in
// itn_list_ in order and returns it unchanged when the list is empty
// -- confirm against the .cc file.
std::string ApplyInverseTextNormalization(std::string text) const;
private:
// Configuration this recognizer was created with.
OnlineRecognizerConfig config_;
// Normalizers for inverse text normalization. Used only if
// config.rule_fsts is not empty or
// config.rule_fars is not empty.
std::vector<std::unique_ptr<kaldifst::TextNormalizer>> itn_list_;
};
} // namespace sherpa_onnx