Add inverse text normalization for non-streaming ASR (#1017)

This commit is contained in:
Fangjun Kuang
2024-06-17 14:28:53 +08:00
committed by GitHub
parent dd69a1b56b
commit b0f7ed3ee3
13 changed files with 380 additions and 19 deletions

View File

@@ -14,6 +14,7 @@
#include "android/asset_manager_jni.h"
#endif
#include "kaldifst/csrc/text-normalizer.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/offline-recognizer.h"
#include "sherpa-onnx/csrc/offline-stream.h"
@@ -22,10 +23,15 @@ namespace sherpa_onnx {
class OfflineRecognizerImpl {
public:
// Constructs the common base state from the recognizer configuration.
// Marked explicit so that an OfflineRecognizerConfig is never implicitly
// converted to an OfflineRecognizerImpl.
// NOTE(review): presumably stores `config` and loads the inverse text
// normalization rules; the definition is not visible here - confirm.
explicit OfflineRecognizerImpl(const OfflineRecognizerConfig &config);
// Factory that builds a recognizer implementation from `config` and hands
// ownership to the caller.
// This class declares pure virtual members, so the returned object is
// necessarily an instance of a concrete subclass.
// NOTE(review): how the subclass is selected from `config` is decided in the
// definition, which is not visible here.
static std::unique_ptr<OfflineRecognizerImpl> Create(
const OfflineRecognizerConfig &config);
#if __ANDROID_API__ >= 9
// Android-only overload of the constructor (compiled only when
// __ANDROID_API__ >= 9) that additionally receives an AAssetManager.
// NOTE(review): presumably `mgr` is used to read files referenced by `config`
// from the APK assets instead of the file system - confirm.
OfflineRecognizerImpl(AAssetManager *mgr,
const OfflineRecognizerConfig &config);
// Android-only overload of the factory (compiled only when
// __ANDROID_API__ >= 9). Returns an owning pointer to a concrete subclass,
// since this class itself has pure virtual members.
// NOTE(review): presumably mirrors Create(config) but resolves files through
// `mgr` - confirm against the definition.
static std::unique_ptr<OfflineRecognizerImpl> Create(
AAssetManager *mgr, const OfflineRecognizerConfig &config);
#endif
@@ -41,6 +47,15 @@ class OfflineRecognizerImpl {
// Creates a new offline stream and transfers its ownership to the caller.
// Pure virtual: every concrete recognizer must provide it.
virtual std::unique_ptr<OfflineStream> CreateStream() const = 0;
// Decodes a batch of streams. Pure virtual: every concrete recognizer must
// provide it.
// NOTE(review): `ss` presumably points to an array of `n` stream pointers,
// and results are presumably written into the streams themselves since
// nothing is returned - confirm against the implementations.
virtual void DecodeStreams(OfflineStream **ss, int32_t n) const = 0;
// Returns the inverse-text-normalized form of `text`.
// `text` is taken by value, so the caller's string is never modified.
// NOTE(review): presumably runs `text` through the configured normalizers
// and returns it unchanged when none are configured; the definition is not
// visible here - confirm.
std::string ApplyInverseTextNormalization(std::string text) const;
private:
// The recognizer configuration, held by value.
// NOTE(review): presumably a copy of the config passed to the constructor -
// confirm.
OfflineRecognizerConfig config_;
// Text normalizers used for inverse text normalization (ITN).
// Used only if config.rule_fsts is not empty or config.rule_fars is not
// empty. Each normalizer is uniquely owned by this vector.
// NOTE(review): presumably one entry per configured rule file, applied in
// order - confirm against the constructor.
std::vector<std::unique_ptr<kaldifst::TextNormalizer>> itn_list_;
};
} // namespace sherpa_onnx