// sherpa-onnx/csrc/lexicon.h // // Copyright (c) 2022-2023 Xiaomi Corporation #ifndef SHERPA_ONNX_CSRC_LEXICON_H_ #define SHERPA_ONNX_CSRC_LEXICON_H_ #include #include #include #include #include #include #include #include "sherpa-onnx/csrc/offline-tts-frontend.h" namespace sherpa_onnx { class Lexicon : public OfflineTtsFrontend { public: Lexicon() = default; // for subclasses // // Note: for models from piper, we won't use this class. Lexicon(const std::string &lexicon, const std::string &tokens, const std::string &punctuations, const std::string &language, bool debug = false); template Lexicon(Manager *mgr, const std::string &lexicon, const std::string &tokens, const std::string &punctuations, const std::string &language, bool debug = false); std::vector ConvertTextToTokenIds( const std::string &text, const std::string &voice = "") const override; private: std::vector ConvertTextToTokenIdsNotChinese( const std::string &text) const; std::vector ConvertTextToTokenIdsChinese( const std::string &text) const; void InitLanguage(const std::string &lang); void InitTokens(std::istream &is); void InitLexicon(std::istream &is); void InitPunctuations(const std::string &punctuations); private: enum class Language { kNotChinese, kChinese, kUnknown, }; private: std::unordered_map> word2ids_; std::unordered_set punctuations_; std::unordered_map token2id_; Language language_ = Language::kUnknown; bool debug_ = false; }; } // namespace sherpa_onnx #endif // SHERPA_ONNX_CSRC_LEXICON_H_