// sherpa-onnx/csrc/lexicon.h
//
// Copyright (c)  2022-2023  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_LEXICON_H_
#define SHERPA_ONNX_CSRC_LEXICON_H_

#include <cstdint>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace sherpa_onnx {

/// Converts text into a sequence of token IDs.
///
/// Only the declarations live in this header; the constructor and
/// ConvertTextToTokenIds() are defined in another translation unit.
///
/// NOTE(review): the template arguments in this header were reconstructed
/// after the angle-bracket contents were lost. Confirm the element/key types
/// (in particular the int64_t return element type) against the
/// implementation file and the callers.
class Lexicon {
 public:
  /// Builds the lexicon from its three inputs.
  ///
  /// @param lexicon       Source of the word entries.
  ///                      NOTE(review): presumably a file path - confirm.
  /// @param tokens        Source of the token table.
  ///                      NOTE(review): presumably a file path - confirm.
  /// @param punctuations  Punctuation specification.
  ///                      NOTE(review): format is not visible here - confirm.
  Lexicon(const std::string &lexicon, const std::string &tokens,
          const std::string &punctuations);

  /// Converts `text` into token IDs.
  ///
  /// @param text  Input text to convert.
  /// @return The token IDs produced for `text`.
  std::vector<int64_t> ConvertTextToTokenIds(const std::string &text) const;

 private:
  // Lookup table from a word to the token IDs associated with it.
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;

  // Set of punctuation entries.
  std::unordered_set<std::string> punctuations_;

  // Lookup table from a token to its integer ID.
  std::unordered_map<std::string, int32_t> token2id_;

  // ID for the blank token. Default-initialized so the member never holds an
  // indeterminate value. NOTE(review): confirm the constructor assigns it.
  int32_t blank_ = -1;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_LEXICON_H_