Support printing input text and words after splitting (#376)

This commit is contained in:
Fangjun Kuang
2023-10-20 12:06:30 +08:00
committed by GitHub
parent 2a932aca86
commit 3ba9a4932f
3 changed files with 37 additions and 3 deletions

View File

@@ -17,7 +17,8 @@ namespace sherpa_onnx {
class Lexicon {
public:
Lexicon(const std::string &lexicon, const std::string &tokens,
const std::string &punctuations, const std::string &language);
const std::string &punctuations, const std::string &language,
bool debug = false);
std::vector<int64_t> ConvertTextToTokenIds(const std::string &text) const;
@@ -45,6 +46,7 @@ class Lexicon {
std::unordered_set<std::string> punctuations_;
std::unordered_map<std::string, int32_t> token2id_;
Language language_;
// NOTE(review): presumably holds the `debug` flag passed to the constructor
// (declared with a default of false); confirm against the constructor
// definition, which is not visible here.
bool debug_;
//
};