Support decoding with byte-level BPE (bbpe) models. (#1633)

This commit is contained in:
Fangjun Kuang
2024-12-20 19:21:32 +08:00
committed by GitHub
parent 7192e576a9
commit b76cd9033a
11 changed files with 270 additions and 10 deletions

View File

@@ -56,12 +56,17 @@ class SymbolTable {
// Returns the number of entries currently held in the id -> symbol map.
// The container's size is converted to int32_t explicitly so the return
// type matches the id type used by the maps in this class.
int32_t NumSymbols() const {
  return static_cast<int32_t>(id2sym_.size());
}
// Decodes `text` and returns the result as a new string; does not modify
// this object (const member).
//
// NOTE(review): presumably `text` is the concatenated output of a byte-level
// BPE (bbpe) model and the result is the recovered readable text -- the
// definition lives outside this header, confirm against it.
std::string DecodeByteBpe(const std::string &text) const;
// Reports the value of the is_bbpe_ flag.
// NOTE(review): presumably true when the table was built from a byte-level
// BPE (bbpe) vocabulary -- confirm where the flag is set.
bool IsByteBpe() const {
  return is_bbpe_;
}
private:
// Initializes this object from the input stream `is`.
// NOTE(review): presumably reads symbol/id pairs and fills sym2id_ and
// id2sym_ -- the definition is not in this header, confirm the expected
// stream format there.
void Init(std::istream &is);
private:
// Lookup from symbol text to its integer id.
std::unordered_map<std::string, int32_t> sym2id_;
// Lookup from integer id to symbol text; its size is what NumSymbols()
// reports.
std::unordered_map<int32_t, std::string> id2sym_;
// Flag returned by IsByteBpe(); false unless set elsewhere.
// NOTE(review): presumably set while loading a byte-level BPE vocabulary --
// confirm in the implementation.
bool is_bbpe_ = false;
};
// Stream-insertion overload for SymbolTable; takes the table by const
// reference and returns an output stream reference so calls can be chained.
// NOTE(review): the printed format is defined in the implementation, which
// is not visible here.
std::ostream &operator<<(std::ostream &os, const SymbolTable &symbol_table);