Fix tokens processing for byte-level BPE (#333)

This commit is contained in:
Fangjun Kuang
2023-09-22 13:28:19 +08:00
committed by GitHub
parent 969fff5622
commit 43b2b7760d
3 changed files with 41 additions and 16 deletions

View File

@@ -51,7 +51,7 @@ void SymbolTable::Init(std::istream &is) {
if (id >= 3 && id <= 258 && sym.size() == 6 && sym[0] == '<' &&
sym[1] == '0' && sym[2] == 'x' && sym[5] == '>') {
std::ostringstream os;
-os << std::hex << (id - 3);
+os << std::hex << std::uppercase << (id - 3);
if (std::string(sym.data() + 3, sym.data() + 5) == os.str()) {
uint8_t i = id - 3;