Fix symbol table for byte bpe (#361)

This commit is contained in:
Fangjun Kuang
2023-10-13 10:51:59 +08:00
committed by GitHub
parent efd3cd3312
commit 323f532ad2

View File

@@ -60,7 +60,16 @@ void SymbolTable::Init(std::istream &is) {
}
assert(!sym.empty());
assert(sym2id_.count(sym) == 0);
// For byte-level BPE, ▁ is replaced with a space (ASCII 0x20), which
// collides with the real byte 0x20, so the symbol " " may legitimately
// appear more than once. We therefore skip the uniqueness check on sym2id_
// below when the symbol is " ".
//
// Note: Only id2sym_ matters here, as we use it to convert IDs to symbols.
if (sym != " ") {
assert(sym2id_.count(sym) == 0);
}
assert(id2sym_.count(id) == 0);
sym2id_.insert({sym, id});