diff --git a/CMakeLists.txt b/CMakeLists.txt
index 90b50bb7..7c8840ee 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)
 
-set(SHERPA_ONNX_VERSION "1.7.15")
+set(SHERPA_ONNX_VERSION "1.7.16")
 
 # Disable warning about
 #
diff --git a/sherpa-onnx/csrc/symbol-table.cc b/sherpa-onnx/csrc/symbol-table.cc
index 692783b4..6fb69d9b 100644
--- a/sherpa-onnx/csrc/symbol-table.cc
+++ b/sherpa-onnx/csrc/symbol-table.cc
@@ -46,6 +46,30 @@ void SymbolTable::Init(std::istream &is) {
       }
     }
 
+    // For byte-level BPE: a symbol spelled <0xNN> (exactly two hex digits)
+    // is replaced by the single raw byte 0xNN.
+    // id 0 is blank, id 1 is sos/eos, id 2 is unk, so byte b has id b + 3.
+    if (id >= 3 && id <= 258 && sym.size() == 6 && sym[0] == '<' &&
+        sym[1] == '0' && sym[2] == 'x' && sym[5] == '>') {
+      // Decode one hex digit of either case; returns -1 if c is not one.
+      auto hex_value = [](char c) -> int {
+        if (c >= '0' && c <= '9') return c - '0';
+        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+        return -1;
+      };
+
+      // Compare numerically rather than against text printed with std::hex:
+      // that text is lowercase and unpadded, so symbols such as <0x0A> or
+      // <0xE4> could never equal the formatted value of their own id.
+      const int hi = hex_value(sym[3]);
+      const int lo = hex_value(sym[4]);
+      if (hi >= 0 && lo >= 0 && hi * 16 + lo == id - 3) {
+        uint8_t i = id - 3;
+        sym = std::string(&i, &i + 1);
+      }
+    }
+
     assert(!sym.empty());
     assert(sym2id_.count(sym) == 0);
     assert(id2sym_.count(id) == 0);