Fix Byte BPE string results for Python. (#512)

Invalid UTF-8 byte sequences in the recognition result text are now ignored when it is decoded into a Python string.
This commit is contained in:
Fangjun Kuang
2024-01-03 16:03:24 +08:00
committed by GitHub
parent d01142173a
commit e215d0c39a
6 changed files with 54 additions and 3 deletions

View File

@@ -23,8 +23,12 @@ Args:
static void PybindOfflineRecognitionResult(py::module *m) { // NOLINT
using PyClass = OfflineRecognitionResult;
py::class_<PyClass>(*m, "OfflineRecognitionResult")
.def_property_readonly("text",
[](const PyClass &self) { return self.text; })
.def_property_readonly(
"text",
[](const PyClass &self) -> py::str {
return py::str(PyUnicode_DecodeUTF8(self.text.c_str(),
self.text.size(), "ignore"));
})
.def_property_readonly("tokens",
[](const PyClass &self) { return self.tokens; })
.def_property_readonly(