Support removing invalid UTF-8 sequences. (#1648)

This commit is contained in:
Fangjun Kuang
2024-12-25 19:32:13 +08:00
committed by GitHub
parent 08d771337b
commit b6f0f5fc2e
6 changed files with 164 additions and 0 deletions

View File

@@ -194,6 +194,8 @@ OnlineRecognizerImpl::OnlineRecognizerImpl(Manager *mgr,
std::string OnlineRecognizerImpl::ApplyInverseTextNormalization(
std::string text) const {
text = RemoveInvalidUtf8Sequences(text);
if (!itn_list_.empty()) {
for (const auto &tn : itn_list_) {
text = tn->Normalize(text);