diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f3ccc85..d42cc8ba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.13 FATAL_ERROR) project(sherpa-onnx) -set(SHERPA_ONNX_VERSION "1.9.18") +set(SHERPA_ONNX_VERSION "1.9.19") # Disable warning about # diff --git a/sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h b/sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h index 134b8807..393feba9 100644 --- a/sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h +++ b/sherpa-onnx/csrc/offline-punctuation-ct-transformer-impl.h @@ -98,7 +98,7 @@ class OfflinePunctuationCtTransformerImpl : public OfflinePunctuationImpl { int32_t dot_index = -1; int32_t comma_index = -1; - for (int32_t m = this_punctuations.size() - 1; m >= 1; --m) { + for (int32_t m = this_punctuations.size() - 2; m >= 1; --m) { int32_t punct_id = this_punctuations[m]; if (punct_id == meta_data.dot_id || punct_id == meta_data.quest_id) { @@ -126,27 +126,20 @@ class OfflinePunctuationCtTransformerImpl : public OfflinePunctuationImpl { } } else { last = this_start + dot_index + 1; + } + if (dot_index != 1) { punctuations.insert(punctuations.end(), this_punctuations.begin(), this_punctuations.begin() + (dot_index + 1)); } } // for (int32_t i = 0; i != num_segments; ++i) - if (punctuations.size() != token_ids.size() && - punctuations.size() + 1 == token_ids.size()) { - punctuations.push_back(meta_data.dot_id); - } - - if (punctuations.size() != token_ids.size()) { - SHERPA_ONNX_LOGE("%s, %d, %d. Some unexpected things happened", - text.c_str(), static_cast(punctuations.size()), - static_cast(token_ids.size())); - return text; - } - std::string ans; for (int32_t i = 0; i != static_cast(punctuations.size()); ++i) { + if (i > tokens.size()) { + break; + } const std::string &w = tokens[i]; if (i > 0 && !(ans.back() & 0x80) && !(w[0] & 0x80)) { ans.push_back(' '); @@ -156,6 +149,9 @@ class OfflinePunctuationCtTransformerImpl : public OfflinePunctuationImpl { ans.append(meta_data.id2punct[punctuations[i]]); } } + if (ans.back() != meta_data.dot_id && ans.back() != meta_data.quest_id) { + ans.push_back(meta_data.dot_id); + } return ans; }