From a8fed2a9ce76d21aa2c15fab19deee998a5cb2b8 Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Thu, 26 Oct 2023 13:07:30 +0800 Subject: [PATCH] Fix splitting words containing ', e.g., I've (#389) --- sherpa-onnx/csrc/text-utils.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sherpa-onnx/csrc/text-utils.cc b/sherpa-onnx/csrc/text-utils.cc index c4b34772..81da9c50 100644 --- a/sherpa-onnx/csrc/text-utils.cc +++ b/sherpa-onnx/csrc/text-utils.cc @@ -163,6 +163,8 @@ template bool SplitStringToFloats(const std::string &full, const char *delim, bool omit_empty_strings, std::vector *out); +static bool IsPunct(char c) { return c != '\'' && std::ispunct(c); } + static std::vector MergeCharactersIntoWords( const std::vector &words) { std::vector ans; @@ -174,7 +176,7 @@ static std::vector MergeCharactersIntoWords( while (i < n) { const auto &w = words[i]; if (w.size() > 1 || - (w.size() == 1 && (std::ispunct(w[0]) || std::isspace(w[0])))) { + (w.size() == 1 && (IsPunct(w[0]) || std::isspace(w[0])))) { if (prev != -1) { std::string t; for (; prev < i; ++prev) {