Fix splitting text by languages for kokoro tts. (#1849)

This commit is contained in:
Fangjun Kuang
2025-02-13 18:19:34 +08:00
committed by GitHub
parent 115e9c2247
commit 944400e399
7 changed files with 204 additions and 36 deletions

View File

@@ -124,6 +124,8 @@ std::vector<std::string> SplitUtf8(const std::string &text);
std::string ToLowerCase(const std::string &s);
void ToLowerCase(std::string *in_out);
std::wstring ToLowerCase(const std::wstring &s);
std::string RemoveInvalidUtf8Sequences(const std::string &text,
bool show_debug_msg = false);
@@ -139,6 +141,10 @@ bool IsGB2312(const std::string &text);
std::string Gb2312ToUtf8(const std::string &text);
#endif
std::wstring ToWideString(const std::string &s);
std::string ToString(const std::wstring &s);
} // namespace sherpa_onnx
#endif // SHERPA_ONNX_CSRC_TEXT_UTILS_H_