Fix splitting text by languages for kokoro tts. (#1849)

This commit is contained in:
Fangjun Kuang
2025-02-13 18:19:34 +08:00
committed by GitHub
parent 115e9c2247
commit 944400e399
7 changed files with 204 additions and 36 deletions

View File

@@ -8,6 +8,14 @@
namespace sherpa_onnx {
// Smoke test for ToLowerCase on a string that mixes ASCII with non-ASCII
// characters (German umlauts, sharp s, and the euro sign).
//
// The test makes no assertions: it prints the original text followed by the
// value returned by ToLowerCase, each on its own line, for manual inspection.
TEST(ToLowerCase, WideString) {
  std::string original =
      "Hallo! Übeltäter übergibt Ärzten öfters äußerst ätzende Öle 3€";

  auto lowered = ToLowerCase(original);

  // Input first, then the converted result, one per line.
  std::cout << original << "\n" << lowered << "\n";
}
TEST(RemoveInvalidUtf8Sequences, Case1) {
std::vector<uint8_t> v = {
0xe4, 0xbb, 0x8a, // 今