diff --git a/sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc b/sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc index 1dab60c4..0e54af41 100644 --- a/sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc +++ b/sherpa-onnx/csrc/kokoro-multi-lang-lexicon.cc @@ -104,7 +104,8 @@ class KokoroMultiLangLexicon::Impl { // https://en.cppreference.com/w/cpp/regex // https://stackoverflow.com/questions/37989081/how-to-use-unicode-range-in-c-regex std::string expr = - "([;:,.?!'\"…\\(\\)“”])|([\\u4e00-\\u9fff]+)|([\\u0000-\\u007f]+)"; + "([;:,.?!'\"…\\(\\)“”])|([\\u4e00-\\u9fff]+)|([äöüßÄÖÜ\\u0000-\\u007f]+" + ")"; auto ws = ToWideString(text); std::wstring wexpr = ToWideString(expr); @@ -127,7 +128,7 @@ class KokoroMultiLangLexicon::Impl { if (debug_) { SHERPA_ONNX_LOGE("Non-Chinese: %s", ms.c_str()); } - ids_vec = ConvertEnglishToTokenIDs(ms); + ids_vec = ConvertEnglishToTokenIDs(ms, meta_data_.voice); } else { if (debug_) { SHERPA_ONNX_LOGE("Chinese: %s", ms.c_str()); @@ -257,7 +258,7 @@ class KokoroMultiLangLexicon::Impl { } std::vector> ConvertEnglishToTokenIDs( - const std::string &text) const { + const std::string &text, const std::string &voice) const { std::vector words = SplitUtf8(text); if (debug_) { std::ostringstream os; @@ -315,7 +316,7 @@ class KokoroMultiLangLexicon::Impl { piper::eSpeakPhonemeConfig config; - config.voice = "en-us"; + config.voice = voice; std::vector> phonemes; diff --git a/sherpa-onnx/csrc/offline-tts-kokoro-impl.h b/sherpa-onnx/csrc/offline-tts-kokoro-impl.h index fe8f2331..d74cb80d 100644 --- a/sherpa-onnx/csrc/offline-tts-kokoro-impl.h +++ b/sherpa-onnx/csrc/offline-tts-kokoro-impl.h @@ -221,7 +221,7 @@ class OfflineTtsKokoroImpl : public OfflineTtsImpl { } std::vector token_ids = - frontend_->ConvertTextToTokenIds(text, "en-us"); + frontend_->ConvertTextToTokenIds(text, meta_data.voice); if (token_ids.empty() || (token_ids.size() == 1 && token_ids[0].tokens.empty())) { diff --git a/sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h b/sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h index 64b70851..b37babb3 100644 --- a/sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h +++ b/sherpa-onnx/csrc/offline-tts-kokoro-model-meta-data.h @@ -18,6 +18,8 @@ struct OfflineTtsKokoroModelMetaData { int32_t version = 1; int32_t has_espeak = 1; int32_t max_token_len = 0; + + std::string voice; }; } // namespace sherpa_onnx diff --git a/sherpa-onnx/csrc/offline-tts-kokoro-model.cc b/sherpa-onnx/csrc/offline-tts-kokoro-model.cc index 7f7c9013..9f77207b 100644 --- a/sherpa-onnx/csrc/offline-tts-kokoro-model.cc +++ b/sherpa-onnx/csrc/offline-tts-kokoro-model.cc @@ -138,6 +138,8 @@ class OfflineTtsKokoroModel::Impl { SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.version, "version", 1); SHERPA_ONNX_READ_META_DATA(meta_data_.num_speakers, "n_speakers"); SHERPA_ONNX_READ_META_DATA(meta_data_.has_espeak, "has_espeak"); + SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.voice, "voice", + "en-us"); if (config_.debug) { std::vector speaker_names;