From 4ae9382baef4a2f5035cf4376f4c66f9b43493ff Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Tue, 17 Jun 2025 14:16:48 +0800 Subject: [PATCH] Update TTS Engine APK to support multi-lang (#2294) --- .../java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt | 6 ++++++ .../java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt | 9 ++++++--- scripts/apk/build-apk-tts-engine.sh.in | 7 ++++++- scripts/apk/build-apk-tts.sh.in | 2 +- scripts/apk/generate-tts-apk-script.py | 8 ++++++++ 5 files changed, 27 insertions(+), 5 deletions(-) diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt index 6c576167..53e69b4e 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsEngine.kt @@ -23,6 +23,9 @@ object TtsEngine { // cmn for Mandarin var lang: String? = null + // if a model supports two languages, set also lang2 + var lang2: String? = null + val speedState: MutableState = mutableFloatStateOf(1.0F) val speakerIdState: MutableState = mutableIntStateOf(0) @@ -76,6 +79,7 @@ object TtsEngine { dataDir = null dictDir = null lang = null + lang2 = null // Please enable one and only one of the examples below @@ -125,6 +129,7 @@ object TtsEngine { // lexicon = "lexicon.txt" // dictDir = "vits-melo-tts-zh_en/dict" // lang = "zho" + // lang2 = "eng" // Example 7 // matcha-icefall-zh-baker @@ -162,6 +167,7 @@ object TtsEngine { // dictDir = "kokoro-multi-lang-v1_0/dict" // lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt" // lang = "eng" + // lang2 = "zho" // ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst" // // This model supports many languages, e.g., English, Chinese, etc. diff --git a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt index 3075d628..53a9ef2d 100644 --- a/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt +++ b/android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine/TtsService.kt @@ -60,6 +60,9 @@ class TtsService : TextToSpeechService() { // see https://github.com/Miserlou/Android-SDK-Samples/blob/master/TtsEngine/src/com/example/android/ttsengine/RobotSpeakTtsService.java#L68 onLoadLanguage(TtsEngine.lang, "", "") + if (TtsEngine.lang2 != null) { + onLoadLanguage(TtsEngine.lang2, "", "") + } } override fun onDestroy() { @@ -71,7 +74,7 @@ class TtsService : TextToSpeechService() { override fun onIsLanguageAvailable(_lang: String?, _country: String?, _variant: String?): Int { val lang = _lang ?: "" - if (lang == TtsEngine.lang) { + if (lang == TtsEngine.lang || lang == TtsEngine.lang2) { return TextToSpeech.LANG_AVAILABLE } @@ -87,12 +90,12 @@ class TtsService : TextToSpeechService() { Log.i(TAG, "onLoadLanguage: $_lang, $_country") val lang = _lang ?: "" - return if (lang == TtsEngine.lang) { + return if (lang == TtsEngine.lang || lang == TtsEngine.lang2) { Log.i(TAG, "creating tts, lang :$lang") TtsEngine.createTts(application) TextToSpeech.LANG_AVAILABLE } else { - Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}") + Log.i(TAG, "lang $lang not supported, tts engine lang: ${TtsEngine.lang}, ${TtsEngine.lang2}") TextToSpeech.LANG_NOT_SUPPORTED } } diff --git a/scripts/apk/build-apk-tts-engine.sh.in b/scripts/apk/build-apk-tts-engine.sh.in index c301531e..813ba4d0 100644 --- a/scripts/apk/build-apk-tts-engine.sh.in +++ b/scripts/apk/build-apk-tts-engine.sh.in @@ -42,6 +42,7 @@ vocoder={{ tts_model.vocoder }} voices={{ tts_model.voices }} lang={{ tts_model.lang }} lang_iso_639_3={{ tts_model.lang_iso_639_3 }} +lang_iso_639_3_2={{ tts_model.lang_iso_639_3_2 }} wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2 tar xf $model_dir.tar.bz2 @@ -59,6 +60,10 @@ pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/en sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt +{% if tts_model.lang2 %} + sed -i.bak s/"lang2 = null"/"lang2 = \"$lang_iso_639_3_2\""/ ./TtsEngine.kt +{% endif %} + {% if tts_model.model_name %} sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt {% endif %} @@ -109,7 +114,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then lang=zh_en fi -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then lang=zh_en fi diff --git a/scripts/apk/build-apk-tts.sh.in b/scripts/apk/build-apk-tts.sh.in index 400f3404..19f928e0 100644 --- a/scripts/apk/build-apk-tts.sh.in +++ b/scripts/apk/build-apk-tts.sh.in @@ -108,7 +108,7 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then lang=zh_en fi -if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then +if [[ $model_dir == kokoro-multi-lang-v1_0 || $model_dir == kokoro-multi-lang-v1_1 || $model_dir == kokoro-int8-multi-lang-v1_1 ]]; then lang=zh_en fi diff --git a/scripts/apk/generate-tts-apk-script.py b/scripts/apk/generate-tts-apk-script.py index af5469a4..60881031 100755 --- a/scripts/apk/generate-tts-apk-script.py +++ b/scripts/apk/generate-tts-apk-script.py @@ -35,12 +35,14 @@ class TtsModel: vocoder: str = "" # for matcha voices: str = "" # for kokoro lang: str = "" # en, zh, fr, de, etc. + lang2: str = "" # en, zh, fr, de, etc. rule_fsts: Optional[List[str]] = None rule_fars: Optional[List[str]] = None data_dir: Optional[str] = None dict_dir: Optional[str] = None is_char: bool = False lang_iso_639_3: str = "" + lang_iso_639_3_2: str = "" lexicon: str = "" @@ -48,6 +50,8 @@ def convert_lang_to_iso_639_3(models: List[TtsModel]): for m in models: if m.lang_iso_639_3 == "": m.lang_iso_639_3 = Lang(m.lang).pt3 + if m.lang2 != "": + m.lang_iso_639_3_2 = Lang(m.lang2).pt3 def get_coqui_models() -> List[TtsModel]: @@ -322,6 +326,7 @@ def get_vits_models() -> List[TtsModel]: model_dir="vits-melo-tts-zh_en", model_name="model.onnx", lang="zh", + lang2="en", ), TtsModel( model_dir="vits-zh-hf-fanchen-C", @@ -438,16 +443,19 @@ def get_kokoro_models() -> List[TtsModel]: model_dir="kokoro-multi-lang-v1_0", model_name="model.onnx", lang="en", + lang2="zh", ), TtsModel( model_dir="kokoro-multi-lang-v1_1", model_name="model.onnx", lang="en", + lang2="zh", ), TtsModel( model_dir="kokoro-int8-multi-lang-v1_1", model_name="model.int8.onnx", lang="en", + lang2="zh", ), ] for m in multi_lingual_models: