Add Android demo for MatchaTTS models. (#1683)

This commit is contained in:
Fangjun Kuang
2025-01-06 06:44:09 +08:00
committed by GitHub
parent 3eced3e7ee
commit 1fe5fe495f
9 changed files with 222 additions and 38 deletions

View File

@@ -37,6 +37,8 @@ mkdir -p apks
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
@@ -44,15 +46,30 @@ wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$mod
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2
{% if tts_model.vocoder %}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}
popd
# Now we are at the project root directory
git checkout .
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
{% endif %}
{% if tts_model.acoustic_model_name %}
# Only models that define acoustic_model_name get acousticModelName patched in;
# the guard must test that field, not model_name.
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
{% endif %}
{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./TtsEngine.kt
{% endif %}
{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
@@ -109,6 +126,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTtsEngine/app/src/main/assets/*.onnx
{% endfor %}
git checkout .

View File

@@ -37,19 +37,38 @@ mkdir -p apks
pushd ./android/SherpaOnnxTts/app/src/main/assets/
model_dir={{ tts_model.model_dir }}
model_name={{ tts_model.model_name }}
acoustic_model_name={{ tts_model.acoustic_model_name }}
vocoder={{ tts_model.vocoder }}
lang={{ tts_model.lang }}
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
tar xf $model_dir.tar.bz2
rm $model_dir.tar.bz2
{% if tts_model.vocoder %}
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
{% endif %}
popd
# Now we are at the project root directory
git checkout .
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
{% if tts_model.model_name %}
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
{% endif %}
{% if tts_model.acoustic_model_name %}
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./MainActivity.kt
{% endif %}
{% if tts_model.vocoder %}
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./MainActivity.kt
{% endif %}
{% if tts_model.rule_fsts %}
rule_fsts={{ tts_model.rule_fsts }}
@@ -107,6 +126,8 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
done
rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
rm -fv ./android/SherpaOnnxTts/app/src/main/assets/*.onnx
{% endfor %}
git checkout .

View File

@@ -30,7 +30,9 @@ def get_args():
@dataclass
class TtsModel:
model_dir: str
model_name: str = ""
model_name: str = "" # for vits
acoustic_model_name: str = "" # for matcha
vocoder: str = "" # for matcha
lang: str = "" # en, zh, fr, de, etc.
rule_fsts: Optional[List[str]] = None
rule_fars: Optional[List[str]] = None
@@ -378,6 +380,35 @@ def get_vits_models() -> List[TtsModel]:
return all_models
def get_matcha_models() -> List[TtsModel]:
    """Build the list of Matcha TTS model entries.

    One Chinese ("zh") and one English ("en") entry are created. Every entry
    is given the same vocoder file name. Chinese entries additionally get a
    comma-separated ``rule_fsts`` string and a ``dict_dir``; English entries
    get a ``data_dir``.

    Returns:
        The Chinese entries followed by the English entries.
    """
    vocoder_name = "hifigan_v2.onnx"

    zh_entries = [
        TtsModel(
            model_dir="matcha-icefall-zh-baker",
            acoustic_model_name="model-steps-3.onnx",
            lang="zh",
        )
    ]
    fst_names = ["phone.fst", "date.fst", "number.fst"]
    for entry in zh_entries:
        # rule_fsts is stored as a single comma-joined string of paths
        # located inside the model directory.
        entry.rule_fsts = ",".join(
            f"{entry.model_dir}/{name}" for name in fst_names
        )
        # NOTE(review): dict_dir is not among the TtsModel fields visible
        # here -- presumably declared further down the dataclass; confirm.
        entry.dict_dir = f"{entry.model_dir}/dict"
        entry.vocoder = vocoder_name

    en_entries = [
        TtsModel(
            model_dir="matcha-icefall-en_US-ljspeech",
            acoustic_model_name="model-steps-3.onnx",
            lang="en",
        )
    ]
    for entry in en_entries:
        # NOTE(review): data_dir is likewise not visible in the dataclass
        # excerpt; confirm it is a declared field.
        entry.data_dir = f"{entry.model_dir}/espeak-ng-data"
        entry.vocoder = vocoder_name

    return zh_entries + en_entries
def main():
args = get_args()
index = args.index
@@ -389,7 +420,10 @@ def main():
all_model_list += get_piper_models()
all_model_list += get_mimic3_models()
all_model_list += get_coqui_models()
all_model_list += get_matcha_models()
convert_lang_to_iso_639_3(all_model_list)
print(all_model_list)
num_models = len(all_model_list)