Use piper-phonemize to convert text to token IDs (#453)
This commit is contained in:
@@ -37,13 +37,9 @@ model_dir={{ tts_model.model_dir }}
|
||||
model_name={{ tts_model.model_name }}
|
||||
lang={{ tts_model.lang }}
|
||||
|
||||
mkdir $model_dir
|
||||
cd $model_dir
|
||||
wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/$model_name
|
||||
wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/lexicon.txt
|
||||
wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/tokens.txt
|
||||
wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/MODEL_CARD 2>/dev/null || true
|
||||
wget -qq https://huggingface.co/csukuangfj/$model_dir/resolve/main/rule.fst 2>/dev/null || true
|
||||
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
|
||||
tar xf $model_dir.tar.bz2
|
||||
rm $model_dir.tar.bz2
|
||||
|
||||
popd
|
||||
# Now we are at the project root directory
|
||||
@@ -52,11 +48,19 @@ git checkout .
|
||||
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
|
||||
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
|
||||
|
||||
{% if tts_model.rule_fsts %}
|
||||
rule_fsts={{ tts_model.rule_fsts }}
|
||||
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./MainActivity.kt
|
||||
{% endif %}
|
||||
|
||||
{% if tts_model.data_dir %}
|
||||
data_dir={{ tts_model.data_dir }}
|
||||
sed -i.bak s%"dataDir = null"%"dataDir = \"$data_dir\""% ./MainActivity.kt
|
||||
{% else %}
|
||||
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
|
||||
{% endif %}
|
||||
|
||||
git diff
|
||||
popd
|
||||
|
||||
|
||||
@@ -27,9 +27,122 @@ def get_args():
|
||||
@dataclass
class TtsModel:
    """Describe one TTS model for which a build script is generated.

    Only ``model_dir`` is required; the remaining fields default to empty
    values so that helper functions can fill them in after construction.
    """

    # Name of the model directory / release archive, e.g. "vits-ljs".
    model_dir: str

    # File name of the ONNX model inside ``model_dir``.
    model_name: str = ""

    # Language code of the model: en, zh, fr, de, etc.
    lang: str = ""

    # Optional rule FSTs to pass to the app; left as None when unused.
    rule_fsts: Optional[List[str]] = None

    # Optional data directory (e.g. espeak-ng data); left as None when the
    # model uses a lexicon file instead.
    data_dir: Optional[str] = None
|
||||
|
||||
|
||||
def get_piper_models() -> List[TtsModel]:
    """Return the list of piper VITS models to generate build scripts for.

    Every entry is created from its model directory name, which has the
    form ``vits-piper-<lang>_<REGION>-<voice>-<quality>``. The remaining
    fields are derived from that name:

    - ``data_dir``: ``<model_dir>/espeak-ng-data``
    - ``model_name``: the directory name without the ``vits-piper-``
      prefix, plus ``.onnx``
    - ``lang``: the language part of the locale, e.g. ``de`` for
      ``vits-piper-de_DE-thorsten-low``

    Returns:
      A list of fully populated ``TtsModel`` objects, one per model
      directory, in the order listed below.
    """
    models = [
        TtsModel(model_dir="vits-piper-ar_JO-kareem-low"),
        TtsModel(model_dir="vits-piper-ar_JO-kareem-medium"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-medium"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_ona-x_low"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"),
        TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"),
        TtsModel(model_dir="vits-piper-de_DE-karlsson-low"),
        TtsModel(model_dir="vits-piper-de_DE-kerstin-low"),
        TtsModel(model_dir="vits-piper-de_DE-pavoque-low"),
        TtsModel(model_dir="vits-piper-de_DE-ramona-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-high"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-low"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten-medium"),
        TtsModel(model_dir="vits-piper-de_DE-thorsten_emotional-medium"),
        TtsModel(model_dir="vits-piper-el_GR-rapunzelina-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-low"),
        TtsModel(model_dir="vits-piper-en_GB-alan-medium"),
        TtsModel(model_dir="vits-piper-en_GB-alba-medium"),
        TtsModel(model_dir="vits-piper-en_GB-jenny_dioco-medium"),
        TtsModel(model_dir="vits-piper-en_GB-northern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-semaine-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"),
        TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"),
        TtsModel(model_dir="vits-piper-en_GB-vctk-medium"),
        TtsModel(model_dir="vits-piper-en_US-amy-low"),
        TtsModel(model_dir="vits-piper-en_US-amy-medium"),
        TtsModel(model_dir="vits-piper-en_US-arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-danny-low"),
        TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"),
        TtsModel(model_dir="vits-piper-en_US-joe-medium"),
        TtsModel(model_dir="vits-piper-en_US-kathleen-low"),
        TtsModel(model_dir="vits-piper-en_US-kusal-medium"),
        TtsModel(model_dir="vits-piper-en_US-l2arctic-medium"),
        TtsModel(model_dir="vits-piper-en_US-lessac-high"),
        TtsModel(model_dir="vits-piper-en_US-lessac-low"),
        TtsModel(model_dir="vits-piper-en_US-lessac-medium"),
        TtsModel(model_dir="vits-piper-en_US-libritts-high"),
        TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"),
        TtsModel(model_dir="vits-piper-en_US-ryan-high"),
        TtsModel(model_dir="vits-piper-en_US-ryan-low"),
        TtsModel(model_dir="vits-piper-en_US-ryan-medium"),
        TtsModel(model_dir="vits-piper-es_ES-carlfm-x_low"),
        TtsModel(model_dir="vits-piper-es_ES-davefx-medium"),
        TtsModel(model_dir="vits-piper-es_ES-mls_10246-low"),
        TtsModel(model_dir="vits-piper-es_ES-mls_9972-low"),
        TtsModel(model_dir="vits-piper-es_ES-sharvard-medium"),
        TtsModel(model_dir="vits-piper-es_MX-ald-medium"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-low"),
        TtsModel(model_dir="vits-piper-fi_FI-harri-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-anna-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-berta-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-imre-medium"),
        TtsModel(model_dir="vits-piper-is_IS-bui-medium"),
        TtsModel(model_dir="vits-piper-is_IS-salka-medium"),
        TtsModel(model_dir="vits-piper-is_IS-steinn-medium"),
        TtsModel(model_dir="vits-piper-is_IS-ugla-medium"),
        TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"),
        TtsModel(model_dir="vits-piper-ka_GE-natia-medium"),
        TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"),
        TtsModel(model_dir="vits-piper-kk_KZ-issai-high"),
        TtsModel(model_dir="vits-piper-kk_KZ-raya-x_low"),
        TtsModel(model_dir="vits-piper-lb_LU-marylux-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-medium"),
        TtsModel(model_dir="vits-piper-ne_NP-google-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-nathalie-x_low"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-medium"),
        TtsModel(model_dir="vits-piper-nl_BE-rdh-x_low"),
        TtsModel(model_dir="vits-piper-nl_NL-mls_5809-low"),
        TtsModel(model_dir="vits-piper-nl_NL-mls_7432-low"),
        TtsModel(model_dir="vits-piper-no_NO-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-darkman-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-gosia-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-mc_speech-medium"),
        TtsModel(model_dir="vits-piper-pl_PL-mls_6892-low"),
        TtsModel(model_dir="vits-piper-pt_BR-edresson-low"),
        TtsModel(model_dir="vits-piper-pt_BR-faber-medium"),
        TtsModel(model_dir="vits-piper-pt_PT-tugao-medium"),
        TtsModel(model_dir="vits-piper-ro_RO-mihai-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-denis-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-dmitri-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-irina-medium"),
        TtsModel(model_dir="vits-piper-ru_RU-ruslan-medium"),
        TtsModel(model_dir="vits-piper-sk_SK-lili-medium"),
        TtsModel(model_dir="vits-piper-sr_RS-serbski_institut-medium"),
        TtsModel(model_dir="vits-piper-sv_SE-nst-medium"),
        TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"),
        TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"),
        TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"),
        TtsModel(model_dir="vits-piper-vi_VN-vais1000-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-vivos-x_low"),
        TtsModel(model_dir="vits-piper-zh_CN-huayan-medium"),
    ]

    prefix = "vits-piper-"
    for m in models:
        # "vits-piper-de_DE-thorsten-low" -> "de_DE-thorsten-low"
        voice = m.model_dir[len(prefix) :]

        m.data_dir = m.model_dir + "/" + "espeak-ng-data"
        m.model_name = voice + ".onnx"
        # The locale is "<lang>_<REGION>"; keep only the language part so
        # that, e.g., German voices are tagged "de" rather than "en".
        m.lang = voice.split("_", 1)[0]

    return models
|
||||
|
||||
|
||||
def get_all_models() -> List[TtsModel]:
|
||||
@@ -98,56 +211,6 @@ def get_all_models() -> List[TtsModel]:
|
||||
# English (US)
|
||||
TtsModel(model_dir="vits-vctk", model_name="vits-vctk.onnx", lang="en"),
|
||||
TtsModel(model_dir="vits-ljs", model_name="vits-ljs.onnx", lang="en"),
|
||||
TtsModel(model_dir="vits-piper-en_US-amy-low", model_name="en_US-amy-low.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-amy-medium", model_name="en_US-amy-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-arctic-medium", model_name="en_US-arctic-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-danny-low", model_name="en_US-danny-low.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-hfc_male-medium", model_name="en_US-hfc_male-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-joe-medium", model_name="en_US-joe-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-kathleen-low", model_name="en_US-kathleen-low.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-kusal-medium", model_name="en_US-kusal-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-l2arctic-medium", model_name="en_US-l2arctic-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-lessac-low", model_name="en_US-lessac-low.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-lessac-medium", model_name="en_US-lessac-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-lessac-high", model_name="en_US-lessac-high.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-libritts-high", model_name="en_US-libritts-high.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-libritts_r-medium", model_name="en_US-libritts_r-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-ryan-low", model_name="en_US-ryan-low.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-ryan-medium", model_name="en_US-ryan-medium.onnx", lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_US-ryan-high", model_name="en_US-ryan-high.onnx", lang="en",),
|
||||
# English (GB)
|
||||
TtsModel(model_dir="vits-piper-en_GB-alan-low", model_name="en_GB-alan-low.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-alan-medium", model_name="en_GB-alan-medium.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-alba-medium", model_name="en_GB-alba-medium.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-jenny_dioco-medium", model_name="en_GB-jenny_dioco-medium.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-northern_english_male-medium", model_name="en_GB-northern_english_male-medium.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-semaine-medium", model_name="en_GB-semaine-medium.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low", model_name="en_GB-southern_english_female-low.onnx",lang="en",),
|
||||
TtsModel(model_dir="vits-piper-en_GB-vctk-medium", model_name="en_GB-vctk-medium.onnx",lang="en",),
|
||||
# German (DE)
|
||||
TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low", model_name="de_DE-eva_k-x_low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-karlsson-low", model_name="de_DE-karlsson-low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-kerstin-low", model_name="de_DE-kerstin-low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-pavoque-low", model_name="de_DE-pavoque-low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-ramona-low", model_name="de_DE-ramona-low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-thorsten-low", model_name="de_DE-thorsten-low.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-thorsten-medium", model_name="de_DE-thorsten-medium.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-thorsten-high", model_name="de_DE-thorsten-high.onnx",lang="de",),
|
||||
TtsModel(model_dir="vits-piper-de_DE-thorsten_emotional-medium", model_name="de_DE-thorsten_emotional-medium.onnx",lang="de",),
|
||||
# French (FR)
|
||||
TtsModel(model_dir="vits-piper-fr_FR-upmc-medium", model_name="fr_FR-upmc-medium.onnx",lang="fr",),
|
||||
TtsModel(model_dir="vits-piper-fr_FR-siwis-low", model_name="fr_FR-siwis-low.onnx",lang="fr",),
|
||||
TtsModel(model_dir="vits-piper-fr_FR-siwis-medium", model_name="fr_FR-siwis-medium.onnx",lang="fr",),
|
||||
|
||||
# Spanish (ES)
|
||||
TtsModel(model_dir="vits-piper-es_ES-carlfm-x_low", model_name="es_ES-carlfm-x_low.onnx",lang="es",),
|
||||
TtsModel(model_dir="vits-piper-es_ES-davefx-medium", model_name="es_ES-davefx-medium.onnx",lang="es",),
|
||||
TtsModel(model_dir="vits-piper-es_ES-mls_10246-low", model_name="es_ES-mls_10246-low.onnx",lang="es",),
|
||||
TtsModel(model_dir="vits-piper-es_ES-mls_9972-low", model_name="es_ES-mls_9972-low.onnx",lang="es",),
|
||||
TtsModel(model_dir="vits-piper-es_ES-sharvard-medium", model_name="es_ES-sharvard-medium.onnx",lang="es",),
|
||||
|
||||
# Spanish (MX)
|
||||
TtsModel(model_dir="vits-piper-es_MX-ald-medium", model_name="es_MX-ald-medium.onnx",lang="es",),
|
||||
# fmt: on
|
||||
]
|
||||
|
||||
@@ -162,7 +225,8 @@ def main():
|
||||
s = f.read()
|
||||
template = environment.from_string(s)
|
||||
d = dict()
|
||||
all_model_list = get_all_models()
|
||||
# all_model_list = get_all_models()
|
||||
all_model_list = get_piper_models()
|
||||
num_models = len(all_model_list)
|
||||
|
||||
num_per_runner = num_models // total
|
||||
|
||||
Reference in New Issue
Block a user