add more text-to-speech models from piper (#988)

2024-06-11 15:22:48 +08:00
parent fc09227cd1
commit 09efe54808
4 changed files with 81 additions and 71 deletions
--- a/README.md
+++ b/README.md
@@ -31,6 +31,7 @@ with the following APIs
  - C++, C, Python, Go, ``C#``
  - Java, Kotlin, JavaScript
  - Swift
+  - Dart

 ## Links for pre-built Android APKs

--- a/scripts/apk/generate-asr-2pass-apk-script.py
+++ b/scripts/apk/generate-asr-2pass-apk-script.py
@@ -52,13 +52,13 @@ def get_2nd_models():
            short_name="whisper_tiny",
            cmd="""
            pushd $model_name
-            rm -v tiny.en-encoder.onnx
-            rm -v tiny.en-decoder.onnx
+            rm -fv tiny.en-encoder.onnx
+            rm -fv tiny.en-decoder.onnx
            rm -rf test_wavs
-            rm -v *.py
-            rm -v requirements.txt
-            rm -v .gitignore
-            rm -v README.md
+            rm -fv *.py
+            rm -fv requirements.txt
+            rm -fv .gitignore
+            rm -fv README.md

            ls -lh

@@ -73,7 +73,7 @@ def get_2nd_models():
            cmd="""
            pushd $model_name

-            rm -v README.md
+            rm -fv README.md
            rm -rfv test_wavs
            rm model.onnx

@@ -91,7 +91,7 @@ def get_2nd_models():
            pushd $model_name

            rm -rfv test_wavs
-            rm -v README.md
+            rm -fv README.md
            mv -v data/lang_char/tokens.txt ./
            rm -rfv data/lang_char

@@ -119,15 +119,15 @@ def get_1st_models():
            short_name="zipformer",
            cmd="""
            pushd $model_name
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
-            rm -v bpe.model
-            rm -v README.md
-            rm -v .gitattributes
-            rm -v *state*
+            rm -fv *.sh
+            rm -fv bpe.model
+            rm -fv README.md
+            rm -fv .gitattributes
+            rm -fv *state*
            rm -rfv test_wavs

            ls -lh
@@ -142,12 +142,12 @@ def get_1st_models():
            short_name="zipformer2",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx
-            rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
-            rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
+            rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
+            rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
+            rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx

-            rm -v README.md
-            rm -v bpe.model
+            rm -fv README.md
+            rm -fv bpe.model
            rm -rfv test_wavs

            ls -lh
@@ -162,14 +162,14 @@ def get_1st_models():
            short_name="zipformer2",
            cmd="""
            pushd $model_name
-            rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
-            rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
-            rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
+            rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
+            rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
+            rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx

-            rm -v data/lang_char/lexicon.txt
-            rm -v data/lang_char/words.txt
+            rm -fv data/lang_char/lexicon.txt
+            rm -fv data/lang_char/words.txt
            rm -rfv test_wavs
-            rm -v README.md
+            rm -fv README.md

            ls -lh exp/
            ls -lh data/lang_char
@@ -184,11 +184,11 @@ def get_1st_models():
            short_name="zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx
-            rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
-            rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
+            rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
+            rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
+            rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx

-            rm -v *.sh
+            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

@@ -204,11 +204,11 @@ def get_1st_models():
            short_name="small_zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
+            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

@@ -224,11 +224,11 @@ def get_1st_models():
            short_name="small_zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
+            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

--- a/scripts/apk/generate-asr-apk-script.py
+++ b/scripts/apk/generate-asr-apk-script.py
@@ -52,15 +52,15 @@ def get_models():
            short_name="zipformer",
            cmd="""
            pushd $model_name
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
-            rm -v bpe.model
-            rm -v README.md
-            rm -v .gitattributes
-            rm -v *state*
+            rm -fv *.sh
+            rm -fv bpe.model
+            rm -fv README.md
+            rm -fv .gitattributes
+            rm -fv *state*
            rm -rfv test_wavs

            ls -lh
@@ -75,12 +75,12 @@ def get_models():
            short_name="zipformer2",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1-chunk-16-left-128.onnx
-            rm -v decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
-            rm -v joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx
+            rm -fv encoder-epoch-99-avg-1-chunk-16-left-128.onnx
+            rm -fv decoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx
+            rm -fv joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx

-            rm -v README.md
-            rm -v bpe.model
+            rm -fv README.md
+            rm -fv bpe.model
            rm -rfv test_wavs

            ls -lh
@@ -95,14 +95,14 @@ def get_models():
            short_name="zipformer2",
            cmd="""
            pushd $model_name
-            rm -v exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
-            rm -v exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
-            rm -v exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx
+            rm -fv exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx
+            rm -fv exp/decoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx
+            rm -fv exp/joiner-epoch-12-avg-4-chunk-16-left-128.int8.onnx

-            rm -v data/lang_char/lexicon.txt
-            rm -v data/lang_char/words.txt
+            rm -fv data/lang_char/lexicon.txt
+            rm -fv data/lang_char/words.txt
            rm -rfv test_wavs
-            rm -v README.md
+            rm -fv README.md

            ls -lh exp/
            ls -lh data/lang_char
@@ -117,12 +117,12 @@ def get_models():
            short_name="zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-29-avg-9-with-averaged-model.onnx
-            rm -v decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
-            rm -v joiner-epoch-29-avg-9-with-averaged-model.int8.onnx
+            rm -fv encoder-epoch-29-avg-9-with-averaged-model.onnx
+            rm -fv decoder-epoch-29-avg-9-with-averaged-model.int8.onnx
+            rm -fv joiner-epoch-29-avg-9-with-averaged-model.int8.onnx

-            rm -v *.sh
-            rm -rf test_wavs
+            rm -fv *.sh
+            rm -rfv test_wavs
            rm README.md

            ls -lh
@@ -137,11 +137,11 @@ def get_models():
            short_name="small_zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
+            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

@@ -157,11 +157,11 @@ def get_models():
            short_name="small_zipformer",
            cmd="""
            pushd $model_name
-            rm -v encoder-epoch-99-avg-1.onnx
-            rm -v decoder-epoch-99-avg-1.int8.onnx
-            rm -v joiner-epoch-99-avg-1.onnx
+            rm -fv encoder-epoch-99-avg-1.onnx
+            rm -fv decoder-epoch-99-avg-1.int8.onnx
+            rm -fv joiner-epoch-99-avg-1.onnx

-            rm -v *.sh
+            rm -fv *.sh
            rm -rf test_wavs
            rm README.md

--- a/scripts/apk/generate-tts-apk-script.py
+++ b/scripts/apk/generate-tts-apk-script.py
@@ -103,6 +103,7 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-ca_ES-upc_pau-x_low"),
        TtsModel(model_dir="vits-piper-cs_CZ-jirka-medium"),
+        TtsModel(model_dir="vits-piper-cy_GB-gwryw_gogleddol-medium"),
        TtsModel(model_dir="vits-piper-da_DK-talesyntese-medium"),
        TtsModel(model_dir="vits-piper-de_DE-eva_k-x_low"),
        TtsModel(model_dir="vits-piper-de_DE-karlsson-low"),
@@ -126,15 +127,19 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-en_GB-semaine-medium"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-low"),
        TtsModel(model_dir="vits-piper-en_GB-southern_english_female-medium"),
+        TtsModel(model_dir="vits-piper-en_GB-southern_english_male-medium"),
        TtsModel(model_dir="vits-piper-en_GB-sweetbbak-amy"),
        TtsModel(model_dir="vits-piper-en_GB-vctk-medium"),
        TtsModel(model_dir="vits-piper-en_US-amy-low"),
        TtsModel(model_dir="vits-piper-en_US-amy-medium"),
        TtsModel(model_dir="vits-piper-en_US-arctic-medium"),
+        TtsModel(model_dir="vits-piper-en_US-bryce-medium"),
        TtsModel(model_dir="vits-piper-en_US-danny-low"),
        TtsModel(model_dir="vits-piper-en_US-glados"),
+        TtsModel(model_dir="vits-piper-en_US-hfc_female-medium"),
        TtsModel(model_dir="vits-piper-en_US-hfc_male-medium"),
        TtsModel(model_dir="vits-piper-en_US-joe-medium"),
+        TtsModel(model_dir="vits-piper-en_US-john-medium"),
        TtsModel(model_dir="vits-piper-en_US-kathleen-low"),
        TtsModel(model_dir="vits-piper-en_US-kristin-medium"),
        TtsModel(model_dir="vits-piper-en_US-kusal-medium"),
@@ -146,6 +151,7 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-en_US-libritts_r-medium"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-high"),
        TtsModel(model_dir="vits-piper-en_US-ljspeech-medium"),
+        TtsModel(model_dir="vits-piper-en_US-norman-medium"),
        TtsModel(model_dir="vits-piper-en_US-ryan-high"),
        TtsModel(model_dir="vits-piper-en_US-ryan-low"),
        TtsModel(model_dir="vits-piper-en_US-ryan-medium"),
@@ -162,6 +168,7 @@ def get_piper_models() -> List[TtsModel]:
        #  TtsModel(model_dir="vits-piper-fr_FR-mls-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-low"),
        TtsModel(model_dir="vits-piper-fr_FR-siwis-medium"),
+        TtsModel(model_dir="vits-piper-fr_FR-tom-medium"),
        TtsModel(model_dir="vits-piper-fr_FR-upmc-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-anna-medium"),
        TtsModel(model_dir="vits-piper-hu_HU-berta-medium"),
@@ -170,6 +177,7 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-is_IS-salka-medium"),
        TtsModel(model_dir="vits-piper-is_IS-steinn-medium"),
        TtsModel(model_dir="vits-piper-is_IS-ugla-medium"),
+        TtsModel(model_dir="vits-piper-it_IT-paola-medium"),
        TtsModel(model_dir="vits-piper-it_IT-riccardo-x_low"),
        TtsModel(model_dir="vits-piper-ka_GE-natia-medium"),
        TtsModel(model_dir="vits-piper-kk_KZ-iseke-x_low"),
@@ -204,6 +212,7 @@ def get_piper_models() -> List[TtsModel]:
        TtsModel(model_dir="vits-piper-sw_CD-lanfrica-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-dfki-medium"),
        TtsModel(model_dir="vits-piper-tr_TR-fahrettin-medium"),
+        TtsModel(model_dir="vits-piper-tr_TR-fettah-medium"),
        TtsModel(model_dir="vits-piper-uk_UA-lada-x_low"),
        TtsModel(model_dir="vits-piper-uk_UA-ukrainian_tts-medium"),
        TtsModel(model_dir="vits-piper-vi_VN-25hours_single-low"),