Add Android demo for Kokoro TTS 1.0 (#1799)
This commit is contained in:
1
.github/workflows/apk-tts-engine.yaml
vendored
1
.github/workflows/apk-tts-engine.yaml
vendored
@@ -26,7 +26,6 @@ jobs:
|
||||
total: ["40"]
|
||||
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
|
||||
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
|
||||
12
.github/workflows/export-kokoro.yaml
vendored
12
.github/workflows/export-kokoro.yaml
vendored
@@ -193,7 +193,7 @@ jobs:
|
||||
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
|
||||
cp -v ../LICENSE ./
|
||||
cp -av ../dict ./
|
||||
cp -v ../*.fst $d/
|
||||
cp -v ../*.fst ./
|
||||
|
||||
git lfs track "*.onnx"
|
||||
git add .
|
||||
@@ -206,6 +206,7 @@ jobs:
|
||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
|
||||
|
||||
- name: Release
|
||||
if: github.repository_owner == 'csukuangfj'
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
@@ -214,3 +215,12 @@ jobs:
|
||||
repo_name: k2-fsa/sherpa-onnx
|
||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||
tag: tts-models
|
||||
|
||||
- name: Release
|
||||
if: github.repository_owner == 'k2-fsa'
|
||||
uses: svenstaro/upload-release-action@v2
|
||||
with:
|
||||
file_glob: true
|
||||
file: ./*.tar.bz2
|
||||
overwrite: true
|
||||
tag: tts-models
|
||||
|
||||
@@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() {
|
||||
// voices = "voices.bin"
|
||||
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
||||
|
||||
// Example 10
|
||||
// kokoro-multi-lang-v1_0
|
||||
// modelDir = "kokoro-multi-lang-v1_0"
|
||||
// modelName = "model.onnx"
|
||||
// voices = "voices.bin"
|
||||
// dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
|
||||
// dictDir = "kokoro-multi-lang-v1_0/dict"
|
||||
// lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
|
||||
// ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
|
||||
|
||||
if (dataDir != null) {
|
||||
val newDir = copyDataDir(dataDir!!)
|
||||
dataDir = "$newDir/$dataDir"
|
||||
@@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() {
|
||||
if (dictDir != null) {
|
||||
val newDir = copyDataDir(dictDir!!)
|
||||
dictDir = "$newDir/$dictDir"
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
if (ruleFsts == null) {
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
}
|
||||
}
|
||||
|
||||
val config = getOfflineTtsConfig(
|
||||
|
||||
@@ -152,6 +152,20 @@ object TtsEngine {
|
||||
// voices = "voices.bin"
|
||||
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
||||
// lang = "eng"
|
||||
|
||||
// Example 10
|
||||
// kokoro-multi-lang-v1_0
|
||||
// modelDir = "kokoro-multi-lang-v1_0"
|
||||
// modelName = "model.onnx"
|
||||
// voices = "voices.bin"
|
||||
// dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
|
||||
// dictDir = "kokoro-multi-lang-v1_0/dict"
|
||||
// lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
|
||||
// lang = "eng"
|
||||
// ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
|
||||
//
|
||||
// This model supports many languages, e.g., English, Chinese, etc.
|
||||
// We set lang to eng here.
|
||||
}
|
||||
|
||||
fun createTts(context: Context) {
|
||||
@@ -172,7 +186,9 @@ object TtsEngine {
|
||||
if (dictDir != null) {
|
||||
val newDir = copyDataDir(context, dictDir!!)
|
||||
dictDir = "$newDir/$dictDir"
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
if (ruleFsts == null) {
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
}
|
||||
}
|
||||
|
||||
val config = getOfflineTtsConfig(
|
||||
|
||||
@@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
|
||||
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
|
||||
{% endif %}
|
||||
|
||||
{% if tts_model.lexicon %}
|
||||
lexicon={{ tts_model.lexicon }}
|
||||
sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./TtsEngine.kt
|
||||
{% endif %}
|
||||
|
||||
git diff
|
||||
popd
|
||||
|
||||
@@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
|
||||
lang=zh_en
|
||||
fi
|
||||
|
||||
if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
|
||||
lang=zh_en
|
||||
fi
|
||||
|
||||
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
||||
log "------------------------------------------------------------"
|
||||
log "build tts apk for $arch"
|
||||
|
||||
@@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
|
||||
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
|
||||
{% endif %}
|
||||
|
||||
{% if tts_model.lexicon %}
|
||||
lexicon={{ tts_model.lexicon }}
|
||||
sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt
|
||||
{% endif %}
|
||||
|
||||
git diff
|
||||
popd
|
||||
|
||||
@@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
|
||||
lang=zh_en
|
||||
fi
|
||||
|
||||
if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
|
||||
lang=zh_en
|
||||
fi
|
||||
|
||||
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
||||
log "------------------------------------------------------------"
|
||||
log "build tts apk for $arch"
|
||||
|
||||
@@ -41,6 +41,7 @@ class TtsModel:
|
||||
dict_dir: Optional[str] = None
|
||||
is_char: bool = False
|
||||
lang_iso_639_3: str = ""
|
||||
lexicon: str = ""
|
||||
|
||||
|
||||
def convert_lang_to_iso_639_3(models: List[TtsModel]):
|
||||
@@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]:
|
||||
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
||||
m.voices = "voices.bin"
|
||||
|
||||
return english_models
|
||||
multi_lingual_models = [
|
||||
TtsModel(
|
||||
model_dir="kokoro-multi-lang-v1_0",
|
||||
model_name="model.onnx",
|
||||
lang="en",
|
||||
)
|
||||
]
|
||||
for m in multi_lingual_models:
|
||||
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
||||
m.dict_dir = f"{m.model_dir}/dict"
|
||||
m.voices = "voices.bin"
|
||||
m.lexicon = f"{m.model_dir}/lexicon-us-en.txt,{m.model_dir}/lexicon-zh.txt"
|
||||
m.rule_fsts = f"{m.model_dir}/phone-zh.fst,{m.model_dir}/date-zh.fst,{m.model_dir}/number-zh.fst"
|
||||
|
||||
return english_models + multi_lingual_models
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
@@ -256,7 +256,11 @@ fun getOfflineTtsConfig(
|
||||
voices = "$modelDir/$voices",
|
||||
tokens = "$modelDir/tokens.txt",
|
||||
dataDir = dataDir,
|
||||
lexicon = if ("," in lexicon) lexicon else "$modelDir/$lexicon",
|
||||
lexicon = when {
|
||||
lexicon == "" -> lexicon
|
||||
"," in lexicon -> lexicon
|
||||
else -> "$modelDir/$lexicon"
|
||||
},
|
||||
dictDir = dictDir,
|
||||
)
|
||||
} else {
|
||||
|
||||
Reference in New Issue
Block a user