Add Android demo for Kokoro TTS 1.0 (#1799)
This commit is contained in:
1
.github/workflows/apk-tts-engine.yaml
vendored
1
.github/workflows/apk-tts-engine.yaml
vendored
@@ -26,7 +26,6 @@ jobs:
|
|||||||
total: ["40"]
|
total: ["40"]
|
||||||
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
|
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
|
||||||
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
12
.github/workflows/export-kokoro.yaml
vendored
12
.github/workflows/export-kokoro.yaml
vendored
@@ -193,7 +193,7 @@ jobs:
|
|||||||
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
|
cp -v ../scripts/kokoro/v1.0/README.md ./README.md
|
||||||
cp -v ../LICENSE ./
|
cp -v ../LICENSE ./
|
||||||
cp -av ../dict ./
|
cp -av ../dict ./
|
||||||
cp -v ../*.fst $d/
|
cp -v ../*.fst ./
|
||||||
|
|
||||||
git lfs track "*.onnx"
|
git lfs track "*.onnx"
|
||||||
git add .
|
git add .
|
||||||
@@ -206,6 +206,7 @@ jobs:
|
|||||||
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
|
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/kokoro-multi-lang-v1_0 main || true
|
||||||
|
|
||||||
- name: Release
|
- name: Release
|
||||||
|
if: github.repository_owner == 'csukuangfj'
|
||||||
uses: svenstaro/upload-release-action@v2
|
uses: svenstaro/upload-release-action@v2
|
||||||
with:
|
with:
|
||||||
file_glob: true
|
file_glob: true
|
||||||
@@ -214,3 +215,12 @@ jobs:
|
|||||||
repo_name: k2-fsa/sherpa-onnx
|
repo_name: k2-fsa/sherpa-onnx
|
||||||
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
||||||
tag: tts-models
|
tag: tts-models
|
||||||
|
|
||||||
|
- name: Release
|
||||||
|
if: github.repository_owner == 'k2-fsa'
|
||||||
|
uses: svenstaro/upload-release-action@v2
|
||||||
|
with:
|
||||||
|
file_glob: true
|
||||||
|
file: ./*.tar.bz2
|
||||||
|
overwrite: true
|
||||||
|
tag: tts-models
|
||||||
|
|||||||
@@ -281,6 +281,16 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// voices = "voices.bin"
|
// voices = "voices.bin"
|
||||||
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
||||||
|
|
||||||
|
// Example 10
|
||||||
|
// kokoro-multi-lang-v1_0
|
||||||
|
// modelDir = "kokoro-multi-lang-v1_0"
|
||||||
|
// modelName = "model.onnx"
|
||||||
|
// voices = "voices.bin"
|
||||||
|
// dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
|
||||||
|
// dictDir = "kokoro-multi-lang-v1_0/dict"
|
||||||
|
// lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
|
||||||
|
// ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
|
||||||
|
|
||||||
if (dataDir != null) {
|
if (dataDir != null) {
|
||||||
val newDir = copyDataDir(dataDir!!)
|
val newDir = copyDataDir(dataDir!!)
|
||||||
dataDir = "$newDir/$dataDir"
|
dataDir = "$newDir/$dataDir"
|
||||||
@@ -289,7 +299,9 @@ class MainActivity : AppCompatActivity() {
|
|||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir(dictDir!!)
|
val newDir = copyDataDir(dictDir!!)
|
||||||
dictDir = "$newDir/$dictDir"
|
dictDir = "$newDir/$dictDir"
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
if (ruleFsts == null) {
|
||||||
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val config = getOfflineTtsConfig(
|
val config = getOfflineTtsConfig(
|
||||||
|
|||||||
@@ -152,6 +152,20 @@ object TtsEngine {
|
|||||||
// voices = "voices.bin"
|
// voices = "voices.bin"
|
||||||
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
// dataDir = "kokoro-en-v0_19/espeak-ng-data"
|
||||||
// lang = "eng"
|
// lang = "eng"
|
||||||
|
|
||||||
|
// Example 10
|
||||||
|
// kokoro-multi-lang-v1_0
|
||||||
|
// modelDir = "kokoro-multi-lang-v1_0"
|
||||||
|
// modelName = "model.onnx"
|
||||||
|
// voices = "voices.bin"
|
||||||
|
// dataDir = "kokoro-multi-lang-v1_0/espeak-ng-data"
|
||||||
|
// dictDir = "kokoro-multi-lang-v1_0/dict"
|
||||||
|
// lexicon = "kokoro-multi-lang-v1_0/lexicon-us-en.txt,kokoro-multi-lang-v1_0/lexicon-zh.txt"
|
||||||
|
// lang = "eng"
|
||||||
|
// ruleFsts = "$modelDir/phone-zh.fst,$modelDir/date-zh.fst,$modelDir/number-zh.fst"
|
||||||
|
//
|
||||||
|
// This model supports many languages, e.g., English, Chinese, etc.
|
||||||
|
// We set lang to eng here.
|
||||||
}
|
}
|
||||||
|
|
||||||
fun createTts(context: Context) {
|
fun createTts(context: Context) {
|
||||||
@@ -172,7 +186,9 @@ object TtsEngine {
|
|||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir(context, dictDir!!)
|
val newDir = copyDataDir(context, dictDir!!)
|
||||||
dictDir = "$newDir/$dictDir"
|
dictDir = "$newDir/$dictDir"
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
if (ruleFsts == null) {
|
||||||
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
val config = getOfflineTtsConfig(
|
val config = getOfflineTtsConfig(
|
||||||
|
|||||||
@@ -97,6 +97,11 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
|
|||||||
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
|
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./TtsEngine.kt
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tts_model.lexicon %}
|
||||||
|
lexicon={{ tts_model.lexicon }}
|
||||||
|
sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./TtsEngine.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
popd
|
popd
|
||||||
|
|
||||||
@@ -104,6 +109,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
|
|||||||
lang=zh_en
|
lang=zh_en
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
|
||||||
|
lang=zh_en
|
||||||
|
fi
|
||||||
|
|
||||||
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "build tts apk for $arch"
|
log "build tts apk for $arch"
|
||||||
|
|||||||
@@ -96,6 +96,11 @@ sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
|
|||||||
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
|
sed -i.bak s/"lexicon = null"/"lexicon = \"lexicon.txt\""/ ./MainActivity.kt
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tts_model.lexicon %}
|
||||||
|
lexicon={{ tts_model.lexicon }}
|
||||||
|
sed -i.bak s%"lexicon = null"%"lexicon = \"$lexicon\""% ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
git diff
|
git diff
|
||||||
popd
|
popd
|
||||||
|
|
||||||
@@ -103,6 +108,10 @@ if [[ $model_dir == vits-melo-tts-zh_en ]]; then
|
|||||||
lang=zh_en
|
lang=zh_en
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ $model_dir == kokoro-multi-lang-v1_0 ]]; then
|
||||||
|
lang=zh_en
|
||||||
|
fi
|
||||||
|
|
||||||
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
||||||
log "------------------------------------------------------------"
|
log "------------------------------------------------------------"
|
||||||
log "build tts apk for $arch"
|
log "build tts apk for $arch"
|
||||||
|
|||||||
@@ -41,6 +41,7 @@ class TtsModel:
|
|||||||
dict_dir: Optional[str] = None
|
dict_dir: Optional[str] = None
|
||||||
is_char: bool = False
|
is_char: bool = False
|
||||||
lang_iso_639_3: str = ""
|
lang_iso_639_3: str = ""
|
||||||
|
lexicon: str = ""
|
||||||
|
|
||||||
|
|
||||||
def convert_lang_to_iso_639_3(models: List[TtsModel]):
|
def convert_lang_to_iso_639_3(models: List[TtsModel]):
|
||||||
@@ -422,7 +423,21 @@ def get_kokoro_models() -> List[TtsModel]:
|
|||||||
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
||||||
m.voices = "voices.bin"
|
m.voices = "voices.bin"
|
||||||
|
|
||||||
return english_models
|
multi_lingual_models = [
|
||||||
|
TtsModel(
|
||||||
|
model_dir="kokoro-multi-lang-v1_0",
|
||||||
|
model_name="model.onnx",
|
||||||
|
lang="en",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
for m in multi_lingual_models:
|
||||||
|
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
||||||
|
m.dict_dir = f"{m.model_dir}/dict"
|
||||||
|
m.voices = "voices.bin"
|
||||||
|
m.lexicon = f"{m.model_dir}/lexicon-us-en.txt,{m.model_dir}/lexicon-zh.txt"
|
||||||
|
m.rule_fsts = f"{m.model_dir}/phone-zh.fst,{m.model_dir}/date-zh.fst,{m.model_dir}/number-zh.fst"
|
||||||
|
|
||||||
|
return english_models + multi_lingual_models
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
@@ -256,7 +256,11 @@ fun getOfflineTtsConfig(
|
|||||||
voices = "$modelDir/$voices",
|
voices = "$modelDir/$voices",
|
||||||
tokens = "$modelDir/tokens.txt",
|
tokens = "$modelDir/tokens.txt",
|
||||||
dataDir = dataDir,
|
dataDir = dataDir,
|
||||||
lexicon = if ("," in lexicon) lexicon else "$modelDir/$lexicon",
|
lexicon = when {
|
||||||
|
lexicon == "" -> lexicon
|
||||||
|
"," in lexicon -> lexicon
|
||||||
|
else -> "$modelDir/$lexicon"
|
||||||
|
},
|
||||||
dictDir = dictDir,
|
dictDir = dictDir,
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
Reference in New Issue
Block a user