Add Android demo for MatchaTTS models. (#1683)
This commit is contained in:
1
.github/workflows/apk-tts-engine.yaml
vendored
1
.github/workflows/apk-tts-engine.yaml
vendored
@@ -26,6 +26,7 @@ jobs:
|
|||||||
total: ["40"]
|
total: ["40"]
|
||||||
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
|
index: ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39"]
|
||||||
|
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
|
|||||||
@@ -183,6 +183,8 @@ class MainActivity : AppCompatActivity() {
|
|||||||
private fun initTts() {
|
private fun initTts() {
|
||||||
var modelDir: String?
|
var modelDir: String?
|
||||||
var modelName: String?
|
var modelName: String?
|
||||||
|
var acousticModelName: String?
|
||||||
|
var vocoder: String?
|
||||||
var ruleFsts: String?
|
var ruleFsts: String?
|
||||||
var ruleFars: String?
|
var ruleFars: String?
|
||||||
var lexicon: String?
|
var lexicon: String?
|
||||||
@@ -193,8 +195,18 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// The purpose of such a design is to make the CI test easier
|
// The purpose of such a design is to make the CI test easier
|
||||||
// Please see
|
// Please see
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
||||||
modelDir = null
|
|
||||||
|
// VITS -- begin
|
||||||
modelName = null
|
modelName = null
|
||||||
|
// VITS -- end
|
||||||
|
|
||||||
|
// Matcha -- begin
|
||||||
|
acousticModelName = null
|
||||||
|
vocoder = null
|
||||||
|
// Matcha -- end
|
||||||
|
|
||||||
|
|
||||||
|
modelDir = null
|
||||||
ruleFsts = null
|
ruleFsts = null
|
||||||
ruleFars = null
|
ruleFars = null
|
||||||
lexicon = null
|
lexicon = null
|
||||||
@@ -217,7 +229,6 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||||
// modelDir = "vits-icefall-zh-aishell3"
|
// modelDir = "vits-icefall-zh-aishell3"
|
||||||
// modelName = "model.onnx"
|
// modelName = "model.onnx"
|
||||||
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
|
|
||||||
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
||||||
// lexicon = "lexicon.txt"
|
// lexicon = "lexicon.txt"
|
||||||
|
|
||||||
@@ -233,24 +244,47 @@ class MainActivity : AppCompatActivity() {
|
|||||||
// modelDir = "vits-coqui-de-css10"
|
// modelDir = "vits-coqui-de-css10"
|
||||||
// modelName = "model.onnx"
|
// modelName = "model.onnx"
|
||||||
|
|
||||||
|
// Example 6
|
||||||
|
// vits-melo-tts-zh_en
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
|
||||||
|
// modelDir = "vits-melo-tts-zh_en"
|
||||||
|
// modelName = "model.onnx"
|
||||||
|
// lexicon = "lexicon.txt"
|
||||||
|
// dictDir = "vits-melo-tts-zh_en/dict"
|
||||||
|
|
||||||
|
// Example 7
|
||||||
|
// matcha-icefall-zh-baker
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||||
|
// modelDir = "matcha-icefall-zh-baker"
|
||||||
|
// acousticModelName = "model-steps-3.onnx"
|
||||||
|
// vocoder = "hifigan_v2.onnx"
|
||||||
|
// lexicon = "lexicon.txt"
|
||||||
|
// dictDir = "matcha-icefall-zh-baker/dict"
|
||||||
|
|
||||||
|
// Example 8
|
||||||
|
// matcha-icefall-en_US-ljspeech
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
|
// modelDir = "matcha-icefall-en_US-ljspeech"
|
||||||
|
// acousticModelName = "model-steps-3.onnx"
|
||||||
|
// vocoder = "hifigan_v2.onnx"
|
||||||
|
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
|
||||||
|
|
||||||
if (dataDir != null) {
|
if (dataDir != null) {
|
||||||
val newDir = copyDataDir(modelDir!!)
|
val newDir = copyDataDir(dataDir!!)
|
||||||
modelDir = newDir + "/" + modelDir
|
dataDir = "$newDir/$dataDir"
|
||||||
dataDir = newDir + "/" + dataDir
|
|
||||||
assets = null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir(modelDir!!)
|
val newDir = copyDataDir(dictDir!!)
|
||||||
modelDir = newDir + "/" + modelDir
|
dictDir = "$newDir/$dictDir"
|
||||||
dictDir = modelDir + "/" + "dict"
|
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
assets = null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val config = getOfflineTtsConfig(
|
val config = getOfflineTtsConfig(
|
||||||
modelDir = modelDir!!,
|
modelDir = modelDir!!,
|
||||||
modelName = modelName!!,
|
modelName = modelName ?: "",
|
||||||
|
acousticModelName = acousticModelName ?: "",
|
||||||
|
vocoder = vocoder ?: "",
|
||||||
lexicon = lexicon ?: "",
|
lexicon = lexicon ?: "",
|
||||||
dataDir = dataDir ?: "",
|
dataDir = dataDir ?: "",
|
||||||
dictDir = dictDir ?: "",
|
dictDir = dictDir ?: "",
|
||||||
|
|||||||
@@ -57,7 +57,7 @@ class MainActivity : ComponentActivity() {
|
|||||||
color = MaterialTheme.colorScheme.background
|
color = MaterialTheme.colorScheme.background
|
||||||
) {
|
) {
|
||||||
Scaffold(topBar = {
|
Scaffold(topBar = {
|
||||||
TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
|
TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
|
||||||
}) {
|
}) {
|
||||||
Box(modifier = Modifier.padding(it)) {
|
Box(modifier = Modifier.padding(it)) {
|
||||||
Column(modifier = Modifier.padding(16.dp)) {
|
Column(modifier = Modifier.padding(16.dp)) {
|
||||||
@@ -65,8 +65,8 @@ class MainActivity : ComponentActivity() {
|
|||||||
Text("Speed " + String.format("%.1f", TtsEngine.speed))
|
Text("Speed " + String.format("%.1f", TtsEngine.speed))
|
||||||
Slider(
|
Slider(
|
||||||
value = TtsEngine.speedState.value,
|
value = TtsEngine.speedState.value,
|
||||||
onValueChange = {
|
onValueChange = {
|
||||||
TtsEngine.speed = it
|
TtsEngine.speed = it
|
||||||
preferenceHelper.setSpeed(it)
|
preferenceHelper.setSpeed(it)
|
||||||
},
|
},
|
||||||
valueRange = 0.2F..3.0F,
|
valueRange = 0.2F..3.0F,
|
||||||
@@ -138,7 +138,9 @@ class MainActivity : ComponentActivity() {
|
|||||||
val filename =
|
val filename =
|
||||||
application.filesDir.absolutePath + "/generated.wav"
|
application.filesDir.absolutePath + "/generated.wav"
|
||||||
val ok =
|
val ok =
|
||||||
audio.samples.isNotEmpty() && audio.save(filename)
|
audio.samples.isNotEmpty() && audio.save(
|
||||||
|
filename
|
||||||
|
)
|
||||||
|
|
||||||
if (ok) {
|
if (ok) {
|
||||||
stopMediaPlayer()
|
stopMediaPlayer()
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
package com.k2fsa.sherpa.onnx.tts.engine
|
package com.k2fsa.sherpa.onnx.tts.engine
|
||||||
|
|
||||||
|
import PreferenceHelper
|
||||||
import android.content.Context
|
import android.content.Context
|
||||||
import android.content.res.AssetManager
|
import android.content.res.AssetManager
|
||||||
import android.util.Log
|
import android.util.Log
|
||||||
@@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
|
|||||||
import java.io.File
|
import java.io.File
|
||||||
import java.io.FileOutputStream
|
import java.io.FileOutputStream
|
||||||
import java.io.IOException
|
import java.io.IOException
|
||||||
import PreferenceHelper
|
|
||||||
|
|
||||||
object TtsEngine {
|
object TtsEngine {
|
||||||
var tts: OfflineTts? = null
|
var tts: OfflineTts? = null
|
||||||
@@ -41,6 +41,8 @@ object TtsEngine {
|
|||||||
|
|
||||||
private var modelDir: String? = null
|
private var modelDir: String? = null
|
||||||
private var modelName: String? = null
|
private var modelName: String? = null
|
||||||
|
private var acousticModelName: String? = null
|
||||||
|
private var vocoder: String? = null
|
||||||
private var ruleFsts: String? = null
|
private var ruleFsts: String? = null
|
||||||
private var ruleFars: String? = null
|
private var ruleFars: String? = null
|
||||||
private var lexicon: String? = null
|
private var lexicon: String? = null
|
||||||
@@ -52,8 +54,17 @@ object TtsEngine {
|
|||||||
// The purpose of such a design is to make the CI test easier
|
// The purpose of such a design is to make the CI test easier
|
||||||
// Please see
|
// Please see
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
||||||
modelDir = null
|
//
|
||||||
|
// For VITS -- begin
|
||||||
modelName = null
|
modelName = null
|
||||||
|
// For VITS -- end
|
||||||
|
|
||||||
|
// For Matcha -- begin
|
||||||
|
acousticModelName = null
|
||||||
|
vocoder = null
|
||||||
|
// For Matcha -- end
|
||||||
|
|
||||||
|
modelDir = null
|
||||||
ruleFsts = null
|
ruleFsts = null
|
||||||
ruleFars = null
|
ruleFars = null
|
||||||
lexicon = null
|
lexicon = null
|
||||||
@@ -82,7 +93,6 @@ object TtsEngine {
|
|||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||||
// modelDir = "vits-icefall-zh-aishell3"
|
// modelDir = "vits-icefall-zh-aishell3"
|
||||||
// modelName = "model.onnx"
|
// modelName = "model.onnx"
|
||||||
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
|
|
||||||
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
||||||
// lexicon = "lexicon.txt"
|
// lexicon = "lexicon.txt"
|
||||||
// lang = "zho"
|
// lang = "zho"
|
||||||
@@ -101,8 +111,35 @@ object TtsEngine {
|
|||||||
// modelDir = "vits-coqui-de-css10"
|
// modelDir = "vits-coqui-de-css10"
|
||||||
// modelName = "model.onnx"
|
// modelName = "model.onnx"
|
||||||
// lang = "deu"
|
// lang = "deu"
|
||||||
}
|
|
||||||
|
|
||||||
|
// Example 6
|
||||||
|
// vits-melo-tts-zh_en
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
|
||||||
|
// modelDir = "vits-melo-tts-zh_en"
|
||||||
|
// modelName = "model.onnx"
|
||||||
|
// lexicon = "lexicon.txt"
|
||||||
|
// dictDir = "vits-melo-tts-zh_en/dict"
|
||||||
|
// lang = "zho"
|
||||||
|
|
||||||
|
// Example 7
|
||||||
|
// matcha-icefall-zh-baker
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||||
|
// modelDir = "matcha-icefall-zh-baker"
|
||||||
|
// acousticModelName = "model-steps-3.onnx"
|
||||||
|
// vocoder = "hifigan_v2.onnx"
|
||||||
|
// lexicon = "lexicon.txt"
|
||||||
|
// dictDir = "matcha-icefall-zh-baker/dict"
|
||||||
|
// lang = "zho"
|
||||||
|
|
||||||
|
// Example 8
|
||||||
|
// matcha-icefall-en_US-ljspeech
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
|
// modelDir = "matcha-icefall-en_US-ljspeech"
|
||||||
|
// acousticModelName = "model-steps-3.onnx"
|
||||||
|
// vocoder = "hifigan_v2.onnx"
|
||||||
|
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
|
||||||
|
// lang = "eng"
|
||||||
|
}
|
||||||
|
|
||||||
fun createTts(context: Context) {
|
fun createTts(context: Context) {
|
||||||
Log.i(TAG, "Init Next-gen Kaldi TTS")
|
Log.i(TAG, "Init Next-gen Kaldi TTS")
|
||||||
@@ -115,22 +152,22 @@ object TtsEngine {
|
|||||||
assets = context.assets
|
assets = context.assets
|
||||||
|
|
||||||
if (dataDir != null) {
|
if (dataDir != null) {
|
||||||
val newDir = copyDataDir(context, modelDir!!)
|
val newDir = copyDataDir(context, dataDir!!)
|
||||||
modelDir = "$newDir/$modelDir"
|
|
||||||
dataDir = "$newDir/$dataDir"
|
dataDir = "$newDir/$dataDir"
|
||||||
assets = null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dictDir != null) {
|
if (dictDir != null) {
|
||||||
val newDir = copyDataDir(context, modelDir!!)
|
val newDir = copyDataDir(context, dictDir!!)
|
||||||
modelDir = "$newDir/$modelDir"
|
dictDir = "$newDir/$dictDir"
|
||||||
dictDir = "$modelDir/dict"
|
|
||||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||||
assets = null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
val config = getOfflineTtsConfig(
|
val config = getOfflineTtsConfig(
|
||||||
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
|
modelDir = modelDir!!,
|
||||||
|
modelName = modelName ?: "",
|
||||||
|
acousticModelName = acousticModelName ?: "",
|
||||||
|
vocoder = vocoder ?: "",
|
||||||
|
lexicon = lexicon ?: "",
|
||||||
dataDir = dataDir ?: "",
|
dataDir = dataDir ?: "",
|
||||||
dictDir = dictDir ?: "",
|
dictDir = dictDir ?: "",
|
||||||
ruleFsts = ruleFsts ?: "",
|
ruleFsts = ruleFsts ?: "",
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ mkdir -p apks
|
|||||||
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
|
pushd ./android/SherpaOnnxTtsEngine/app/src/main/assets/
|
||||||
model_dir={{ tts_model.model_dir }}
|
model_dir={{ tts_model.model_dir }}
|
||||||
model_name={{ tts_model.model_name }}
|
model_name={{ tts_model.model_name }}
|
||||||
|
acoustic_model_name={{ tts_model.acoustic_model_name }}
|
||||||
|
vocoder={{ tts_model.vocoder }}
|
||||||
lang={{ tts_model.lang }}
|
lang={{ tts_model.lang }}
|
||||||
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
|
lang_iso_639_3={{ tts_model.lang_iso_639_3 }}
|
||||||
|
|
||||||
@@ -44,15 +46,30 @@ wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$mod
|
|||||||
tar xf $model_dir.tar.bz2
|
tar xf $model_dir.tar.bz2
|
||||||
rm $model_dir.tar.bz2
|
rm $model_dir.tar.bz2
|
||||||
|
|
||||||
|
{% if tts_model.vocoder %}
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
popd
|
popd
|
||||||
# Now we are at the project root directory
|
# Now we are at the project root directory
|
||||||
|
|
||||||
git checkout .
|
git checkout .
|
||||||
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
|
pushd android/SherpaOnnxTtsEngine/app/src/main/java/com/k2fsa/sherpa/onnx/tts/engine
|
||||||
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
|
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./TtsEngine.kt
|
||||||
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
|
|
||||||
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
|
sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
|
||||||
|
|
||||||
|
{% if tts_model.model_name %}
|
||||||
|
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./TtsEngine.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{# Matcha models set acoustic_model_name and leave model_name empty, so gate on acoustic_model_name. #}
{% if tts_model.acoustic_model_name %}
|
||||||
|
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./TtsEngine.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tts_model.vocoder %}
|
||||||
|
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./TtsEngine.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
{% if tts_model.rule_fsts %}
|
{% if tts_model.rule_fsts %}
|
||||||
rule_fsts={{ tts_model.rule_fsts }}
|
rule_fsts={{ tts_model.rule_fsts }}
|
||||||
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
|
sed -i.bak s%"ruleFsts = null"%"ruleFsts = \"$rule_fsts\""% ./TtsEngine.kt
|
||||||
@@ -109,6 +126,7 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
|
rm -rf ./android/SherpaOnnxTtsEngine/app/src/main/assets/$model_dir
|
||||||
|
rm -fv ./android/SherpaOnnxTtsEngine/app/src/main/assets/*.onnx
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
git checkout .
|
git checkout .
|
||||||
|
|||||||
@@ -37,19 +37,38 @@ mkdir -p apks
|
|||||||
pushd ./android/SherpaOnnxTts/app/src/main/assets/
|
pushd ./android/SherpaOnnxTts/app/src/main/assets/
|
||||||
model_dir={{ tts_model.model_dir }}
|
model_dir={{ tts_model.model_dir }}
|
||||||
model_name={{ tts_model.model_name }}
|
model_name={{ tts_model.model_name }}
|
||||||
|
acoustic_model_name={{ tts_model.acoustic_model_name }}
|
||||||
|
vocoder={{ tts_model.vocoder }}
|
||||||
lang={{ tts_model.lang }}
|
lang={{ tts_model.lang }}
|
||||||
|
|
||||||
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
|
wget -qq https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/$model_dir.tar.bz2
|
||||||
tar xf $model_dir.tar.bz2
|
tar xf $model_dir.tar.bz2
|
||||||
rm $model_dir.tar.bz2
|
rm $model_dir.tar.bz2
|
||||||
|
|
||||||
|
{% if tts_model.vocoder %}
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/vocoder-models/$vocoder
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
popd
|
popd
|
||||||
# Now we are at the project root directory
|
# Now we are at the project root directory
|
||||||
|
|
||||||
git checkout .
|
git checkout .
|
||||||
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
|
pushd android/SherpaOnnxTts/app/src/main/java/com/k2fsa/sherpa/onnx
|
||||||
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
|
sed -i.bak s/"modelDir = null"/"modelDir = \"$model_dir\""/ ./MainActivity.kt
|
||||||
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
|
|
||||||
|
|
||||||
|
{% if tts_model.model_name %}
|
||||||
|
sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tts_model.acoustic_model_name %}
|
||||||
|
sed -i.bak s/"acousticModelName = null"/"acousticModelName = \"$acoustic_model_name\""/ ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if tts_model.vocoder %}
|
||||||
|
sed -i.bak s/"vocoder = null"/"vocoder = \"$vocoder\""/ ./MainActivity.kt
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
|
||||||
{% if tts_model.rule_fsts %}
|
{% if tts_model.rule_fsts %}
|
||||||
rule_fsts={{ tts_model.rule_fsts }}
|
rule_fsts={{ tts_model.rule_fsts }}
|
||||||
@@ -107,6 +126,8 @@ for arch in arm64-v8a armeabi-v7a x86_64 x86; do
|
|||||||
done
|
done
|
||||||
|
|
||||||
rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
|
rm -rf ./android/SherpaOnnxTts/app/src/main/assets/$model_dir
|
||||||
|
rm -fv ./android/SherpaOnnxTts/app/src/main/assets/*.onnx
|
||||||
|
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
|
|
||||||
git checkout .
|
git checkout .
|
||||||
|
|||||||
@@ -30,7 +30,9 @@ def get_args():
|
|||||||
@dataclass
|
@dataclass
|
||||||
class TtsModel:
|
class TtsModel:
|
||||||
model_dir: str
|
model_dir: str
|
||||||
model_name: str = ""
|
model_name: str = "" # for vits
|
||||||
|
acoustic_model_name: str = "" # for matcha
|
||||||
|
vocoder: str = "" # for matcha
|
||||||
lang: str = "" # en, zh, fr, de, etc.
|
lang: str = "" # en, zh, fr, de, etc.
|
||||||
rule_fsts: Optional[List[str]] = None
|
rule_fsts: Optional[List[str]] = None
|
||||||
rule_fars: Optional[List[str]] = None
|
rule_fars: Optional[List[str]] = None
|
||||||
@@ -378,6 +380,35 @@ def get_vits_models() -> List[TtsModel]:
|
|||||||
return all_models
|
return all_models
|
||||||
|
|
||||||
|
|
||||||
|
def get_matcha_models() -> List[TtsModel]:
|
||||||
|
chinese_models = [
|
||||||
|
TtsModel(
|
||||||
|
model_dir="matcha-icefall-zh-baker",
|
||||||
|
acoustic_model_name="model-steps-3.onnx",
|
||||||
|
lang="zh",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
rule_fsts = ["phone.fst", "date.fst", "number.fst"]
|
||||||
|
for m in chinese_models:
|
||||||
|
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
|
||||||
|
m.rule_fsts = ",".join(s)
|
||||||
|
m.dict_dir = m.model_dir + "/dict"
|
||||||
|
m.vocoder = "hifigan_v2.onnx"
|
||||||
|
|
||||||
|
english_models = [
|
||||||
|
TtsModel(
|
||||||
|
model_dir="matcha-icefall-en_US-ljspeech",
|
||||||
|
acoustic_model_name="model-steps-3.onnx",
|
||||||
|
lang="en",
|
||||||
|
)
|
||||||
|
]
|
||||||
|
for m in english_models:
|
||||||
|
m.data_dir = f"{m.model_dir}/espeak-ng-data"
|
||||||
|
m.vocoder = "hifigan_v2.onnx"
|
||||||
|
|
||||||
|
return chinese_models + english_models
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
args = get_args()
|
args = get_args()
|
||||||
index = args.index
|
index = args.index
|
||||||
@@ -389,7 +420,10 @@ def main():
|
|||||||
all_model_list += get_piper_models()
|
all_model_list += get_piper_models()
|
||||||
all_model_list += get_mimic3_models()
|
all_model_list += get_mimic3_models()
|
||||||
all_model_list += get_coqui_models()
|
all_model_list += get_coqui_models()
|
||||||
|
all_model_list += get_matcha_models()
|
||||||
|
|
||||||
convert_lang_to_iso_639_3(all_model_list)
|
convert_lang_to_iso_639_3(all_model_list)
|
||||||
|
print(all_model_list)
|
||||||
|
|
||||||
num_models = len(all_model_list)
|
num_models = len(all_model_list)
|
||||||
|
|
||||||
|
|||||||
@@ -348,6 +348,10 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
|||||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||||
config_.model.vits.dict_dir, model_->GetMetaData(),
|
config_.model.vits.dict_dir, model_->GetMetaData(),
|
||||||
config_.model.debug);
|
config_.model.debug);
|
||||||
|
} else if (meta_data.jieba && !config_.model.vits.dict_dir.empty()) {
|
||||||
|
frontend_ = std::make_unique<JiebaLexicon>(
|
||||||
|
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||||
|
config_.model.vits.dict_dir, config_.model.debug);
|
||||||
} else if (meta_data.is_melo_tts && meta_data.language == "English") {
|
} else if (meta_data.is_melo_tts && meta_data.language == "English") {
|
||||||
frontend_ = std::make_unique<MeloTtsLexicon>(
|
frontend_ = std::make_unique<MeloTtsLexicon>(
|
||||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||||
|
|||||||
@@ -173,22 +173,55 @@ class OfflineTts(
|
|||||||
// to download models
|
// to download models
|
||||||
fun getOfflineTtsConfig(
|
fun getOfflineTtsConfig(
|
||||||
modelDir: String,
|
modelDir: String,
|
||||||
modelName: String,
|
modelName: String, // for VITS
|
||||||
|
acousticModelName: String, // for Matcha
|
||||||
|
vocoder: String, // for Matcha
|
||||||
lexicon: String,
|
lexicon: String,
|
||||||
dataDir: String,
|
dataDir: String,
|
||||||
dictDir: String,
|
dictDir: String,
|
||||||
ruleFsts: String,
|
ruleFsts: String,
|
||||||
ruleFars: String
|
ruleFars: String
|
||||||
): OfflineTtsConfig {
|
): OfflineTtsConfig {
|
||||||
|
if (modelName.isEmpty() && acousticModelName.isEmpty()) {
|
||||||
|
throw IllegalArgumentException("Please specify a TTS model")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (modelName.isNotEmpty() && acousticModelName.isNotEmpty()) {
|
||||||
|
throw IllegalArgumentException("Please specify either a VITS or a Matcha model, but not both")
|
||||||
|
}
|
||||||
|
|
||||||
|
if (acousticModelName.isNotEmpty() && vocoder.isEmpty()) {
|
||||||
|
throw IllegalArgumentException("Please provide vocoder for Matcha TTS")
|
||||||
|
}
|
||||||
|
val vits = if (modelName.isNotEmpty()) {
|
||||||
|
OfflineTtsVitsModelConfig(
|
||||||
|
model = "$modelDir/$modelName",
|
||||||
|
lexicon = "$modelDir/$lexicon",
|
||||||
|
tokens = "$modelDir/tokens.txt",
|
||||||
|
dataDir = dataDir,
|
||||||
|
dictDir = dictDir,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
OfflineTtsVitsModelConfig()
|
||||||
|
}
|
||||||
|
|
||||||
|
val matcha = if (acousticModelName.isNotEmpty()) {
|
||||||
|
OfflineTtsMatchaModelConfig(
|
||||||
|
acousticModel = "$modelDir/$acousticModelName",
|
||||||
|
vocoder = vocoder,
|
||||||
|
lexicon = "$modelDir/$lexicon",
|
||||||
|
tokens = "$modelDir/tokens.txt",
|
||||||
|
dictDir = dictDir,
|
||||||
|
dataDir = dataDir,
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
OfflineTtsMatchaModelConfig()
|
||||||
|
}
|
||||||
|
|
||||||
return OfflineTtsConfig(
|
return OfflineTtsConfig(
|
||||||
model = OfflineTtsModelConfig(
|
model = OfflineTtsModelConfig(
|
||||||
vits = OfflineTtsVitsModelConfig(
|
vits = vits,
|
||||||
model = "$modelDir/$modelName",
|
matcha = matcha,
|
||||||
lexicon = "$modelDir/$lexicon",
|
|
||||||
tokens = "$modelDir/tokens.txt",
|
|
||||||
dataDir = dataDir,
|
|
||||||
dictDir = dictDir,
|
|
||||||
),
|
|
||||||
numThreads = 2,
|
numThreads = 2,
|
||||||
debug = true,
|
debug = true,
|
||||||
provider = "cpu",
|
provider = "cpu",
|
||||||
|
|||||||
Reference in New Issue
Block a user