Add Android demo for MatchaTTS models. (#1683)
This commit is contained in:
@@ -183,6 +183,8 @@ class MainActivity : AppCompatActivity() {
|
||||
private fun initTts() {
|
||||
var modelDir: String?
|
||||
var modelName: String?
|
||||
var acousticModelName: String?
|
||||
var vocoder: String?
|
||||
var ruleFsts: String?
|
||||
var ruleFars: String?
|
||||
var lexicon: String?
|
||||
@@ -193,8 +195,18 @@ class MainActivity : AppCompatActivity() {
|
||||
// The purpose of such a design is to make the CI test easier
|
||||
// Please see
|
||||
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
||||
modelDir = null
|
||||
|
||||
// VITS -- begin
|
||||
modelName = null
|
||||
// VITS -- end
|
||||
|
||||
// Matcha -- begin
|
||||
acousticModelName = null
|
||||
vocoder = null
|
||||
// Matcha -- end
|
||||
|
||||
|
||||
modelDir = null
|
||||
ruleFsts = null
|
||||
ruleFars = null
|
||||
lexicon = null
|
||||
@@ -217,7 +229,6 @@ class MainActivity : AppCompatActivity() {
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||
// modelDir = "vits-icefall-zh-aishell3"
|
||||
// modelName = "model.onnx"
|
||||
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
|
||||
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
||||
// lexicon = "lexicon.txt"
|
||||
|
||||
@@ -233,24 +244,47 @@ class MainActivity : AppCompatActivity() {
|
||||
// modelDir = "vits-coqui-de-css10"
|
||||
// modelName = "model.onnx"
|
||||
|
||||
// Example 6
|
||||
// vits-melo-tts-zh_en
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
|
||||
// modelDir = "vits-melo-tts-zh_en"
|
||||
// modelName = "model.onnx"
|
||||
// lexicon = "lexicon.txt"
|
||||
// dictDir = "vits-melo-tts-zh_en/dict"
|
||||
|
||||
// Example 7
|
||||
// matcha-icefall-zh-baker
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||
// modelDir = "matcha-icefall-zh-baker"
|
||||
// acousticModelName = "model-steps-3.onnx"
|
||||
// vocoder = "hifigan_v2.onnx"
|
||||
// lexicon = "lexicon.txt"
|
||||
// dictDir = "matcha-icefall-zh-baker/dict"
|
||||
|
||||
// Example 8
|
||||
// matcha-icefall-en_US-ljspeech
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||
// modelDir = "matcha-icefall-en_US-ljspeech"
|
||||
// acousticModelName = "model-steps-3.onnx"
|
||||
// vocoder = "hifigan_v2.onnx"
|
||||
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
|
||||
|
||||
if (dataDir != null) {
|
||||
val newDir = copyDataDir(modelDir!!)
|
||||
modelDir = newDir + "/" + modelDir
|
||||
dataDir = newDir + "/" + dataDir
|
||||
assets = null
|
||||
val newDir = copyDataDir(dataDir!!)
|
||||
dataDir = "$newDir/$dataDir"
|
||||
}
|
||||
|
||||
if (dictDir != null) {
|
||||
val newDir = copyDataDir(modelDir!!)
|
||||
modelDir = newDir + "/" + modelDir
|
||||
dictDir = modelDir + "/" + "dict"
|
||||
val newDir = copyDataDir(dictDir!!)
|
||||
dictDir = "$newDir/$dictDir"
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
assets = null
|
||||
}
|
||||
|
||||
val config = getOfflineTtsConfig(
|
||||
modelDir = modelDir!!,
|
||||
modelName = modelName!!,
|
||||
modelName = modelName ?: "",
|
||||
acousticModelName = acousticModelName ?: "",
|
||||
vocoder = vocoder ?: "",
|
||||
lexicon = lexicon ?: "",
|
||||
dataDir = dataDir ?: "",
|
||||
dictDir = dictDir ?: "",
|
||||
|
||||
@@ -57,7 +57,7 @@ class MainActivity : ComponentActivity() {
|
||||
color = MaterialTheme.colorScheme.background
|
||||
) {
|
||||
Scaffold(topBar = {
|
||||
TopAppBar(title = { Text("Next-gen Kaldi: TTS") })
|
||||
TopAppBar(title = { Text("Next-gen Kaldi: TTS Engine") })
|
||||
}) {
|
||||
Box(modifier = Modifier.padding(it)) {
|
||||
Column(modifier = Modifier.padding(16.dp)) {
|
||||
@@ -65,8 +65,8 @@ class MainActivity : ComponentActivity() {
|
||||
Text("Speed " + String.format("%.1f", TtsEngine.speed))
|
||||
Slider(
|
||||
value = TtsEngine.speedState.value,
|
||||
onValueChange = {
|
||||
TtsEngine.speed = it
|
||||
onValueChange = {
|
||||
TtsEngine.speed = it
|
||||
preferenceHelper.setSpeed(it)
|
||||
},
|
||||
valueRange = 0.2F..3.0F,
|
||||
@@ -138,7 +138,9 @@ class MainActivity : ComponentActivity() {
|
||||
val filename =
|
||||
application.filesDir.absolutePath + "/generated.wav"
|
||||
val ok =
|
||||
audio.samples.isNotEmpty() && audio.save(filename)
|
||||
audio.samples.isNotEmpty() && audio.save(
|
||||
filename
|
||||
)
|
||||
|
||||
if (ok) {
|
||||
stopMediaPlayer()
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.k2fsa.sherpa.onnx.tts.engine
|
||||
|
||||
import PreferenceHelper
|
||||
import android.content.Context
|
||||
import android.content.res.AssetManager
|
||||
import android.util.Log
|
||||
@@ -11,7 +12,6 @@ import com.k2fsa.sherpa.onnx.getOfflineTtsConfig
|
||||
import java.io.File
|
||||
import java.io.FileOutputStream
|
||||
import java.io.IOException
|
||||
import PreferenceHelper
|
||||
|
||||
object TtsEngine {
|
||||
var tts: OfflineTts? = null
|
||||
@@ -41,6 +41,8 @@ object TtsEngine {
|
||||
|
||||
private var modelDir: String? = null
|
||||
private var modelName: String? = null
|
||||
private var acousticModelName: String? = null
|
||||
private var vocoder: String? = null
|
||||
private var ruleFsts: String? = null
|
||||
private var ruleFars: String? = null
|
||||
private var lexicon: String? = null
|
||||
@@ -52,8 +54,17 @@ object TtsEngine {
|
||||
// The purpose of such a design is to make the CI test easier
|
||||
// Please see
|
||||
// https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/generate-tts-apk-script.py
|
||||
modelDir = null
|
||||
//
|
||||
// For VITS -- begin
|
||||
modelName = null
|
||||
// For VITS -- end
|
||||
|
||||
// For Matcha -- begin
|
||||
acousticModelName = null
|
||||
vocoder = null
|
||||
// For Matcha -- end
|
||||
|
||||
modelDir = null
|
||||
ruleFsts = null
|
||||
ruleFars = null
|
||||
lexicon = null
|
||||
@@ -82,7 +93,6 @@ object TtsEngine {
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
|
||||
// modelDir = "vits-icefall-zh-aishell3"
|
||||
// modelName = "model.onnx"
|
||||
// ruleFsts = "vits-icefall-zh-aishell3/phone.fst,vits-icefall-zh-aishell3/date.fst,vits-icefall-zh-aishell3/number.fst,vits-icefall-zh-aishell3/new_heteronym.fst"
|
||||
// ruleFars = "vits-icefall-zh-aishell3/rule.far"
|
||||
// lexicon = "lexicon.txt"
|
||||
// lang = "zho"
|
||||
@@ -101,8 +111,35 @@ object TtsEngine {
|
||||
// modelDir = "vits-coqui-de-css10"
|
||||
// modelName = "model.onnx"
|
||||
// lang = "deu"
|
||||
}
|
||||
|
||||
// Example 6
|
||||
// vits-melo-tts-zh_en
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-melo-tts-zh-en-chinese-english-1-speaker
|
||||
// modelDir = "vits-melo-tts-zh_en"
|
||||
// modelName = "model.onnx"
|
||||
// lexicon = "lexicon.txt"
|
||||
// dictDir = "vits-melo-tts-zh_en/dict"
|
||||
// lang = "zho"
|
||||
|
||||
// Example 7
|
||||
// matcha-icefall-zh-baker
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||
// modelDir = "matcha-icefall-zh-baker"
|
||||
// acousticModelName = "model-steps-3.onnx"
|
||||
// vocoder = "hifigan_v2.onnx"
|
||||
// lexicon = "lexicon.txt"
|
||||
// dictDir = "matcha-icefall-zh-baker/dict"
|
||||
// lang = "zho"
|
||||
|
||||
// Example 8
|
||||
// matcha-icefall-en_US-ljspeech
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||
// modelDir = "matcha-icefall-en_US-ljspeech"
|
||||
// acousticModelName = "model-steps-3.onnx"
|
||||
// vocoder = "hifigan_v2.onnx"
|
||||
// dataDir = "matcha-icefall-en_US-ljspeech/espeak-ng-data"
|
||||
// lang = "eng"
|
||||
}
|
||||
|
||||
fun createTts(context: Context) {
|
||||
Log.i(TAG, "Init Next-gen Kaldi TTS")
|
||||
@@ -115,22 +152,22 @@ object TtsEngine {
|
||||
assets = context.assets
|
||||
|
||||
if (dataDir != null) {
|
||||
val newDir = copyDataDir(context, modelDir!!)
|
||||
modelDir = "$newDir/$modelDir"
|
||||
val newDir = copyDataDir(context, dataDir!!)
|
||||
dataDir = "$newDir/$dataDir"
|
||||
assets = null
|
||||
}
|
||||
|
||||
if (dictDir != null) {
|
||||
val newDir = copyDataDir(context, modelDir!!)
|
||||
modelDir = "$newDir/$modelDir"
|
||||
dictDir = "$modelDir/dict"
|
||||
val newDir = copyDataDir(context, dictDir!!)
|
||||
dictDir = "$newDir/$dictDir"
|
||||
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
|
||||
assets = null
|
||||
}
|
||||
|
||||
val config = getOfflineTtsConfig(
|
||||
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
|
||||
modelDir = modelDir!!,
|
||||
modelName = modelName ?: "",
|
||||
acousticModelName = acousticModelName ?: "",
|
||||
vocoder = vocoder ?: "",
|
||||
lexicon = lexicon ?: "",
|
||||
dataDir = dataDir ?: "",
|
||||
dictDir = dictDir ?: "",
|
||||
ruleFsts = ruleFsts ?: "",
|
||||
|
||||
Reference in New Issue
Block a user