Refactor TTS Android code to support jieba for Chinese TTS models (#800)

This commit is contained in:
Fangjun Kuang
2024-04-22 17:21:05 +08:00
committed by GitHub
parent 494cb5c733
commit 7f3b9ffe5d
40 changed files with 352 additions and 285 deletions

View File

@@ -158,6 +158,7 @@ class MainActivity : AppCompatActivity() {
var ruleFars: String?
var lexicon: String?
var dataDir: String?
var dictDir: String?
var assets: AssetManager? = application.assets
// The purpose of such a design is to make the CI test easier
@@ -169,6 +170,7 @@ class MainActivity : AppCompatActivity() {
ruleFars = null
lexicon = null
dataDir = null
dictDir = null
// Example 1:
// modelDir = "vits-vctk"
@@ -191,21 +193,36 @@ class MainActivity : AppCompatActivity() {
// lexicon = "lexicon.txt"
// Example 4:
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
// modelDir = "vits-zh-hf-fanchen-C"
// modelName = "vits-zh-hf-fanchen-C.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-zh-hf-fanchen-C/dict"
// Example 5:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
// modelDir = "vits-coqui-de-css10"
// modelName = "model.onnx"
// lang = "deu"
if (dataDir != null) {
val newDir = copyDataDir(modelDir)
val newDir = copyDataDir(modelDir!!)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
}
if (dictDir != null) {
val newDir = copyDataDir( modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: "",
)!!

View File

@@ -8,6 +8,7 @@ data class OfflineTtsVitsModelConfig(
var lexicon: String = "",
var tokens: String,
var dataDir: String = "",
var dictDir: String = "",
var noiseScale: Float = 0.667f,
var noiseScaleW: Float = 0.8f,
var lengthScale: Float = 1.0f,
@@ -49,7 +50,7 @@ class OfflineTts(
init {
if (assetManager != null) {
ptr = new(assetManager, config)
ptr = newFromAsset(assetManager, config)
} else {
ptr = newFromFile(config)
}
@@ -87,7 +88,7 @@ class OfflineTts(
fun allocate(assetManager: AssetManager? = null) {
if (ptr == 0L) {
if (assetManager != null) {
ptr = new(assetManager, config)
ptr = newFromAsset(assetManager, config)
} else {
ptr = newFromFile(config)
}
@@ -105,7 +106,7 @@ class OfflineTts(
delete(ptr)
}
private external fun new(
private external fun newFromAsset(
assetManager: AssetManager,
config: OfflineTtsConfig,
): Long
@@ -152,6 +153,7 @@ fun getOfflineTtsConfig(
modelName: String,
lexicon: String,
dataDir: String,
dictDir: String,
ruleFsts: String,
ruleFars: String
): OfflineTtsConfig? {
@@ -161,7 +163,8 @@ fun getOfflineTtsConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = "$dataDir"
dataDir = dataDir,
dictDir = dictDir,
),
numThreads = 2,
debug = true,

View File

@@ -42,6 +42,7 @@ object TtsEngine {
private var ruleFars: String? = null
private var lexicon: String? = null
private var dataDir: String? = null
private var dictDir: String? = null
private var assets: AssetManager? = null
init {
@@ -54,6 +55,7 @@ object TtsEngine {
ruleFars = null
lexicon = null
dataDir = null
dictDir = null
lang = null
// Please enable one and only one of the examples below
@@ -83,6 +85,14 @@ object TtsEngine {
// lang = "zho"
// Example 4:
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
// modelDir = "vits-zh-hf-fanchen-C"
// modelName = "vits-zh-hf-fanchen-C.onnx"
// lexicon = "lexicon.txt"
// dictDir = "vits-zh-hf-fanchen-C/dict"
// lang = "zho"
// Example 5:
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
// This model does not need lexicon or dataDir
// modelDir = "vits-coqui-de-css10"
@@ -108,9 +118,18 @@ object TtsEngine {
assets = null
}
if (dictDir != null) {
val newDir = copyDataDir(context, modelDir!!)
modelDir = newDir + "/" + modelDir
dictDir = modelDir + "/" + "dict"
ruleFsts = "$modelDir/phone.fst,$modelDir/date.fst,$modelDir/number.fst"
assets = null
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
dictDir = dictDir ?: "",
ruleFsts = ruleFsts ?: "",
ruleFars = ruleFars ?: ""
)!!