Use piper-phonemize to convert text to token IDs (#453)
This commit is contained in:
@@ -2,6 +2,8 @@
|
||||
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
|
||||
xmlns:tools="http://schemas.android.com/tools">
|
||||
|
||||
<uses-permission android:name="android.permission.WRITE_INTERNAL_STORAGE" />
|
||||
|
||||
<application
|
||||
android:allowBackup="true"
|
||||
android:dataExtractionRules="@xml/data_extraction_rules"
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package com.k2fsa.sherpa.onnx
|
||||
|
||||
import android.content.res.AssetManager
|
||||
import android.media.MediaPlayer
|
||||
import android.net.Uri
|
||||
import android.os.Bundle
|
||||
@@ -9,6 +10,8 @@ import android.widget.EditText
|
||||
import android.widget.Toast
|
||||
import androidx.appcompat.app.AppCompatActivity
|
||||
import java.io.File
|
||||
import java.io.FileOutputStream
|
||||
import java.io.IOException
|
||||
|
||||
const val TAG = "sherpa-onnx"
|
||||
|
||||
@@ -19,7 +22,6 @@ class MainActivity : AppCompatActivity() {
|
||||
private lateinit var speed: EditText
|
||||
private lateinit var generate: Button
|
||||
private lateinit var play: Button
|
||||
private var hasFile: Boolean = false
|
||||
|
||||
override fun onCreate(savedInstanceState: Bundle?) {
|
||||
super.onCreate(savedInstanceState)
|
||||
@@ -46,10 +48,10 @@ class MainActivity : AppCompatActivity() {
|
||||
val sampleText = ""
|
||||
text.setText(sampleText)
|
||||
|
||||
play.isEnabled = false;
|
||||
play.isEnabled = false
|
||||
}
|
||||
|
||||
fun onClickGenerate() {
|
||||
private fun onClickGenerate() {
|
||||
val sidInt = sid.text.toString().toIntOrNull()
|
||||
if (sidInt == null || sidInt < 0) {
|
||||
Toast.makeText(
|
||||
@@ -77,7 +79,7 @@ class MainActivity : AppCompatActivity() {
|
||||
return
|
||||
}
|
||||
|
||||
play.isEnabled = false;
|
||||
play.isEnabled = false
|
||||
val audio = tts.generate(text = textStr, sid = sidInt, speed = speedFloat)
|
||||
|
||||
val filename = application.filesDir.absolutePath + "/generated.wav"
|
||||
@@ -89,7 +91,7 @@ class MainActivity : AppCompatActivity() {
|
||||
}
|
||||
}
|
||||
|
||||
fun onClickPlay() {
|
||||
private fun onClickPlay() {
|
||||
val filename = application.filesDir.absolutePath + "/generated.wav"
|
||||
val mediaPlayer = MediaPlayer.create(
|
||||
applicationContext,
|
||||
@@ -98,10 +100,13 @@ class MainActivity : AppCompatActivity() {
|
||||
mediaPlayer.start()
|
||||
}
|
||||
|
||||
fun initTts() {
|
||||
var modelDir :String?
|
||||
var modelName :String?
|
||||
private fun initTts() {
|
||||
var modelDir: String?
|
||||
var modelName: String?
|
||||
var ruleFsts: String?
|
||||
var lexicon: String?
|
||||
var dataDir: String?
|
||||
var assets: AssetManager? = application.assets
|
||||
|
||||
// The purpose of such a design is to make the CI test easier
|
||||
// Please see
|
||||
@@ -109,21 +114,90 @@ class MainActivity : AppCompatActivity() {
|
||||
modelDir = null
|
||||
modelName = null
|
||||
ruleFsts = null
|
||||
lexicon = null
|
||||
dataDir = null
|
||||
|
||||
// Example 1:
|
||||
// modelDir = "vits-vctk"
|
||||
// modelName = "vits-vctk.onnx"
|
||||
// lexicon = "lexicon.txt"
|
||||
|
||||
// Example 2:
|
||||
// modelDir = "vits-piper-en_US-lessac-medium"
|
||||
// modelName = "en_US-lessac-medium.onnx"
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
|
||||
// modelDir = "vits-piper-en_US-amy-low"
|
||||
// modelName = "en_US-amy-low.onnx"
|
||||
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
|
||||
|
||||
// Example 3:
|
||||
// modelDir = "vits-zh-aishell3"
|
||||
// modelName = "vits-aishell3.onnx"
|
||||
// ruleFsts = "vits-zh-aishell3/rule.fst"
|
||||
// lexcion = "lexicon.txt"
|
||||
|
||||
val config = getOfflineTtsConfig(modelDir = modelDir!!, modelName = modelName!!, ruleFsts = ruleFsts ?: "")!!
|
||||
tts = OfflineTts(assetManager = application.assets, config = config)
|
||||
if (dataDir != null) {
|
||||
val newDir = copyDataDir(modelDir)
|
||||
modelDir = newDir + "/" + modelDir
|
||||
dataDir = newDir + "/" + dataDir
|
||||
assets = null
|
||||
}
|
||||
|
||||
val config = getOfflineTtsConfig(
|
||||
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
|
||||
dataDir = dataDir ?: "",
|
||||
ruleFsts = ruleFsts ?: ""
|
||||
)!!
|
||||
|
||||
tts = OfflineTts(assetManager = assets, config = config)
|
||||
}
|
||||
|
||||
|
||||
private fun copyDataDir(dataDir: String): String {
|
||||
println("data dir is $dataDir")
|
||||
copyAssets(dataDir)
|
||||
|
||||
val newDataDir = application.getExternalFilesDir(null)!!.absolutePath
|
||||
println("newDataDir: $newDataDir")
|
||||
return newDataDir
|
||||
}
|
||||
|
||||
private fun copyAssets(path: String) {
|
||||
val assets: Array<String>?
|
||||
try {
|
||||
assets = application.assets.list(path)
|
||||
if (assets!!.isEmpty()) {
|
||||
copyFile(path)
|
||||
} else {
|
||||
val fullPath = "${application.getExternalFilesDir(null)}/$path"
|
||||
val dir = File(fullPath)
|
||||
dir.mkdirs()
|
||||
for (asset in assets.iterator()) {
|
||||
val p: String = if (path == "") "" else path + "/"
|
||||
copyAssets(p + asset)
|
||||
}
|
||||
}
|
||||
} catch (ex: IOException) {
|
||||
Log.e(TAG, "Failed to copy $path. ${ex.toString()}")
|
||||
}
|
||||
}
|
||||
|
||||
private fun copyFile(filename: String) {
|
||||
try {
|
||||
val istream = application.assets.open(filename)
|
||||
val newFilename = application.getExternalFilesDir(null).toString() + "/" + filename
|
||||
val ostream = FileOutputStream(newFilename)
|
||||
// Log.i(TAG, "Copying $filename to $newFilename")
|
||||
val buffer = ByteArray(1024)
|
||||
var read = 0
|
||||
while (read != -1) {
|
||||
ostream.write(buffer, 0, read)
|
||||
read = istream.read(buffer)
|
||||
}
|
||||
istream.close()
|
||||
ostream.flush()
|
||||
ostream.close()
|
||||
} catch (ex: Exception) {
|
||||
Log.e(TAG, "Failed to copy $filename, ${ex.toString()}")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,8 +5,9 @@ import android.content.res.AssetManager
|
||||
|
||||
data class OfflineTtsVitsModelConfig(
|
||||
var model: String,
|
||||
var lexicon: String,
|
||||
var lexicon: String = "",
|
||||
var tokens: String,
|
||||
var dataDir: String = "",
|
||||
var noiseScale: Float = 0.667f,
|
||||
var noiseScaleW: Float = 0.8f,
|
||||
var lengthScale: Float = 1.0f,
|
||||
@@ -22,6 +23,7 @@ data class OfflineTtsModelConfig(
|
||||
data class OfflineTtsConfig(
|
||||
var model: OfflineTtsModelConfig,
|
||||
var ruleFsts: String = "",
|
||||
var maxNumSentences: Int = 2,
|
||||
)
|
||||
|
||||
class GeneratedAudio(
|
||||
@@ -117,18 +119,25 @@ class OfflineTts(
|
||||
// please refer to
|
||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
|
||||
// to download models
|
||||
fun getOfflineTtsConfig(modelDir: String, modelName: String, ruleFsts: String): OfflineTtsConfig? {
|
||||
fun getOfflineTtsConfig(
|
||||
modelDir: String,
|
||||
modelName: String,
|
||||
lexicon: String,
|
||||
dataDir: String,
|
||||
ruleFsts: String
|
||||
): OfflineTtsConfig? {
|
||||
return OfflineTtsConfig(
|
||||
model = OfflineTtsModelConfig(
|
||||
vits = OfflineTtsVitsModelConfig(
|
||||
model = "$modelDir/$modelName",
|
||||
lexicon = "$modelDir/lexicon.txt",
|
||||
tokens = "$modelDir/tokens.txt"
|
||||
lexicon = "$modelDir/$lexicon",
|
||||
tokens = "$modelDir/tokens.txt",
|
||||
dataDir = "$dataDir"
|
||||
),
|
||||
numThreads = 2,
|
||||
debug = true,
|
||||
provider = "cpu",
|
||||
),
|
||||
ruleFsts=ruleFsts,
|
||||
ruleFsts = ruleFsts,
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user