Use piper-phonemize to convert text to token IDs (#453)

This commit is contained in:
Fangjun Kuang
2023-11-30 23:57:43 +08:00
committed by GitHub
parent db41778e99
commit 62dc3c3e46
55 changed files with 1048 additions and 192 deletions

View File

@@ -2,6 +2,8 @@
<manifest xmlns:android="http://schemas.android.com/apk/res/android"
xmlns:tools="http://schemas.android.com/tools">
<uses-permission android:name="android.permission.WRITE_INTERNAL_STORAGE" />
<application
android:allowBackup="true"
android:dataExtractionRules="@xml/data_extraction_rules"

View File

@@ -1,5 +1,6 @@
package com.k2fsa.sherpa.onnx
import android.content.res.AssetManager
import android.media.MediaPlayer
import android.net.Uri
import android.os.Bundle
@@ -9,6 +10,8 @@ import android.widget.EditText
import android.widget.Toast
import androidx.appcompat.app.AppCompatActivity
import java.io.File
import java.io.FileOutputStream
import java.io.IOException
const val TAG = "sherpa-onnx"
@@ -19,7 +22,6 @@ class MainActivity : AppCompatActivity() {
private lateinit var speed: EditText
private lateinit var generate: Button
private lateinit var play: Button
private var hasFile: Boolean = false
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
@@ -46,10 +48,10 @@ class MainActivity : AppCompatActivity() {
val sampleText = ""
text.setText(sampleText)
play.isEnabled = false;
play.isEnabled = false
}
fun onClickGenerate() {
private fun onClickGenerate() {
val sidInt = sid.text.toString().toIntOrNull()
if (sidInt == null || sidInt < 0) {
Toast.makeText(
@@ -77,7 +79,7 @@ class MainActivity : AppCompatActivity() {
return
}
play.isEnabled = false;
play.isEnabled = false
val audio = tts.generate(text = textStr, sid = sidInt, speed = speedFloat)
val filename = application.filesDir.absolutePath + "/generated.wav"
@@ -89,7 +91,7 @@ class MainActivity : AppCompatActivity() {
}
}
fun onClickPlay() {
private fun onClickPlay() {
val filename = application.filesDir.absolutePath + "/generated.wav"
val mediaPlayer = MediaPlayer.create(
applicationContext,
@@ -98,10 +100,13 @@ class MainActivity : AppCompatActivity() {
mediaPlayer.start()
}
fun initTts() {
var modelDir :String?
var modelName :String?
private fun initTts() {
var modelDir: String?
var modelName: String?
var ruleFsts: String?
var lexicon: String?
var dataDir: String?
var assets: AssetManager? = application.assets
// The purpose of such a design is to make the CI test easier
// Please see
@@ -109,21 +114,90 @@ class MainActivity : AppCompatActivity() {
modelDir = null
modelName = null
ruleFsts = null
lexicon = null
dataDir = null
// Example 1:
// modelDir = "vits-vctk"
// modelName = "vits-vctk.onnx"
// lexicon = "lexicon.txt"
// Example 2:
// modelDir = "vits-piper-en_US-lessac-medium"
// modelName = "en_US-lessac-medium.onnx"
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
// modelDir = "vits-piper-en_US-amy-low"
// modelName = "en_US-amy-low.onnx"
// dataDir = "vits-piper-en_US-amy-low/espeak-ng-data"
// Example 3:
// modelDir = "vits-zh-aishell3"
// modelName = "vits-aishell3.onnx"
// ruleFsts = "vits-zh-aishell3/rule.fst"
// lexicon = "lexicon.txt"
val config = getOfflineTtsConfig(modelDir = modelDir!!, modelName = modelName!!, ruleFsts = ruleFsts ?: "")!!
tts = OfflineTts(assetManager = application.assets, config = config)
if (dataDir != null) {
val newDir = copyDataDir(modelDir)
modelDir = newDir + "/" + modelDir
dataDir = newDir + "/" + dataDir
assets = null
}
val config = getOfflineTtsConfig(
modelDir = modelDir!!, modelName = modelName!!, lexicon = lexicon ?: "",
dataDir = dataDir ?: "",
ruleFsts = ruleFsts ?: ""
)!!
tts = OfflineTts(assetManager = assets, config = config)
}
/**
 * Copies the asset tree rooted at [dataDir] into the app's external files
 * directory and reports where it was copied to.
 *
 * @param dataDir asset-relative path of the directory (or file) to copy.
 * @return the absolute path of the external files directory root, i.e. the
 *   parent under which [dataDir] was recreated, not the copied directory itself.
 * @throws NullPointerException if `getExternalFilesDir(null)` returns null.
 */
private fun copyDataDir(dataDir: String): String {
    println("data dir is $dataDir")
    copyAssets(dataDir)

    // The copy is done before the target root is resolved, matching the
    // order in which the diagnostics are printed.
    return application.getExternalFilesDir(null)!!.absolutePath.also { root ->
        println("newDataDir: $root")
    }
}
/**
 * Recursively copies the asset at [path] into the app's external files
 * directory, keeping the same relative layout.
 *
 * An asset whose listing is empty is treated as a single file and handed to
 * [copyFile]; otherwise a matching directory is created and every child is
 * copied in turn.
 *
 * Failures are logged and swallowed (best effort): an [IOException] from the
 * asset manager, or a null listing, aborts only the subtree at [path].
 *
 * @param path asset-relative path of the file or directory to copy; the empty
 *   string denotes the asset root.
 */
private fun copyAssets(path: String) {
    try {
        val children = application.assets.list(path)
        if (children == null) {
            // list() is allowed to return null; report it instead of letting
            // an NPE escape past the IOException handler below.
            Log.e(TAG, "Failed to copy $path. Unable to list assets")
            return
        }

        if (children.isEmpty()) {
            copyFile(path)
            return
        }

        File("${application.getExternalFilesDir(null)}/$path").mkdirs()

        // Avoid a leading "/" when recursing from the asset root.
        val prefix = if (path.isEmpty()) "" else "$path/"
        for (child in children) {
            copyAssets(prefix + child)
        }
    } catch (ex: IOException) {
        Log.e(TAG, "Failed to copy $path. $ex")
    }
}
/**
 * Copies a single asset file to the app's external files directory under the
 * same relative name.
 *
 * Both streams are closed even when the copy fails part-way. Any failure is
 * logged and swallowed (best effort), so callers get no error signal.
 *
 * @param filename asset-relative path of the file to copy; its parent
 *   directory must already exist under the external files directory.
 */
private fun copyFile(filename: String) {
    try {
        application.assets.open(filename).use { istream ->
            val newFilename = application.getExternalFilesDir(null).toString() + "/" + filename
            // Log.i(TAG, "Copying $filename to $newFilename")
            FileOutputStream(newFilename).use { ostream ->
                istream.copyTo(ostream)
            }
        }
    } catch (ex: Exception) {
        Log.e(TAG, "Failed to copy $filename, $ex")
    }
}
}

View File

@@ -5,8 +5,9 @@ import android.content.res.AssetManager
data class OfflineTtsVitsModelConfig(
var model: String,
var lexicon: String,
var lexicon: String = "",
var tokens: String,
var dataDir: String = "",
var noiseScale: Float = 0.667f,
var noiseScaleW: Float = 0.8f,
var lengthScale: Float = 1.0f,
@@ -22,6 +23,7 @@ data class OfflineTtsModelConfig(
data class OfflineTtsConfig(
var model: OfflineTtsModelConfig,
var ruleFsts: String = "",
var maxNumSentences: Int = 2,
)
class GeneratedAudio(
@@ -117,18 +119,25 @@ class OfflineTts(
// please refer to
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
// to download models
fun getOfflineTtsConfig(modelDir: String, modelName: String, ruleFsts: String): OfflineTtsConfig? {
fun getOfflineTtsConfig(
modelDir: String,
modelName: String,
lexicon: String,
dataDir: String,
ruleFsts: String
): OfflineTtsConfig? {
return OfflineTtsConfig(
model = OfflineTtsModelConfig(
vits = OfflineTtsVitsModelConfig(
model = "$modelDir/$modelName",
lexicon = "$modelDir/lexicon.txt",
tokens = "$modelDir/tokens.txt"
lexicon = "$modelDir/$lexicon",
tokens = "$modelDir/tokens.txt",
dataDir = "$dataDir"
),
numThreads = 2,
debug = true,
provider = "cpu",
),
ruleFsts=ruleFsts,
ruleFsts = ruleFsts,
)
}