Use piper-phonemize to convert text to token IDs (#453)

This commit is contained in:
Fangjun Kuang
2023-11-30 23:57:43 +08:00
committed by GitHub
parent db41778e99
commit 62dc3c3e46
55 changed files with 1048 additions and 192 deletions

View File

@@ -578,6 +578,7 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
model: String,
lexicon: String,
tokens: String,
dataDir: String = "",
noiseScale: Float = 0.667,
noiseScaleW: Float = 0.8,
lengthScale: Float = 1.0
@@ -586,6 +587,7 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
model: toCPointer(model),
lexicon: toCPointer(lexicon),
tokens: toCPointer(tokens),
data_dir: toCPointer(dataDir),
noise_scale: noiseScale,
noise_scale_w: noiseScaleW,
length_scale: lengthScale)
@@ -607,11 +609,13 @@ func sherpaOnnxOfflineTtsModelConfig(
/// Builds a `SherpaOnnxOfflineTtsConfig` from the given model configuration.
///
/// NOTE(review): this listing appears to be a rendered diff without +/- markers, so the
/// `ruleFsts` parameter and the `rule_fsts` argument each appear twice (presumably the
/// old line followed by its replacement) — confirm against the real source file.
///
/// - Parameters:
///   - model: Model configuration stored in the `model` field of the result.
///   - ruleFsts: String passed through `toCPointer` into `rule_fsts`; defaults to `""`.
///   - maxNumSenetences: Converted to `Int32` and stored in `max_num_sentences`; defaults
///     to 2. NOTE(review): the label looks like a misspelling of "Sentences"; renaming it
///     would change the call-site label, so it is left untouched here.
/// - Returns: The populated `SherpaOnnxOfflineTtsConfig`.
func sherpaOnnxOfflineTtsConfig(
model: SherpaOnnxOfflineTtsModelConfig,
ruleFsts: String = ""
ruleFsts: String = "",
maxNumSenetences: Int = 2
) -> SherpaOnnxOfflineTtsConfig {
return SherpaOnnxOfflineTtsConfig(
model: model,
rule_fsts: toCPointer(ruleFsts)
rule_fsts: toCPointer(ruleFsts),
max_num_sentences: Int32(maxNumSenetences)
)
}

View File

@@ -7,17 +7,12 @@ if [ ! -d ../build-swift-macos ]; then
exit 1
fi
if [ ! -d ./vits-vctk ]; then
echo "Please download the pre-trained model for testing."
echo "You can refer to"
echo ""
echo "https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers"
echo ""
echo "for help"
if [ ! -d ./vits-piper-en_US-amy-low ]; then
echo "Download a pre-trained model for testing."
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-vctk.tar.bz2
tar xvf vits-vctk.tar.bz2
rm vits-vctk.tar.bz2
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits-piper-en_US-amy-low.tar.bz2
rm vits-piper-en_US-amy-low.tar.bz2
fi
if [ ! -e ./tts ]; then

View File

@@ -1,18 +1,20 @@
func run() {
let model = "./vits-vctk/vits-vctk.onnx"
let lexicon = "./vits-vctk/lexicon.txt"
let tokens = "./vits-vctk/tokens.txt"
let model = "./vits-piper-en_US-amy-low/en_US-amy-low.onnx"
let tokens = "./vits-piper-en_US-amy-low/tokens.txt"
let dataDir = "./vits-piper-en_US-amy-low/espeak-ng-data"
let vits = sherpaOnnxOfflineTtsVitsModelConfig(
model: model,
lexicon: lexicon,
tokens: tokens
lexicon: "",
tokens: tokens,
dataDir: dataDir
)
let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
let tts = SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
let text = "How are you doing? Fantastic!"
let text =
"“Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar.”"
let sid = 99
let speed: Float = 1.0