Use piper-phonemize to convert text to token IDs (#453)

This commit is contained in:
Fangjun Kuang
2023-11-30 23:57:43 +08:00
committed by GitHub
parent db41778e99
commit 62dc3c3e46
55 changed files with 1048 additions and 192 deletions

View File

@@ -65,7 +65,7 @@ struct ContentView: View {
self.filename = tempDirectoryURL.appendingPathComponent("test.wav")
}
let ret = audio.save(filename: filename.path)
let _ = audio.save(filename: filename.path)
self.audioPlayer = try! AVAudioPlayer(contentsOf: filename)
self.audioPlayer.play()

View File

@@ -7,6 +7,12 @@
import Foundation
/// Resolves `path` against the main bundle's resource URL and returns the
/// resulting file-system path.
///
/// Used to get the path to the espeak-ng-data directory.
///
/// - Parameter path: A URL-style path string, resolved relative to
///   `Bundle.main.resourceURL`.
/// - Returns: The `path` component of the resolved URL.
func resourceURL(to path: String) -> String {
  // `URL(string:relativeTo:)` returns nil when `path` cannot be parsed as a
  // URL string. Stop with a message naming the offending input instead of
  // an anonymous force-unwrap crash.
  guard let resolved = URL(string: path, relativeTo: Bundle.main.resourceURL) else {
    preconditionFailure("Cannot build a resource URL from path '\(path)'")
  }
  return resolved.path
}
func getResource(_ forResource: String, _ ofType: String) -> String {
let path = Bundle.main.path(forResource: forResource, ofType: ofType)
precondition(
@@ -59,8 +65,30 @@ func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
return SherpaOnnxOfflineTtsWrapper(config: &config)
}
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
/// Builds an offline TTS wrapper configured for the `en_US-amy-low` VITS model.
///
/// Model archive:
/// https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
func getTtsFor_en_US_amy_low() -> SherpaOnnxOfflineTtsWrapper {
  // This model does not need lexicon.txt, so `lexicon` is left empty; the
  // espeak-ng-data directory is supplied through `dataDir`.
  let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
    model: getResource("en_US-amy-low", "onnx"),  // en_US-amy-low.onnx
    lexicon: "",
    tokens: getResource("tokens", "txt"),  // tokens.txt
    dataDir: resourceURL(to: "espeak-ng-data")
  )

  // `config:` is passed with `&`, so the configuration must be a `var`.
  var ttsConfig = sherpaOnnxOfflineTtsConfig(
    model: sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig)
  )
  return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}
func createOfflineTts() -> SherpaOnnxOfflineTtsWrapper {
return getTtsForVCTK()
return getTtsFor_en_US_amy_low()
// return getTtsForVCTK()
// return getTtsForAishell3()