Text-to-speech for iOS (#443)

This commit is contained in:
Fangjun Kuang
2023-11-23 21:38:32 +08:00
committed by GitHub
parent 2f22e6ed63
commit 94ef6929bb
12 changed files with 614 additions and 2 deletions

View File

@@ -0,0 +1,11 @@
{
"colors" : [
{
"idiom" : "universal"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,13 @@
{
"images" : [
{
"idiom" : "universal",
"platform" : "ios",
"size" : "1024x1024"
}
],
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,95 @@
//
// ContentView.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//
// Text-to-speech with Next-gen Kaldi on iOS without Internet connection
import SwiftUI
import AVFoundation
struct ContentView: View {
    /// Speaker ID as typed by the user; parsed to Int (falls back to 0 on bad input).
    @State private var sid = "0"
    /// Speech-speed multiplier passed to the TTS engine; 1.0 is normal speed.
    @State private var speed = 1.0
    /// Text to be synthesized.
    @State private var text = ""
    /// Set when Generate is tapped with empty text; drives the alert.
    @State private var showAlert = false
    /// Location of the generated wav file; nil until the first generation.
    /// (Replaces the original `NSURL() as URL` empty-URL sentinel.)
    @State private var filename: URL?
    /// Player for the last generated audio; nil until audio exists.
    /// (Replaces the original implicitly-unwrapped AVAudioPlayer!.)
    @State private var audioPlayer: AVAudioPlayer?

    private var tts = createOfflineTts()

    var body: some View {
        VStack(alignment: .leading) {
            HStack {
                Spacer()
                Text("Next-gen Kaldi: TTS").font(.title)
                Spacer()
            }
            HStack {
                Text("Speaker ID")
                TextField("Please input a speaker ID", text: $sid).textFieldStyle(.roundedBorder)
                    .keyboardType(.numberPad)
            }
            HStack {
                Text("Speed \(String(format: "%.1f", speed))")
                    .padding(.trailing)
                Slider(value: $speed, in: 0.5...2.0, step: 0.1) {
                    Text("Speech speed")
                }
            }
            Text("Please input your text below").padding([.trailing, .top, .bottom])
            TextEditor(text: $text)
                .font(.body)
                .opacity(self.text.isEmpty ? 0.25 : 1)
                .disableAutocorrection(true)
                .border(Color.black)
            Spacer()
            HStack {
                Spacer()
                Button(action: generate) {
                    Text("Generate")
                }.alert(isPresented: $showAlert) {
                    Alert(
                        title: Text("Empty text"),
                        message: Text("Please input your text before clicking the Generate button"))
                }
                Spacer()
                Button(action: {
                    self.audioPlayer?.play()
                }) {
                    Text("Play")
                }.disabled(filename == nil)
                Spacer()
            }
            Spacer()
        }
        .padding()
    }

    /// Synthesizes the current text to a wav file and plays it.
    private func generate() {
        let speakerId = Int(sid) ?? 0
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        if trimmed.isEmpty {
            showAlert = true
            return
        }
        let audio = tts.generate(text: trimmed, sid: speakerId, speed: Float(speed))

        // Reuse the same temp-file location across generations.
        let outputURL =
            filename
            ?? FileManager.default.temporaryDirectory.appendingPathComponent("test.wav")
        filename = outputURL

        // NOTE(review): the original discarded save()'s result into an unused
        // `ret`; presumably it is a success flag — confirm against the wrapper
        // and surface a failure to the user if so.
        _ = audio.save(filename: outputURL.path)

        // Avoid the original `try!`: a decode failure no longer crashes the app.
        if let player = try? AVAudioPlayer(contentsOf: outputURL) {
            audioPlayer = player
            player.play()
        }
    }
}
// MARK: - Xcode previews

/// Renders the main TTS screen in the canvas.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}

View File

@@ -0,0 +1,6 @@
{
"info" : {
"author" : "xcode",
"version" : 1
}
}

View File

@@ -0,0 +1,17 @@
//
// SherpaOnnxTtsApp.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//
import SwiftUI
/// App entry point: a single window hosting the TTS screen.
@main
struct SherpaOnnxTtsApp: App {
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}

View File

@@ -0,0 +1,68 @@
//
// ViewModel.swift
// SherpaOnnxTts
//
// Created by fangjun on 2023/11/23.
//
import Foundation
/// Returns the path of a resource inside the main bundle.
///
/// Traps with an actionable message when the file is missing, since a
/// missing bundled model/lexicon is a packaging error, not a runtime one.
func getResource(_ forResource: String, _ ofType: String) -> String {
    guard let path = Bundle.main.path(forResource: forResource, ofType: ofType) else {
        preconditionFailure(
            "\(forResource).\(ofType) does not exist!\n"
                + "Remember to change \n"
                + " Build Phases -> Copy Bundle Resources\n"
                + "to add it!"
        )
    }
    return path
}
/// Please refer to
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
/// to download pre-trained models
/// Builds an offline TTS engine for the English multi-speaker VCTK model.
///
/// See
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
/// for where to download vits-vctk.onnx, lexicon.txt, and tokens.txt.
func getTtsForVCTK() -> SherpaOnnxOfflineTtsWrapper {
    let modelPath = getResource("vits-vctk", "onnx")
    let lexiconPath = getResource("lexicon", "txt")
    let tokensPath = getResource("tokens", "txt")

    let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
        model: modelPath, lexicon: lexiconPath, tokens: tokensPath)
    let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig)
    var ttsConfig = sherpaOnnxOfflineTtsConfig(model: modelConfig)
    return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}
/// Builds an offline TTS engine for the Chinese multi-speaker aishell3 model.
/// See the following link
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
// vits-aishell3.onnx (fixed copy-paste comment: this loads aishell3, not vctk)
let model = getResource("vits-aishell3", "onnx")
// lexicon.txt
let lexicon = getResource("lexicon", "txt")
// tokens.txt
let tokens = getResource("tokens", "txt")
let vits = sherpaOnnxOfflineTtsVitsModelConfig(model: model, lexicon: lexicon, tokens: tokens)
let modelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vits)
var config = sherpaOnnxOfflineTtsConfig(model: modelConfig)
return SherpaOnnxOfflineTtsWrapper(config: &config)
}
/// Creates the TTS engine used by the app.
///
/// Swap the call below to try a different model; add more model builders
/// on need by following the two examples above.
func createOfflineTts() -> SherpaOnnxOfflineTtsWrapper {
    // return getTtsForAishell3()
    getTtsForVCTK()
}