Text-to-speech for iOS (#443)
This commit is contained in:
@@ -0,0 +1,11 @@
|
||||
{
|
||||
"colors" : [
|
||||
{
|
||||
"idiom" : "universal"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"images" : [
|
||||
{
|
||||
"idiom" : "universal",
|
||||
"platform" : "ios",
|
||||
"size" : "1024x1024"
|
||||
}
|
||||
],
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
95
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
Normal file
95
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ContentView.swift
Normal file
@@ -0,0 +1,95 @@
|
||||
//
|
||||
// ContentView.swift
|
||||
// SherpaOnnxTts
|
||||
//
|
||||
// Created by fangjun on 2023/11/23.
|
||||
//
|
||||
// Text-to-speech with Next-gen Kaldi on iOS without Internet connection
|
||||
|
||||
import SwiftUI
|
||||
import AVFoundation
|
||||
|
||||
/// Main screen of the TTS demo.
///
/// Collects a speaker ID, a speech speed and the text to synthesize, runs the
/// offline TTS engine, writes the result to a wave file in the temporary
/// directory and plays it back with `AVAudioPlayer`.
struct ContentView: View {
    @State private var sid = "0"
    @State private var speed = 1.0
    @State private var text = ""
    @State private var showAlert = false
    /// Set when the generated wave file could not be loaded for playback.
    @State private var showPlaybackError = false
    @State var filename: URL = NSURL() as URL
    @State var audioPlayer: AVAudioPlayer!

    private let tts = createOfflineTts()

    var body: some View {
        VStack(alignment: .leading) {
            HStack {
                Spacer()
                Text("Next-gen Kaldi: TTS").font(.title)
                Spacer()
            }
            HStack {
                Text("Speaker ID")
                TextField("Please input a speaker ID", text: $sid).textFieldStyle(.roundedBorder)
                    .keyboardType(.numberPad)
            }
            HStack {
                Text("Speed \(String(format: "%.1f", speed))")
                    .padding(.trailing)
                Slider(value: $speed, in: 0.5...2.0, step: 0.1) {
                    Text("Speech speed")
                }
            }

            Text("Please input your text below").padding([.trailing, .top, .bottom])

            TextEditor(text: $text)
                .font(.body)
                .opacity(text.isEmpty ? 0.25 : 1)
                .disableAutocorrection(true)
                .border(Color.black)

            Spacer()
            HStack {
                Spacer()
                Button(action: generateAndPlay) {
                    Text("Generate")
                }.alert(isPresented: $showAlert) {
                    Alert(title: Text("Empty text"), message: Text("Please input your text before clicking the Generate button"))
                }
                Spacer()
                Button(action: {
                    // Optional chaining: never crash if no player has been created yet.
                    audioPlayer?.play()
                }) {
                    Text("Play")
                }
                // Only allow replay once a player has actually been created.
                .disabled(audioPlayer == nil)
                // Attached to a different view than the "Empty text" alert so that
                // the two alerts do not override each other.
                .alert(isPresented: $showPlaybackError) {
                    Alert(title: Text("Playback failed"), message: Text("Could not load the generated audio. Please try generating it again."))
                }
                Spacer()
            }
            Spacer()
        }
        .padding()
    }

    /// Synthesizes the current text, saves it to the temporary wave file and starts playback.
    ///
    /// Shows the "Empty text" alert when the trimmed input is empty, and the
    /// "Playback failed" alert when the saved file cannot be opened by `AVAudioPlayer`.
    private func generateAndPlay() {
        // A non-numeric speaker ID falls back to speaker 0.
        let speakerId = Int(sid) ?? 0
        let trimmed = text.trimmingCharacters(in: .whitespacesAndNewlines)
        guard !trimmed.isEmpty else {
            showAlert = true
            return
        }

        let audio = tts.generate(text: trimmed, sid: speakerId, speed: Float(speed))

        // Resolve the output location into a local constant so the rest of this
        // function does not depend on reading `@State` back right after writing it.
        let outputURL: URL
        if filename.absoluteString.isEmpty {
            let tempDirectoryURL = NSURL.fileURL(withPath: NSTemporaryDirectory(), isDirectory: true)
            outputURL = tempDirectoryURL.appendingPathComponent("test.wav")
            filename = outputURL
        } else {
            outputURL = filename
        }

        // Stop any previous playback before the file it reads from is overwritten.
        audioPlayer?.stop()

        // NOTE(review): the return value of `save` is not inspected here because its
        // meaning is not visible in this file; a failed save surfaces below when the
        // player cannot open the file.
        _ = audio.save(filename: outputURL.path)

        do {
            let player = try AVAudioPlayer(contentsOf: outputURL)
            audioPlayer = player
            player.play()
        } catch {
            // Previously a `try!` crashed the app here.
            audioPlayer = nil
            showPlaybackError = true
        }
    }
}
|
||||
|
||||
/// Xcode canvas preview for `ContentView`.
struct ContentView_Previews: PreviewProvider {
    static var previews: some View { ContentView() }
}
|
||||
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"info" : {
|
||||
"author" : "xcode",
|
||||
"version" : 1
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,17 @@
|
||||
//
|
||||
// SherpaOnnxTtsApp.swift
|
||||
// SherpaOnnxTts
|
||||
//
|
||||
// Created by fangjun on 2023/11/23.
|
||||
//
|
||||
|
||||
import SwiftUI
|
||||
|
||||
/// Application entry point: a single window group hosting `ContentView`.
@main
struct SherpaOnnxTtsApp: App {
    var body: some Scene {
        WindowGroup { ContentView() }
    }
}
|
||||
68
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
Normal file
68
ios-swiftui/SherpaOnnxTts/SherpaOnnxTts/ViewModel.swift
Normal file
@@ -0,0 +1,68 @@
|
||||
//
|
||||
// ViewModel.swift
|
||||
// SherpaOnnxTts
|
||||
//
|
||||
// Created by fangjun on 2023/11/23.
|
||||
//
|
||||
|
||||
import Foundation
|
||||
|
||||
/// Returns the path of a resource in the app's main bundle.
///
/// A missing resource is treated as a packaging error: the process is
/// terminated with a message explaining how to add the file to the target.
///
/// - Parameters:
///   - forResource: Name of the resource file, without its extension.
///   - ofType: File extension of the resource.
/// - Returns: The path reported by `Bundle.main` for the resource.
func getResource(_ forResource: String, _ ofType: String) -> String {
    guard let path = Bundle.main.path(forResource: forResource, ofType: ofType) else {
        // `fatalError` (unlike `precondition`) prints its message in optimized
        // builds too, so the hint below is never lost.
        fatalError(
            "\(forResource).\(ofType) does not exist!\n" + "Remember to change \n"
                + " Build Phases -> Copy Bundle Resources\n" + "to add it!"
        )
    }
    return path
}
|
||||
|
||||
/// Please refer to
|
||||
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/index.html
|
||||
/// to download pre-trained models
|
||||
|
||||
/// Builds an offline TTS engine from the bundled VCTK VITS model.
///
/// Model description and download link:
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vctk-english-multi-speaker-109-speakers
///
/// Requires `vits-vctk.onnx`, `lexicon.txt` and `tokens.txt` in the app bundle.
func getTtsForVCTK() -> SherpaOnnxOfflineTtsWrapper {
    let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
        model: getResource("vits-vctk", "onnx"),
        lexicon: getResource("lexicon", "txt"),
        tokens: getResource("tokens", "txt")
    )

    // Must be `var`: the wrapper takes the configuration as an `inout` argument.
    var ttsConfig = sherpaOnnxOfflineTtsConfig(
        model: sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig)
    )
    return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}
|
||||
|
||||
/// Builds an offline TTS engine from the bundled Aishell3 VITS model.
///
/// Model description and download link:
/// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#vits-model-aishell3
func getTtsForAishell3() -> SherpaOnnxOfflineTtsWrapper {
    // Bundled files: vits-aishell3.onnx, lexicon.txt, tokens.txt
    let modelPath = getResource("vits-aishell3", "onnx")
    let lexiconPath = getResource("lexicon", "txt")
    let tokensPath = getResource("tokens", "txt")

    let vitsConfig = sherpaOnnxOfflineTtsVitsModelConfig(
        model: modelPath, lexicon: lexiconPath, tokens: tokensPath)
    let wrappedModelConfig = sherpaOnnxOfflineTtsModelConfig(vits: vitsConfig)

    // Must be `var`: the wrapper takes the configuration as an `inout` argument.
    var ttsConfig = sherpaOnnxOfflineTtsConfig(model: wrappedModelConfig)
    return SherpaOnnxOfflineTtsWrapper(config: &ttsConfig)
}
|
||||
|
||||
/// Creates the offline TTS engine used by the app.
///
/// Currently wired to the VCTK model. To use a different model, replace the
/// call below (for example with `getTtsForAishell3()`); further models can be
/// added by following the pattern of `getTtsForVCTK()` and `getTtsForAishell3()`.
func createOfflineTts() -> SherpaOnnxOfflineTtsWrapper {
    let tts = getTtsForVCTK()
    // let tts = getTtsForAishell3()
    return tts
}
|
||||
Reference in New Issue
Block a user