2023-10-13 19:30:38 +08:00
// sherpa-onnx/csrc/sherpa-onnx-offline-tts.cc
//
// Copyright (c) 2023 Xiaomi Corporation
2023-12-02 15:35:11 +08:00
# include <chrono> // NOLINT
2023-10-13 19:30:38 +08:00
# include <fstream>
# include "sherpa-onnx/csrc/offline-tts.h"
# include "sherpa-onnx/csrc/parse-options.h"
2023-10-13 23:36:03 +08:00
# include "sherpa-onnx/csrc/wave-writer.h"
2023-10-13 19:30:38 +08:00
int main ( int32_t argc , char * argv [ ] ) {
const char * kUsageMessage = R " usage(
Offline text - to - speech with sherpa - onnx
2023-12-02 15:35:11 +08:00
Usage example :
wget https : //github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
tar xf vits - piper - en_US - amy - low . tar . bz2
2023-10-13 19:30:38 +08:00
. / bin / sherpa - onnx - offline - tts \
2023-12-02 15:35:11 +08:00
- - vits - model = . / vits - piper - en_US - amy - low / en_US - amy - low . onnx \
- - vits - tokens = . / vits - piper - en_US - amy - low / tokens . txt \
- - vits - data - dir = . / vits - piper - en_US - amy - low / espeak - ng - data \
2023-10-16 17:22:30 +08:00
- - output - filename = . / generated . wav \
2023-12-02 15:35:11 +08:00
" Today as always, men fall into two groups: slaves and free men. Whoever does not have two-thirds of his day for himself, is a slave, whatever he may be: a statesman, a businessman, an official, or a scholar. "
2023-10-13 19:30:38 +08:00
2023-10-13 23:36:03 +08:00
It will generate a file . / generated . wav as specified by - - output - filename .
2023-12-02 15:35:11 +08:00
You can find more models at
https : //github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
2023-10-16 17:22:30 +08:00
Please see
https : //k2-fsa.github.io/sherpa/onnx/tts/index.html
2023-10-19 17:38:23 +08:00
or details .
2023-10-13 19:30:38 +08:00
) usage " ;
sherpa_onnx : : ParseOptions po ( kUsageMessage ) ;
2023-10-13 23:36:03 +08:00
std : : string output_filename = " ./generated.wav " ;
2023-10-16 17:22:30 +08:00
int32_t sid = 0 ;
2023-10-13 23:36:03 +08:00
po . Register ( " output-filename " , & output_filename ,
" Path to save the generated audio " ) ;
2023-10-16 17:22:30 +08:00
po . Register ( " sid " , & sid ,
" Speaker ID. Used only for multi-speaker models, e.g., models "
" trained using the VCTK dataset. Not used for single-speaker "
" models, e.g., models trained using the LJSpeech dataset " ) ;
2023-10-13 19:30:38 +08:00
sherpa_onnx : : OfflineTtsConfig config ;
2023-10-13 23:36:03 +08:00
2023-10-13 19:30:38 +08:00
config . Register ( & po ) ;
po . Read ( argc , argv ) ;
if ( po . NumArgs ( ) = = 0 ) {
fprintf ( stderr , " Error: Please provide the text to generate audio. \n \n " ) ;
po . PrintUsage ( ) ;
exit ( EXIT_FAILURE ) ;
}
if ( po . NumArgs ( ) > 1 ) {
fprintf ( stderr ,
" Error: Accept only one positional argument. Please use single "
" quotes to wrap your text \n " ) ;
po . PrintUsage ( ) ;
exit ( EXIT_FAILURE ) ;
}
if ( ! config . Validate ( ) ) {
fprintf ( stderr , " Errors in config! \n " ) ;
exit ( EXIT_FAILURE ) ;
}
sherpa_onnx : : OfflineTts tts ( config ) ;
2023-12-02 15:35:11 +08:00
const auto begin = std : : chrono : : steady_clock : : now ( ) ;
2023-10-16 17:22:30 +08:00
auto audio = tts . Generate ( po . GetArg ( 1 ) , sid ) ;
2023-12-02 15:35:11 +08:00
const auto end = std : : chrono : : steady_clock : : now ( ) ;
2023-10-18 14:02:01 +08:00
if ( audio . samples . empty ( ) ) {
fprintf (
stderr ,
2023-12-02 15:35:11 +08:00
" Error in generating audio. Please read previous error messages. \n " ) ;
2023-10-18 14:02:01 +08:00
exit ( EXIT_FAILURE ) ;
}
2023-10-13 19:30:38 +08:00
2023-12-02 15:35:11 +08:00
float elapsed_seconds =
std : : chrono : : duration_cast < std : : chrono : : milliseconds > ( end - begin )
. count ( ) /
1000. ;
float duration = audio . samples . size ( ) / static_cast < float > ( audio . sample_rate ) ;
float rtf = elapsed_seconds / duration ;
fprintf ( stderr , " Elapsed seconds: %.3f s \n " , elapsed_seconds ) ;
fprintf ( stderr , " Audio duration: %.3f s \n " , duration ) ;
fprintf ( stderr , " Real-time factor (RTF): %.3f/%.3f = %.3f \n " , elapsed_seconds ,
duration , rtf ) ;
2023-10-13 23:36:03 +08:00
bool ok = sherpa_onnx : : WriteWave ( output_filename , audio . sample_rate ,
audio . samples . data ( ) , audio . samples . size ( ) ) ;
if ( ! ok ) {
fprintf ( stderr , " Failed to write wave to %s \n " , output_filename . c_str ( ) ) ;
exit ( EXIT_FAILURE ) ;
}
2023-10-13 19:30:38 +08:00
2023-10-16 17:22:30 +08:00
fprintf ( stderr , " The text is: %s. Speaker ID: %d \n " , po . GetArg ( 1 ) . c_str ( ) ,
sid ) ;
2023-10-13 23:36:03 +08:00
fprintf ( stderr , " Saved to %s successfully! \n " , output_filename . c_str ( ) ) ;
2023-10-13 19:30:38 +08:00
return 0 ;
}