// sherpa-onnx/csrc/offline-tts.h // // Copyright (c) 2023 Xiaomi Corporation #ifndef SHERPA_ONNX_CSRC_OFFLINE_TTS_H_ #define SHERPA_ONNX_CSRC_OFFLINE_TTS_H_ #include #include #include #include #include "sherpa-onnx/csrc/offline-tts-model-config.h" #include "sherpa-onnx/csrc/parse-options.h" namespace sherpa_onnx { struct OfflineTtsConfig { OfflineTtsModelConfig model; OfflineTtsConfig() = default; explicit OfflineTtsConfig(const OfflineTtsModelConfig &model) : model(model) {} void Register(ParseOptions *po); bool Validate() const; std::string ToString() const; }; struct GeneratedAudio { std::vector samples; int32_t sample_rate; }; class OfflineTtsImpl; class OfflineTts { public: ~OfflineTts(); explicit OfflineTts(const OfflineTtsConfig &config); // @param text A string containing words separated by spaces // @param sid Speaker ID. Used only for multi-speaker models, e.g., models // trained using the VCTK dataset. It is not used for // single-speaker models, e.g., models trained using the ljspeech // dataset. GeneratedAudio Generate(const std::string &text, int64_t sid = 0) const; private: std::unique_ptr impl_; }; } // namespace sherpa_onnx #endif // SHERPA_ONNX_CSRC_OFFLINE_TTS_H_