Add MFC TTS example on Windows (#378)
This commit is contained in:
@@ -568,8 +568,9 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
||||
// Frees an offline TTS handle by invoking `delete` on it.
//
// @param tts Handle to release. A null pointer is accepted: `delete` on a
//            null pointer is a no-op in C++.
void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts) {
  delete tts;
}
|
||||
|
||||
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid) {
|
||||
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid);
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
|
||||
float speed) {
|
||||
sherpa_onnx::GeneratedAudio audio = tts->impl->Generate(text, sid, speed);
|
||||
|
||||
if (audio.samples.empty()) {
|
||||
return nullptr;
|
||||
|
||||
@@ -639,7 +639,8 @@ SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTts(SherpaOnnxOfflineTts *tts);
|
||||
// The user has to use DestroyOfflineTtsGeneratedAudio() to free the returned
|
||||
// pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerate(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid);
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid,
|
||||
float speed);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineTtsGeneratedAudio(
|
||||
const SherpaOnnxGeneratedAudio *p);
|
||||
|
||||
@@ -18,8 +18,8 @@ class OfflineTtsImpl {
|
||||
|
||||
static std::unique_ptr<OfflineTtsImpl> Create(const OfflineTtsConfig &config);
|
||||
|
||||
virtual GeneratedAudio Generate(const std::string &text,
|
||||
int64_t sid = 0) const = 0;
|
||||
virtual GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
|
||||
float speed = 1.0) const = 0;
|
||||
};
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -24,8 +24,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
model_->Punctuations(), model_->Language(),
|
||||
config.model.debug) {}
|
||||
|
||||
GeneratedAudio Generate(const std::string &text,
|
||||
int64_t sid = 0) const override {
|
||||
GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
|
||||
float speed = 1.0) const override {
|
||||
int32_t num_speakers = model_->NumSpeakers();
|
||||
if (num_speakers == 0 && sid != 0) {
|
||||
SHERPA_ONNX_LOGE(
|
||||
@@ -66,7 +66,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
Ort::Value x_tensor = Ort::Value::CreateTensor(
|
||||
memory_info, x.data(), x.size(), x_shape.data(), x_shape.size());
|
||||
|
||||
Ort::Value audio = model_->Run(std::move(x_tensor), sid);
|
||||
Ort::Value audio = model_->Run(std::move(x_tensor), sid, speed);
|
||||
|
||||
std::vector<int64_t> audio_shape =
|
||||
audio.GetTensorTypeAndShapeInfo().GetShape();
|
||||
|
||||
@@ -17,7 +17,7 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
|
||||
po->Register("vits-noise-scale-w", &noise_scale_w,
|
||||
"noise_scale_w for VITS models");
|
||||
po->Register("vits-length-scale", &length_scale,
|
||||
"length_scale for VITS models");
|
||||
"Speech speed. Larger->Slower; Smaller->faster.");
|
||||
}
|
||||
|
||||
bool OfflineTtsVitsModelConfig::Validate() const {
|
||||
|
||||
@@ -26,7 +26,7 @@ class OfflineTtsVitsModel::Impl {
|
||||
Init(buf.data(), buf.size());
|
||||
}
|
||||
|
||||
Ort::Value Run(Ort::Value x, int64_t sid) {
|
||||
Ort::Value Run(Ort::Value x, int64_t sid, float speed) {
|
||||
auto memory_info =
|
||||
Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);
|
||||
|
||||
@@ -48,6 +48,10 @@ class OfflineTtsVitsModel::Impl {
|
||||
float length_scale = config_.vits.length_scale;
|
||||
float noise_scale_w = config_.vits.noise_scale_w;
|
||||
|
||||
if (speed != 1 && speed > 0) {
|
||||
length_scale = 1. / speed;
|
||||
}
|
||||
|
||||
Ort::Value noise_scale_tensor =
|
||||
Ort::Value::CreateTensor(memory_info, &noise_scale, 1, &scale_shape, 1);
|
||||
|
||||
@@ -139,8 +143,9 @@ OfflineTtsVitsModel::OfflineTtsVitsModel(const OfflineTtsModelConfig &config)
|
||||
|
||||
// Destructor is defaulted here, outside the class body.
// NOTE(review): presumably kept out of line so that the Impl type held by
// impl_ is complete at the point of destruction — confirm against the header.
OfflineTtsVitsModel::~OfflineTtsVitsModel() = default;
|
||||
|
||||
Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/) {
|
||||
return impl_->Run(std::move(x), sid);
|
||||
Ort::Value OfflineTtsVitsModel::Run(Ort::Value x, int64_t sid /*=0*/,
|
||||
float speed /*= 1.0*/) {
|
||||
return impl_->Run(std::move(x), sid, speed);
|
||||
}
|
||||
|
||||
// Returns the sample rate reported by the underlying implementation object;
// this wrapper only forwards the call to impl_.
int32_t OfflineTtsVitsModel::SampleRate() const {
  return impl_->SampleRate();
}
|
||||
|
||||
@@ -29,7 +29,7 @@ class OfflineTtsVitsModel {
|
||||
* @return Return a float32 tensor containing audio samples. You can flatten
|
||||
* it to a 1-D tensor.
|
||||
*/
|
||||
Ort::Value Run(Ort::Value x, int64_t sid = 0);
|
||||
Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0);
|
||||
|
||||
// Sample rate of the generated audio
|
||||
int32_t SampleRate() const;
|
||||
|
||||
@@ -28,9 +28,9 @@ OfflineTts::OfflineTts(const OfflineTtsConfig &config)
|
||||
|
||||
// Destructor is defaulted here, outside the class body.
// NOTE(review): presumably kept out of line so that OfflineTtsImpl (held via
// std::unique_ptr in impl_) is a complete type where the destructor is
// generated — confirm against the header.
OfflineTts::~OfflineTts() = default;
|
||||
|
||||
GeneratedAudio OfflineTts::Generate(const std::string &text,
|
||||
int64_t sid /*=0*/) const {
|
||||
return impl_->Generate(text, sid);
|
||||
GeneratedAudio OfflineTts::Generate(const std::string &text, int64_t sid /*=0*/,
|
||||
float speed /*= 1.0*/) const {
|
||||
return impl_->Generate(text, sid, speed);
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -43,7 +43,8 @@ class OfflineTts {
|
||||
// trained using the VCTK dataset. It is not used for
|
||||
// single-speaker models, e.g., models trained using the ljspeech
|
||||
// dataset.
|
||||
GeneratedAudio Generate(const std::string &text, int64_t sid = 0) const;
|
||||
GeneratedAudio Generate(const std::string &text, int64_t sid = 0,
|
||||
float speed = 1.0) const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<OfflineTtsImpl> impl_;
|
||||
|
||||
@@ -40,7 +40,8 @@ void PybindOfflineTts(py::module *m) {
|
||||
using PyClass = OfflineTts;
|
||||
py::class_<PyClass>(*m, "OfflineTts")
|
||||
.def(py::init<const OfflineTtsConfig &>(), py::arg("config"))
|
||||
.def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0);
|
||||
.def("generate", &PyClass::Generate, py::arg("text"), py::arg("sid") = 0,
|
||||
py::arg("speed") = 1.0);
|
||||
}
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
Reference in New Issue
Block a user