Use espeak-ng for coqui-ai/TTS VITS English models. (#466)

2023-12-06 11:00:38 +08:00
parent 3b90e85ef2
commit 23cf92daf7
10 changed files with 230 additions and 93 deletions
--- a/sherpa-onnx/csrc/offline-tts-vits-model.h
+++ b/sherpa-onnx/csrc/offline-tts-vits-model.h
@@ -15,6 +15,7 @@

 #include "onnxruntime_cxx_api.h"  // NOLINT
 #include "sherpa-onnx/csrc/offline-tts-model-config.h"
+#include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h"

 namespace sherpa_onnx {

@@ -39,17 +40,7 @@ class OfflineTtsVitsModel {
   */
  Ort::Value Run(Ort::Value x, int64_t sid = 0, float speed = 1.0);

-  // Sample rate of the generated audio
-  int32_t SampleRate() const;
-
-  // true to insert a blank between each token
-  bool AddBlank() const;
-
-  std::string Punctuations() const;
-  std::string Language() const;  // e.g., Chinese, English, German, etc.
-  std::string Voice() const;     // e.g., en-us, for espeak-ng
-  bool IsPiper() const;
-  int32_t NumSpeakers() const;
+  const OfflineTtsVitsModelMetaData &GetMetaData() const;

 private:
  class Impl;