Support VITS models from icefall. (#625)

This commit is contained in:
Fangjun Kuang
2024-03-01 19:48:38 +08:00
committed by GitHub
parent 93836ff451
commit d56964371c
5 changed files with 13 additions and 4 deletions

View File

@@ -205,7 +205,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
if (meta_data.frontend == "characters") { if (meta_data.frontend == "characters") {
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>( frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
mgr, config_.model.vits.tokens, meta_data); mgr, config_.model.vits.tokens, meta_data);
} else if ((meta_data.is_piper || meta_data.is_coqui) && } else if ((meta_data.is_piper || meta_data.is_coqui ||
meta_data.is_icefall) &&
!config_.model.vits.data_dir.empty()) { !config_.model.vits.data_dir.empty()) {
frontend_ = std::make_unique<PiperPhonemizeLexicon>( frontend_ = std::make_unique<PiperPhonemizeLexicon>(
mgr, config_.model.vits.tokens, config_.model.vits.data_dir, mgr, config_.model.vits.tokens, config_.model.vits.data_dir,
@@ -231,7 +232,8 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
if (meta_data.frontend == "characters") { if (meta_data.frontend == "characters") {
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>( frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
config_.model.vits.tokens, meta_data); config_.model.vits.tokens, meta_data);
} else if ((meta_data.is_piper || meta_data.is_coqui) && } else if ((meta_data.is_piper || meta_data.is_coqui ||
meta_data.is_icefall) &&
!config_.model.vits.data_dir.empty()) { !config_.model.vits.data_dir.empty()) {
frontend_ = std::make_unique<PiperPhonemizeLexicon>( frontend_ = std::make_unique<PiperPhonemizeLexicon>(
config_.model.vits.tokens, config_.model.vits.data_dir, config_.model.vits.tokens, config_.model.vits.data_dir,

View File

@@ -20,6 +20,7 @@ struct OfflineTtsVitsModelMetaData {
bool is_piper = false; bool is_piper = false;
bool is_coqui = false; bool is_coqui = false;
bool is_icefall = false;
// the following options are for models from coqui-ai/TTS // the following options are for models from coqui-ai/TTS
int32_t blank_id = 0; int32_t blank_id = 0;

View File

@@ -110,6 +110,10 @@ class OfflineTtsVitsModel::Impl {
if (comment.find("coqui") != std::string::npos) { if (comment.find("coqui") != std::string::npos) {
meta_data_.is_coqui = true; meta_data_.is_coqui = true;
} }
if (comment.find("icefall") != std::string::npos) {
meta_data_.is_icefall = true;
}
} }
Ort::Value RunVitsPiperOrCoqui(Ort::Value x, int64_t sid, float speed) { Ort::Value RunVitsPiperOrCoqui(Ort::Value x, int64_t sid, float speed) {

View File

@@ -236,7 +236,7 @@ std::vector<std::vector<int64_t>> PiperPhonemizeLexicon::ConvertTextToTokenIds(
std::vector<int64_t> phoneme_ids; std::vector<int64_t> phoneme_ids;
if (meta_data_.is_piper) { if (meta_data_.is_piper || meta_data_.is_icefall) {
for (const auto &p : phonemes) { for (const auto &p : phonemes) {
phoneme_ids = PiperPhonemesToIds(token2id_, p); phoneme_ids = PiperPhonemesToIds(token2id_, p);
ans.push_back(std::move(phoneme_ids)); ans.push_back(std::move(phoneme_ids));

View File

@@ -105,11 +105,13 @@ static Ort::SessionOptions GetSessionOptionsImpl(int32_t num_threads,
} else { } else {
SHERPA_ONNX_LOGE("Use nnapi"); SHERPA_ONNX_LOGE("Use nnapi");
} }
#else #elif defined(__ANDROID_API__)
SHERPA_ONNX_LOGE( SHERPA_ONNX_LOGE(
"Android NNAPI requires API level >= 27. Current API level %d " "Android NNAPI requires API level >= 27. Current API level %d "
"Fallback to cpu!", "Fallback to cpu!",
(int32_t)__ANDROID_API__); (int32_t)__ANDROID_API__);
#else
SHERPA_ONNX_LOGE("NNAPI is for Android only. Fallback to cpu");
#endif #endif
break; break;
} }