Support VITS TTS models from coqui-ai/TTS (#416)

* Support VITS TTS models from coqui-ai/TTS

* release v1.8.9
This commit is contained in:
Fangjun Kuang
2023-11-10 16:24:11 +08:00
committed by GitHub
parent ab0e830bee
commit 61341b7187
3 changed files with 25 additions and 9 deletions

View File

@@ -1,7 +1,7 @@
 cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
 project(sherpa-onnx)
-set(SHERPA_ONNX_VERSION "1.8.8")
+set(SHERPA_ONNX_VERSION "1.8.9")
 # Disable warning about
 #

View File

@@ -196,20 +196,27 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsChinese(
 std::vector<int64_t> ans;
+int32_t blank = -1;
+if (token2id_.count(" ")) {
+blank = token2id_.at(" ");
+}
 int32_t sil = -1;
 int32_t eos = -1;
 if (token2id_.count("sil")) {
 sil = token2id_.at("sil");
 eos = token2id_.at("eos");
-} else {
-sil = 0;
 }
+if (sil != -1) {
 ans.push_back(sil);
+}
 for (const auto &w : words) {
 if (punctuations_.count(w)) {
+if (sil != -1) {
 ans.push_back(sil);
+}
 continue;
 }
@@ -220,11 +227,19 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsChinese(
 const auto &token_ids = word2ids_.at(w);
 ans.insert(ans.end(), token_ids.begin(), token_ids.end());
+if (blank != -1) {
+ans.push_back(blank);
 }
+}
+if (sil != -1) {
 ans.push_back(sil);
+}
 if (eos != -1) {
 ans.push_back(eos);
 }
 return ans;
 }
@@ -252,7 +267,7 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsEnglish(
 int32_t blank = token2id_.at(" ");
 std::vector<int64_t> ans;
-if (is_piper_) {
+if (is_piper_ && token2id_.count("^")) {
 ans.push_back(token2id_.at("^")); // sos
 }
@@ -277,7 +292,7 @@ std::vector<int64_t> Lexicon::ConvertTextToTokenIdsEnglish(
 ans.resize(ans.size() - 1);
 }
-if (is_piper_) {
+if (is_piper_ && token2id_.count("$")) {
 ans.push_back(token2id_.at("$")); // eos
 }

View File

@@ -81,7 +81,8 @@ class OfflineTtsVitsModel::Impl {
 std::string comment;
 SHERPA_ONNX_READ_META_DATA_STR(comment, "comment");
-if (comment.find("piper") != std::string::npos) {
+if (comment.find("piper") != std::string::npos ||
+comment.find("coqui") != std::string::npos) {
 is_piper_ = true;
 }
 }