Add C++ runtime for Matcha-TTS (#1627)

This commit is contained in:
Fangjun Kuang
2024-12-31 12:44:14 +08:00
committed by GitHub
parent 5c2cc48f50
commit 2c2926af7d
33 changed files with 1397 additions and 86 deletions

View File

@@ -19,9 +19,8 @@ namespace sherpa_onnx {
// Private implementation class of JiebaLexicon; the JiebaLexicon constructor
// creates it with std::make_unique<Impl>(...).
//
// NOTE(review): this text looks like a rendered diff whose +/- markers were
// stripped. The "@@ ... @@" lines are hunk headers, the code between hunks is
// not shown, and several declarations appear twice in a row (presumably the
// removed form followed by the added form). Confirm every detail against the
// real source file before relying on it.
class JiebaLexicon::Impl {
public:
// Two constructor signatures appear back to back below: one that takes an
// OfflineTtsVitsModelMetaData and initializes meta_data_, and one that takes
// only (lexicon, tokens, dict_dir, debug). Presumably the second replaces the
// first in this commit -- TODO confirm.
Impl(const std::string &lexicon, const std::string &tokens,
const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data, bool debug)
: meta_data_(meta_data), debug_(debug) {
const std::string &dict_dir, bool debug)
: debug_(debug) {
// Build the three file paths under dict_dir; how they are used is outside
// this view.
std::string dict = dict_dir + "/jieba.dict.utf8";
std::string hmm = dict_dir + "/hmm_model.utf8";
std::string user_dict = dict_dir + "/user.dict.utf8";
@@ -84,7 +83,6 @@ class JiebaLexicon::Impl {
// this_sentence accumulates token ids; it is moved into ans when the check on
// w further down matches.
std::vector<TokenIDs> ans;
std::vector<int64_t> this_sentence;
// Id of the " " token. token2id_ is a std::unordered_map, so at() throws
// std::out_of_range when that token is missing.
int32_t blank = token2id_.at(" ");
for (const auto &w : words) {
auto ids = ConvertWordToIds(w);
if (ids.empty()) {
@@ -93,7 +91,6 @@ class JiebaLexicon::Impl {
}
// Append the ids of this word, followed by the blank id.
this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
this_sentence.push_back(blank);
// NOTE(review): the four literals compared against w show up empty in this
// view; presumably they were non-ASCII punctuation marks lost during
// extraction -- confirm against the real source.
if (w == "" || w == "" || w == "" || w == "") {
ans.emplace_back(std::move(this_sentence));
@@ -135,7 +132,9 @@ class JiebaLexicon::Impl {
token2id_ = ReadTokens(is);
// Pairs of token spellings. The loop below only acts on a pair when p.first
// is present in token2id_ and p.second is not; the loop body is not shown.
// NOTE(review): two initializer lists appear here (a 4-entry one and a
// 10-entry one), presumably the old and new sides of the diff. The second
// element of every pair shows up empty, presumably lost non-ASCII text.
std::vector<std::pair<std::string, std::string>> puncts = {
{",", ""}, {".", ""}, {"!", ""}, {"?", ""}};
{",", ""}, {".", ""}, {"!", ""}, {"?", ""}, {":", ""},
{"\"", ""}, {"\"", ""}, {"'", ""}, {"'", ""}, {";", ""},
};
for (const auto &p : puncts) {
if (token2id_.count(p.first) && !token2id_.count(p.second)) {
@@ -150,6 +149,10 @@ class JiebaLexicon::Impl {
// NOTE(review): both keys read as "" here, which makes the condition
// contradictory as shown; the original literals were presumably two different
// non-ASCII tokens lost in extraction -- confirm.
if (!token2id_.count("") && token2id_.count("")) {
token2id_[""] = token2id_[""];
}
// If ";" has no id but "," does, let ";" reuse the id of ",".
if (!token2id_.count(";") && token2id_.count(",")) {
token2id_[";"] = token2id_[","];
}
}
// Body not visible in this view.
void InitLexicon(std::istream &is) {
@@ -195,8 +198,6 @@ class JiebaLexicon::Impl {
// tokens.txt is saved in token2id_
std::unordered_map<std::string, int32_t> token2id_;
// NOTE(review): meta_data_ is only initialized by the first constructor form
// above; presumably this member is removed by the commit -- TODO confirm.
OfflineTtsVitsModelMetaData meta_data_;
// Uniquely owned cppjieba::Jieba instance; where it is created is not shown.
std::unique_ptr<cppjieba::Jieba> jieba_;
// Set from the constructor's debug argument; defaults to false.
bool debug_ = false;
};
@@ -205,11 +206,8 @@ JiebaLexicon::~JiebaLexicon() = default;
// Public constructor: builds the Impl with std::make_unique, forwarding the
// constructor arguments, and stores it in impl_.
//
// NOTE(review): two versions of the parameter list and initializer appear
// here -- one that passes meta_data through to Impl and one without it.
// Presumably these are the removed and added sides of a diff hunk whose +/-
// markers were stripped; confirm which form is current before editing.
JiebaLexicon::JiebaLexicon(const std::string &lexicon,
const std::string &tokens,
const std::string &dict_dir,
const OfflineTtsVitsModelMetaData &meta_data,
bool debug)
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, meta_data,
debug)) {}
const std::string &dict_dir, bool debug)
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
const std::string &text, const std::string & /*unused_voice = ""*/) const {