Add C++ runtime for Matcha-TTS (#1627)
This commit is contained in:
@@ -19,9 +19,8 @@ namespace sherpa_onnx {
|
||||
class JiebaLexicon::Impl {
|
||||
public:
|
||||
Impl(const std::string &lexicon, const std::string &tokens,
|
||||
const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug)
|
||||
: meta_data_(meta_data), debug_(debug) {
|
||||
const std::string &dict_dir, bool debug)
|
||||
: debug_(debug) {
|
||||
std::string dict = dict_dir + "/jieba.dict.utf8";
|
||||
std::string hmm = dict_dir + "/hmm_model.utf8";
|
||||
std::string user_dict = dict_dir + "/user.dict.utf8";
|
||||
@@ -84,7 +83,6 @@ class JiebaLexicon::Impl {
|
||||
std::vector<TokenIDs> ans;
|
||||
std::vector<int64_t> this_sentence;
|
||||
|
||||
int32_t blank = token2id_.at(" ");
|
||||
for (const auto &w : words) {
|
||||
auto ids = ConvertWordToIds(w);
|
||||
if (ids.empty()) {
|
||||
@@ -93,7 +91,6 @@ class JiebaLexicon::Impl {
|
||||
}
|
||||
|
||||
this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
|
||||
this_sentence.push_back(blank);
|
||||
|
||||
if (w == "。" || w == "!" || w == "?" || w == ",") {
|
||||
ans.emplace_back(std::move(this_sentence));
|
||||
@@ -135,7 +132,9 @@ class JiebaLexicon::Impl {
|
||||
token2id_ = ReadTokens(is);
|
||||
|
||||
std::vector<std::pair<std::string, std::string>> puncts = {
|
||||
{",", ","}, {".", "。"}, {"!", "!"}, {"?", "?"}};
|
||||
{",", ","}, {".", "。"}, {"!", "!"}, {"?", "?"}, {":", ":"},
|
||||
{"\"", "“"}, {"\"", "”"}, {"'", "‘"}, {"'", "’"}, {";", ";"},
|
||||
};
|
||||
|
||||
for (const auto &p : puncts) {
|
||||
if (token2id_.count(p.first) && !token2id_.count(p.second)) {
|
||||
@@ -150,6 +149,10 @@ class JiebaLexicon::Impl {
|
||||
if (!token2id_.count("、") && token2id_.count(",")) {
|
||||
token2id_["、"] = token2id_[","];
|
||||
}
|
||||
|
||||
if (!token2id_.count(";") && token2id_.count(",")) {
|
||||
token2id_[";"] = token2id_[","];
|
||||
}
|
||||
}
|
||||
|
||||
void InitLexicon(std::istream &is) {
|
||||
@@ -195,8 +198,6 @@ class JiebaLexicon::Impl {
|
||||
// tokens.txt is saved in token2id_
|
||||
std::unordered_map<std::string, int32_t> token2id_;
|
||||
|
||||
OfflineTtsVitsModelMetaData meta_data_;
|
||||
|
||||
std::unique_ptr<cppjieba::Jieba> jieba_;
|
||||
bool debug_ = false;
|
||||
};
|
||||
@@ -205,11 +206,8 @@ JiebaLexicon::~JiebaLexicon() = default;
|
||||
|
||||
JiebaLexicon::JiebaLexicon(const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data,
|
||||
bool debug)
|
||||
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, meta_data,
|
||||
debug)) {}
|
||||
const std::string &dict_dir, bool debug)
|
||||
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}
|
||||
|
||||
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
|
||||
const std::string &text, const std::string & /*unused_voice = ""*/) const {
|
||||
|
||||
Reference in New Issue
Block a user