Add jieba for Chinese TTS models (#797)
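Below is a minimal usage sketch (editorial addition, not part of this commit) of the new jieba frontend, assuming a jieba-based VITS model whose lexicon.txt and tokens.txt sit in the current directory and whose jieba dict files are unpacked to ./dict; all paths are placeholders:

#include "sherpa-onnx/csrc/jieba-lexicon.h"

int main() {
  // In real use the metadata is read from the ONNX model; the default
  // values suffice for this sketch.
  sherpa_onnx::OfflineTtsVitsModelMetaData meta_data;

  sherpa_onnx::JiebaLexicon frontend("./lexicon.txt", "./tokens.txt",
                                     "./dict", meta_data, /*debug=*/true);

  // Returns one std::vector<int64_t> of token ids per sentence; the
  // frontend splits sentences on 。, !, ? and ,.
  auto token_ids = frontend.ConvertTextToTokenIds("他来到了网易杭研大厦。");
  return 0;
}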
CMakeLists.txt
@@ -260,6 +260,7 @@ if(SHERPA_ONNX_ENABLE_TTS)
   set(ESPEAK_NG_DIR ${espeak_ng_SOURCE_DIR})
   message(STATUS "ESPEAK_NG_DIR: ${ESPEAK_NG_DIR}")
   include(piper-phonemize)
+  include(cppjieba) # For Chinese TTS. It is a header-only C++ library
 endif()

 add_subdirectory(sherpa-onnx)
cmake/cppjieba.cmake (new file, 45 lines)
@@ -0,0 +1,45 @@
function(download_cppjieba)
  include(FetchContent)

  set(cppjieba_URL "https://github.com/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
  set(cppjieba_URL2 "https://hub.nuaa.cf/csukuangfj/cppjieba/archive/refs/tags/sherpa-onnx-2024-04-19.tar.gz")
  set(cppjieba_HASH "SHA256=03e5264687f0efaef05487a07d49c3f4c0f743347bfbf825df4b30cc75ac5288")

  # If you don't have access to the Internet,
  # please pre-download cppjieba
  set(possible_file_locations
    $ENV{HOME}/Downloads/cppjieba-sherpa-onnx-2024-04-19.tar.gz
    ${CMAKE_SOURCE_DIR}/cppjieba-sherpa-onnx-2024-04-19.tar.gz
    ${CMAKE_BINARY_DIR}/cppjieba-sherpa-onnx-2024-04-19.tar.gz
    /tmp/cppjieba-sherpa-onnx-2024-04-19.tar.gz
    /star-fj/fangjun/download/github/cppjieba-sherpa-onnx-2024-04-19.tar.gz
  )

  foreach(f IN LISTS possible_file_locations)
    if(EXISTS ${f})
      set(cppjieba_URL "${f}")
      file(TO_CMAKE_PATH "${cppjieba_URL}" cppjieba_URL)
      message(STATUS "Found local downloaded cppjieba: ${cppjieba_URL}")
      set(cppjieba_URL2)
      break()
    endif()
  endforeach()

  FetchContent_Declare(cppjieba
    URL
      ${cppjieba_URL}
      ${cppjieba_URL2}
    URL_HASH
      ${cppjieba_HASH}
  )

  FetchContent_GetProperties(cppjieba)
  if(NOT cppjieba_POPULATED)
    message(STATUS "Downloading cppjieba ${cppjieba_URL}")
    FetchContent_Populate(cppjieba)
  endif()
  message(STATUS "cppjieba is downloaded to ${cppjieba_SOURCE_DIR}")
  add_subdirectory(${cppjieba_SOURCE_DIR} ${cppjieba_BINARY_DIR} EXCLUDE_FROM_ALL)
endfunction()

download_cppjieba()
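# Editorial note (hedged): for builds without network access, drop
# cppjieba-sherpa-onnx-2024-04-19.tar.gz into one of the paths listed in
# possible_file_locations above (e.g. $ENV{HOME}/Downloads); the foreach
# loop then points cppjieba_URL at the local file and clears the mirror
# URL in cppjieba_URL2, so FetchContent never touches the network.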
sherpa-onnx/csrc/CMakeLists.txt
@@ -132,6 +132,7 @@ list(APPEND sources

 if(SHERPA_ONNX_ENABLE_TTS)
   list(APPEND sources
+    jieba-lexicon.cc
     lexicon.cc
     offline-tts-character-frontend.cc
     offline-tts-impl.cc
@@ -184,6 +185,7 @@ endif()
 if(SHERPA_ONNX_ENABLE_TTS)
   target_link_libraries(sherpa-onnx-core piper_phonemize)
   target_link_libraries(sherpa-onnx-core fstfar fst)
+  target_link_libraries(sherpa-onnx-core cppjieba)
 endif()

 if(SHERPA_ONNX_ENABLE_CHECK)
@@ -491,6 +493,7 @@ if(SHERPA_ONNX_ENABLE_TESTS)
   )
   if(SHERPA_ONNX_ENABLE_TTS)
     list(APPEND sherpa_onnx_test_srcs
+      cppjieba-test.cc
       piper-phonemize-test.cc
     )
   endif()
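# Editorial note (hedged): cppjieba is header-only, as the comment in the
# root CMakeLists.txt says, so the cppjieba target linked above presumably
# just propagates include paths (e.g. an INTERFACE library) rather than
# a compiled archive.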
sherpa-onnx/csrc/cppjieba-test.cc (new file, 144 lines)
@@ -0,0 +1,144 @@
// sherpa-onnx/csrc/cppjieba-test.cc
//
// Copyright (c)  2024  Xiaomi Corporation
#include <iostream>
#include <regex>  // NOLINT
#include <sstream>
#include <string>
#include <vector>

#include "cppjieba/Jieba.hpp"
#include "gtest/gtest.h"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"

namespace sherpa_onnx {

// Please download the dict files from
// https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
const char *const kDictPath = "./dict/jieba.dict.utf8";
const char *const kHmmPath = "./dict/hmm_model.utf8";
const char *const kUserDictPath = "./dict/user.dict.utf8";
const char *const kIdfPath = "./dict/idf.utf8";
const char *const kStopWordPath = "./dict/stop_words.utf8";

TEST(CppJieBa, Case1) {
  if (!FileExists(kDictPath)) {
    SHERPA_ONNX_LOGE("%s does not exist. Skipping test", kDictPath);
    return;
  }

  cppjieba::Jieba jieba(kDictPath, kHmmPath, kUserDictPath, kIdfPath,
                        kStopWordPath);

  std::vector<std::string> words;
  std::vector<cppjieba::Word> jiebawords;

  std::string s = "他来到了网易杭研大厦";
  std::cout << s << std::endl;
  std::cout << "[demo] Cut With HMM" << std::endl;
  jieba.Cut(s, words, true);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  他来到了网易杭研大厦
  [demo] Cut With HMM
  他/来到/了/网易/杭研/大厦
  */

  s = "小明硕士毕业于中国科学院计算所,后在日本京都大学深造";
  std::cout << s << std::endl;
  std::cout << "[demo] CutForSearch" << std::endl;
  jieba.CutForSearch(s, words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  小明硕士毕业于中国科学院计算所,后在日本京都大学深造
  [demo] CutForSearch
  小明/硕士/毕业/于/中国/科学/学院/科学院/中国科学院/计算/计算所/,/后/在/日本/京都/大学/日本京都大学/深造
  */

  std::cout << "[demo] Insert User Word" << std::endl;
  jieba.Cut("男默女泪", words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  jieba.InsertUserWord("男默女泪");
  jieba.Cut("男默女泪", words);
  std::cout << limonp::Join(words.begin(), words.end(), "/") << std::endl;
  /*
  [demo] Insert User Word
  男默/女泪
  男默女泪
  */

  std::cout << "[demo] CutForSearch Word With Offset" << std::endl;
  jieba.CutForSearch(s, jiebawords, true);
  std::cout << jiebawords << std::endl;
  /*
  [demo] CutForSearch Word With Offset
  [{"word": "小明", "offset": 0}, {"word": "硕士", "offset": 6}, {"word": "毕业",
  "offset": 12}, {"word": "于", "offset": 18}, {"word": "中国", "offset": 21},
  {"word": "科学", "offset": 27}, {"word": "学院", "offset": 30}, {"word":
  "科学院", "offset": 27}, {"word": "中国科学院", "offset": 21}, {"word": "计算",
  "offset": 36}, {"word": "计算所", "offset": 36}, {"word": ",", "offset": 45},
  {"word": "后", "offset": 48}, {"word": "在", "offset": 51}, {"word": "日本",
  "offset": 54}, {"word": "京都", "offset": 60}, {"word": "大学", "offset": 66},
  {"word": "日本京都大学", "offset": 54}, {"word": "深造", "offset": 72}]
  */

  // See more tests at
  // https://github.com/yanyiwu/cppjieba/blob/master/test/demo.cpp
}

TEST(CppJieBa, Case2) {
  if (!FileExists(kDictPath)) {
    SHERPA_ONNX_LOGE("%s does not exist. Skipping test", kDictPath);
    return;
  }

  cppjieba::Jieba jieba(kDictPath, kHmmPath, kUserDictPath, kIdfPath,
                        kStopWordPath);
  std::string s =
      "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如"
      "涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感"
      "受着生命的奇迹与温柔";
  std::vector<std::string> words;
  bool is_hmm = true;
  jieba.Cut(s, words, is_hmm);
  {
    std::ostringstream os;
    std::string sep = "";
    for (const auto &w : words) {
      os << sep << w;
      sep = "_";
    }

    std::cout << os.str() << "\n";
  }
  /*
  当_夜幕降临_,_星光点点_,_伴随_着_微风_拂面_,
  _我_在_静谧_中_感受_着_时光_的_流转_,
  _思念_如_涟漪_荡漾_,_梦境_如_画卷_展开_,_我_与_自然_融为一体_,
  _沉静_在_这_片_宁静_的_美丽_之中_,_感受_着_生命_的_奇迹_与_温柔
  */

  s = "这里有:红的、绿的、蓝的;各种各样的颜色都有!你想要什么呢?测试.";
  std::regex punct_re(":|、|;");
  std::string s2 = std::regex_replace(s, punct_re, ",");

  std::regex punct_re2("[.]");
  s2 = std::regex_replace(s2, punct_re2, "。");

  std::regex punct_re3("[?]");
  s2 = std::regex_replace(s2, punct_re3, "?");

  std::regex punct_re4("[!]");
  s2 = std::regex_replace(s2, punct_re4, "!");
  std::cout << s << "\n" << s2 << "\n";

  words.clear();
  jieba.Cut(s2, words, is_hmm);
  {
    std::ostringstream os;
    std::string sep = "";
    for (const auto &w : words) {
      os << sep << w;
      sep = "_";
    }

    std::cout << os.str() << "\n";
  }
}

}  // namespace sherpa_onnx
sherpa-onnx/csrc/file-utils.cc
@@ -18,6 +18,7 @@ bool FileExists(const std::string &filename) {

 void AssertFileExists(const std::string &filename) {
   if (!FileExists(filename)) {
     SHERPA_ONNX_LOG(FATAL) << filename << " does not exist!";
+    exit(-1);
   }
 }
sherpa-onnx/csrc/jieba-lexicon.cc (new file, 216 lines)
@@ -0,0 +1,216 @@
// sherpa-onnx/csrc/jieba-lexicon.cc
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#include "sherpa-onnx/csrc/jieba-lexicon.h"

#include <fstream>
#include <regex>  // NOLINT
#include <sstream>
#include <utility>

#include "cppjieba/Jieba.hpp"
#include "sherpa-onnx/csrc/file-utils.h"
#include "sherpa-onnx/csrc/macros.h"
#include "sherpa-onnx/csrc/text-utils.h"

namespace sherpa_onnx {

// implemented in ./lexicon.cc
std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is);
std::vector<int32_t> ConvertTokensToIds(
    const std::unordered_map<std::string, int32_t> &token2id,
    const std::vector<std::string> &tokens);

class JiebaLexicon::Impl {
 public:
  Impl(const std::string &lexicon, const std::string &tokens,
       const std::string &dict_dir,
       const OfflineTtsVitsModelMetaData &meta_data, bool debug)
      : meta_data_(meta_data), debug_(debug) {
    std::string dict = dict_dir + "/jieba.dict.utf8";
    std::string hmm = dict_dir + "/hmm_model.utf8";
    std::string user_dict = dict_dir + "/user.dict.utf8";
    std::string idf = dict_dir + "/idf.utf8";
    std::string stop_word = dict_dir + "/stop_words.utf8";

    AssertFileExists(dict);
    AssertFileExists(hmm);
    AssertFileExists(user_dict);
    AssertFileExists(idf);
    AssertFileExists(stop_word);

    jieba_ =
        std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);

    {
      std::ifstream is(tokens);
      InitTokens(is);
    }

    {
      std::ifstream is(lexicon);
      InitLexicon(is);
    }
  }

  std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
      const std::string &text) const {
    // see
    // https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
    std::regex punct_re{":|、|;"};
    std::string s = std::regex_replace(text, punct_re, ",");

    std::regex punct_re2("[.]");
    s = std::regex_replace(s, punct_re2, "。");

    std::regex punct_re3("[?]");
    s = std::regex_replace(s, punct_re3, "?");

    std::regex punct_re4("[!]");
    s = std::regex_replace(s, punct_re4, "!");

    std::vector<std::string> words;
    bool is_hmm = true;
    // Segment the punctuation-normalized text, not the raw input
    jieba_->Cut(s, words, is_hmm);

    if (debug_) {
      SHERPA_ONNX_LOGE("input text: %s", text.c_str());
      SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());

      std::ostringstream os;
      std::string sep = "";
      for (const auto &w : words) {
        os << sep << w;
        sep = "_";
      }

      SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str());
    }

    std::vector<std::vector<int64_t>> ans;
    std::vector<int64_t> this_sentence;

    int32_t blank = token2id_.at(" ");
    for (const auto &w : words) {
      auto ids = ConvertWordToIds(w);
      if (ids.empty()) {
        SHERPA_ONNX_LOGE("Ignore OOV '%s'", w.c_str());
        continue;
      }

      this_sentence.insert(this_sentence.end(), ids.begin(), ids.end());
      this_sentence.push_back(blank);

      if (w == "。" || w == "!" || w == "?" || w == ",") {
        ans.push_back(std::move(this_sentence));
      }
    }  // for (const auto &w : words)

    if (!this_sentence.empty()) {
      ans.push_back(std::move(this_sentence));
    }

    return ans;
  }
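  // Editorial note (hedged, not part of the commit): with the rules above,
  // an input like "你好。世界!" is expected to produce two sentences, since
  // "。", "!", "?" and "," each close the current sentence; every word
  // contributes its token ids followed by one blank id.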
 private:
  std::vector<int32_t> ConvertWordToIds(const std::string &w) const {
    if (word2ids_.count(w)) {
      return word2ids_.at(w);
    }

    if (token2id_.count(w)) {
      return {token2id_.at(w)};
    }

    std::vector<int32_t> ans;

    std::vector<std::string> words = SplitUtf8(w);
    for (const auto &word : words) {
      if (word2ids_.count(word)) {
        auto ids = ConvertWordToIds(word);
        ans.insert(ans.end(), ids.begin(), ids.end());
      }
    }

    return ans;
  }

  void InitTokens(std::istream &is) {
    token2id_ = ReadTokens(is);

    std::vector<std::pair<std::string, std::string>> puncts = {
        {",", ","}, {".", "。"}, {"!", "!"}, {"?", "?"}};

    for (const auto &p : puncts) {
      if (token2id_.count(p.first) && !token2id_.count(p.second)) {
        token2id_[p.second] = token2id_[p.first];
      }
    }
  }

  void InitLexicon(std::istream &is) {
    std::string word;
    std::vector<std::string> token_list;
    std::string line;
    std::string phone;
    int32_t line_num = 0;

    while (std::getline(is, line)) {
      ++line_num;

      std::istringstream iss(line);

      token_list.clear();

      iss >> word;
      ToLowerCase(&word);

      if (word2ids_.count(word)) {
        SHERPA_ONNX_LOGE("Duplicated word: %s at line %d:%s. Ignore it.",
                         word.c_str(), line_num, line.c_str());
        continue;
      }

      while (iss >> phone) {
        token_list.push_back(std::move(phone));
      }

      std::vector<int32_t> ids = ConvertTokensToIds(token2id_, token_list);
      if (ids.empty()) {
        continue;
      }

      word2ids_.insert({std::move(word), std::move(ids)});
    }
  }

 private:
  // lexicon.txt is saved in word2ids_
  std::unordered_map<std::string, std::vector<int32_t>> word2ids_;

  // tokens.txt is saved in token2id_
  std::unordered_map<std::string, int32_t> token2id_;

  OfflineTtsVitsModelMetaData meta_data_;

  std::unique_ptr<cppjieba::Jieba> jieba_;
  bool debug_ = false;
};

JiebaLexicon::~JiebaLexicon() = default;

JiebaLexicon::JiebaLexicon(const std::string &lexicon,
                           const std::string &tokens,
                           const std::string &dict_dir,
                           const OfflineTtsVitsModelMetaData &meta_data,
                           bool debug)
    : impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, meta_data,
                                   debug)) {}

std::vector<std::vector<int64_t>> JiebaLexicon::ConvertTextToTokenIds(
    const std::string &text, const std::string &unused_voice /*= ""*/) const {
  return impl_->ConvertTextToTokenIds(text);
}

}  // namespace sherpa_onnx
sherpa-onnx/csrc/jieba-lexicon.h (new file, 47 lines)
@@ -0,0 +1,47 @@
// sherpa-onnx/csrc/jieba-lexicon.h
//
// Copyright (c)  2022-2024  Xiaomi Corporation

#ifndef SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_
#define SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#if __ANDROID_API__ >= 9
#include "android/asset_manager.h"
#include "android/asset_manager_jni.h"
#endif

#include "sherpa-onnx/csrc/offline-tts-frontend.h"
#include "sherpa-onnx/csrc/offline-tts-vits-model-metadata.h"

namespace sherpa_onnx {

class JiebaLexicon : public OfflineTtsFrontend {
 public:
  ~JiebaLexicon() override;
  JiebaLexicon(const std::string &lexicon, const std::string &tokens,
               const std::string &dict_dir,
               const OfflineTtsVitsModelMetaData &meta_data, bool debug);

#if __ANDROID_API__ >= 9
  JiebaLexicon(AAssetManager *mgr, const std::string &lexicon,
               const std::string &tokens, const std::string &dict_dir,
               const OfflineTtsVitsModelMetaData &meta_data);
#endif

  std::vector<std::vector<int64_t>> ConvertTextToTokenIds(
      const std::string &text,
      const std::string &unused_voice = "") const override;

 private:
  class Impl;
  std::unique_ptr<Impl> impl_;
};

}  // namespace sherpa_onnx

#endif  // SHERPA_ONNX_CSRC_JIEBA_LEXICON_H_
sherpa-onnx/csrc/lexicon.cc
@@ -76,7 +76,7 @@ static std::vector<std::string> ProcessHeteronyms(

 // Note: We don't use SymbolTable here since tokens may contain a blank
 // in the first column
-static std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
+std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
   std::unordered_map<std::string, int32_t> token2id;

   std::string line;
@@ -113,7 +113,7 @@ static std::unordered_map<std::string, int32_t> ReadTokens(std::istream &is) {
   return token2id;
 }

-static std::vector<int32_t> ConvertTokensToIds(
+std::vector<int32_t> ConvertTokensToIds(
     const std::unordered_map<std::string, int32_t> &token2id,
     const std::vector<std::string> &tokens) {
   std::vector<int32_t> ids;
sherpa-onnx/csrc/offline-tts-vits-impl.cc
@@ -19,6 +19,7 @@
 #include "fst/extensions/far/far.h"
 #include "kaldifst/csrc/kaldi-fst-io.h"
 #include "kaldifst/csrc/text-normalizer.h"
+#include "sherpa-onnx/csrc/jieba-lexicon.h"
 #include "sherpa-onnx/csrc/lexicon.h"
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/offline-tts-character-frontend.h"
@@ -290,9 +291,26 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
   void InitFrontend() {
     const auto &meta_data = model_->GetMetaData();

+    if (meta_data.jieba && config_.model.vits.dict_dir.empty()) {
+      SHERPA_ONNX_LOGE(
+          "Please provide --vits-dict-dir for Chinese TTS models using jieba");
+      exit(-1);
+    }
+
+    if (!meta_data.jieba && !config_.model.vits.dict_dir.empty()) {
+      SHERPA_ONNX_LOGE(
+          "Current model is not using jieba but you provided --vits-dict-dir");
+      exit(-1);
+    }
+
     if (meta_data.frontend == "characters") {
       frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
           config_.model.vits.tokens, meta_data);
+    } else if (meta_data.jieba && !config_.model.vits.dict_dir.empty()) {
+      frontend_ = std::make_unique<JiebaLexicon>(
+          config_.model.vits.lexicon, config_.model.vits.tokens,
+          config_.model.vits.dict_dir, model_->GetMetaData(),
+          config_.model.debug);
     } else if ((meta_data.is_piper || meta_data.is_coqui ||
                 meta_data.is_icefall) &&
                !config_.model.vits.data_dir.empty()) {
sherpa-onnx/csrc/offline-tts-vits-model-config.cc
@@ -4,6 +4,8 @@

 #include "sherpa-onnx/csrc/offline-tts-vits-model-config.h"

+#include <vector>
+
 #include "sherpa-onnx/csrc/file-utils.h"
 #include "sherpa-onnx/csrc/macros.h"
@@ -16,6 +18,9 @@ void OfflineTtsVitsModelConfig::Register(ParseOptions *po) {
   po->Register("vits-data-dir", &data_dir,
                "Path to the directory containing dict for espeak-ng. If it is "
                "given, --vits-lexicon is ignored.");
+  po->Register("vits-dict-dir", &dict_dir,
+               "Path to the directory containing dict for jieba. Used only "
+               "for Chinese TTS models using jieba");
   po->Register("vits-noise-scale", &noise_scale, "noise_scale for VITS models");
   po->Register("vits-noise-scale-w", &noise_scale_w,
                "noise_scale_w for VITS models");
@@ -64,12 +69,24 @@ bool OfflineTtsVitsModelConfig::Validate() const {
     }

     if (!FileExists(data_dir + "/intonations")) {
-      SHERPA_ONNX_LOGE("%s/intonations does not exist. Skipping test",
-                       data_dir.c_str());
+      SHERPA_ONNX_LOGE("%s/intonations does not exist.", data_dir.c_str());
       return false;
     }
   }

+  if (!dict_dir.empty()) {
+    std::vector<std::string> required_files = {
+        "jieba.dict.utf8", "hmm_model.utf8", "user.dict.utf8",
+        "idf.utf8",        "stop_words.utf8",
+    };
+
+    for (const auto &f : required_files) {
+      if (!FileExists(dict_dir + "/" + f)) {
+        SHERPA_ONNX_LOGE("%s/%s does not exist.", dict_dir.c_str(), f.c_str());
+        return false;
+      }
+    }
+  }
   return true;
 }
@@ -81,6 +98,7 @@ std::string OfflineTtsVitsModelConfig::ToString() const {
   os << "lexicon=\"" << lexicon << "\", ";
   os << "tokens=\"" << tokens << "\", ";
   os << "data_dir=\"" << data_dir << "\", ";
+  os << "dict_dir=\"" << dict_dir << "\", ";
   os << "noise_scale=" << noise_scale << ", ";
   os << "noise_scale_w=" << noise_scale_w << ", ";
   os << "length_scale=" << length_scale << ")";
sherpa-onnx/csrc/offline-tts-vits-model-config.h
@@ -20,6 +20,9 @@ struct OfflineTtsVitsModelConfig {
   // data_dir is for piper-phonemize, which uses espeak-ng
   std::string data_dir;

+  // Used for Chinese TTS models using jieba
+  std::string dict_dir;
+
   float noise_scale = 0.667;
   float noise_scale_w = 0.8;
   float length_scale = 1;
@@ -33,12 +36,14 @@ struct OfflineTtsVitsModelConfig {
                             const std::string &lexicon,
                             const std::string &tokens,
                             const std::string &data_dir,
+                            const std::string &dict_dir,
                             float noise_scale = 0.667,
                             float noise_scale_w = 0.8, float length_scale = 1)
       : model(model),
         lexicon(lexicon),
         tokens(tokens),
         data_dir(data_dir),
+        dict_dir(dict_dir),
         noise_scale(noise_scale),
         noise_scale_w(noise_scale_w),
         length_scale(length_scale) {}
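A hedged illustration (editorial, not part of the diff) of constructing the extended config in C++; the file names are placeholders and the three float parameters keep their defaults (0.667, 0.8, 1):

  sherpa_onnx::OfflineTtsVitsModelConfig config(
      "vits-model.onnx", "lexicon.txt", "tokens.txt",
      /*data_dir=*/"", /*dict_dir=*/"./dict");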
sherpa-onnx/csrc/offline-tts-vits-model-metadata.h
@@ -22,6 +22,10 @@ struct OfflineTtsVitsModelMetaData {
   bool is_coqui = false;
   bool is_icefall = false;

+  // for Chinese TTS models from
+  // https://github.com/Plachtaa/VITS-fast-fine-tuning
+  int32_t jieba = 0;
+
   // the following options are for models from coqui-ai/TTS
   int32_t blank_id = 0;
   int32_t bos_id = 0;
sherpa-onnx/csrc/offline-tts-vits-model.cc
@@ -93,6 +93,7 @@ class OfflineTtsVitsModel::Impl {
   SHERPA_ONNX_READ_META_DATA_STR_WITH_DEFAULT(meta_data_.frontend, "frontend",
                                               "");

+  SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.jieba, "jieba", 0);
   SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.blank_id, "blank_id", 0);
   SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.bos_id, "bos_id", 0);
   SHERPA_ONNX_READ_META_DATA_WITH_DEFAULT(meta_data_.eos_id, "eos_id", 0);
sherpa-onnx/python/csrc/offline-tts-vits-model-config.cc
@@ -16,15 +16,17 @@ void PybindOfflineTtsVitsModelConfig(py::module *m) {
   py::class_<PyClass>(*m, "OfflineTtsVitsModelConfig")
       .def(py::init<>())
       .def(py::init<const std::string &, const std::string &,
-                    const std::string &, const std::string &, float, float,
-                    float>(),
+                    const std::string &, const std::string &,
+                    const std::string &, float, float, float>(),
            py::arg("model"), py::arg("lexicon"), py::arg("tokens"),
-           py::arg("data_dir") = "", py::arg("noise_scale") = 0.667,
-           py::arg("noise_scale_w") = 0.8, py::arg("length_scale") = 1.0)
+           py::arg("data_dir") = "", py::arg("dict_dir") = "",
+           py::arg("noise_scale") = 0.667, py::arg("noise_scale_w") = 0.8,
+           py::arg("length_scale") = 1.0)
       .def_readwrite("model", &PyClass::model)
       .def_readwrite("lexicon", &PyClass::lexicon)
       .def_readwrite("tokens", &PyClass::tokens)
       .def_readwrite("data_dir", &PyClass::data_dir)
+      .def_readwrite("dict_dir", &PyClass::dict_dir)
       .def_readwrite("noise_scale", &PyClass::noise_scale)
       .def_readwrite("noise_scale_w", &PyClass::noise_scale_w)
       .def_readwrite("length_scale", &PyClass::length_scale)