Add HarmonyOS examples for MatchaTTS. (#1678)
This commit is contained in:
@@ -2098,7 +2098,7 @@ SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
|
||||
}
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
|
||||
if (!mgr) {
|
||||
return SherpaOnnxCreateOfflineTts(config);
|
||||
|
||||
@@ -1618,7 +1618,7 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
|
||||
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
|
||||
NativeResourceManager *mgr);
|
||||
|
||||
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
|
||||
|
||||
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
|
||||
|
||||
@@ -6,12 +6,23 @@
|
||||
|
||||
#include <fstream>
|
||||
#include <regex> // NOLINT
|
||||
#include <strstream>
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#if __OHOS__
|
||||
#include "rawfile/raw_file_manager.h"
|
||||
#endif
|
||||
|
||||
#include "cppjieba/Jieba.hpp"
|
||||
#include "sherpa-onnx/csrc/file-utils.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||
#include "sherpa-onnx/csrc/symbol-table.h"
|
||||
#include "sherpa-onnx/csrc/text-utils.h"
|
||||
|
||||
@@ -56,6 +67,39 @@ class JiebaLexicon::Impl {
|
||||
}
|
||||
}
|
||||
|
||||
// Constructs the implementation when the tokens and lexicon files are loaded
// through a resource manager handle instead of being opened by path.
//
// mgr      - handle forwarded to ReadFile() for `tokens` and `lexicon`.
// lexicon  - name of the lexicon file, loaded with ReadFile(mgr, lexicon).
// tokens   - name of the tokens file, loaded with ReadFile(mgr, tokens).
// dict_dir - directory containing the cppjieba dictionary files. These are
//            used as plain path strings here and are NOT read through mgr.
// debug    - stored in debug_.
template <typename Manager>
|
||||
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||
const std::string &dict_dir, bool debug)
|
||||
: debug_(debug) {
|
||||
// Paths of the five dictionary files that are handed to cppjieba below.
std::string dict = dict_dir + "/jieba.dict.utf8";
|
||||
std::string hmm = dict_dir + "/hmm_model.utf8";
|
||||
std::string user_dict = dict_dir + "/user.dict.utf8";
|
||||
std::string idf = dict_dir + "/idf.utf8";
|
||||
std::string stop_word = dict_dir + "/stop_words.utf8";
|
||||
|
||||
// Validate every dictionary path before cppjieba is constructed; see
// AssertFileExists() for what happens when a file is missing.
AssertFileExists(dict);
|
||||
AssertFileExists(hmm);
|
||||
AssertFileExists(user_dict);
|
||||
AssertFileExists(idf);
|
||||
AssertFileExists(stop_word);
|
||||
|
||||
// Create the cppjieba::Jieba instance from the dictionary paths.
jieba_ =
|
||||
std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);
|
||||
|
||||
{
|
||||
// Load the tokens file via mgr and feed the in-memory buffer to
// InitTokens() through an input stream. `buf` must stay alive while `is`
// is in use, which the enclosing scope guarantees.
auto buf = ReadFile(mgr, tokens);
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
|
||||
InitTokens(is);
|
||||
}
|
||||
|
||||
{
|
||||
// Same pattern for the lexicon file, consumed by InitLexicon().
auto buf = ReadFile(mgr, lexicon);
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
InitLexicon(is);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
|
||||
// see
|
||||
// https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
|
||||
@@ -279,9 +323,29 @@ JiebaLexicon::JiebaLexicon(const std::string &lexicon,
|
||||
const std::string &dict_dir, bool debug)
|
||||
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}
|
||||
|
||||
// Resource-manager overload of the constructor: forwards mgr and all other
// arguments unchanged to the matching Impl constructor and owns the result
// in impl_.
template <typename Manager>
|
||||
JiebaLexicon::JiebaLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const std::string &dict_dir, bool debug)
|
||||
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, debug)) {}
|
||||
|
||||
// Converts `text` to token IDs by delegating to the implementation object.
// The second (voice) parameter is left unnamed and is not used here.
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
|
||||
const std::string &text, const std::string & /*unused_voice = ""*/) const {
|
||||
return impl_->ConvertTextToTokenIds(text);
|
||||
}
|
||||
|
||||
// Explicit instantiations of the templated JiebaLexicon constructor, one per
// supported resource manager type. Each is compiled only when the matching
// platform macro is set.
// NOTE(review): presumably needed because the template constructor is defined
// in this translation unit rather than in the header - confirm.

// Android: instantiate for AAssetManager.
#if __ANDROID_API__ >= 9
|
||||
template JiebaLexicon::JiebaLexicon(AAssetManager *mgr,
|
||||
const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const std::string &dict_dir, bool debug);
|
||||
#endif
|
||||
|
||||
// HarmonyOS (OHOS): instantiate for NativeResourceManager.
#if __OHOS__
|
||||
template JiebaLexicon::JiebaLexicon(NativeResourceManager *mgr,
|
||||
const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const std::string &dict_dir, bool debug);
|
||||
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -17,9 +17,15 @@ namespace sherpa_onnx {
|
||||
class JiebaLexicon : public OfflineTtsFrontend {
|
||||
public:
|
||||
~JiebaLexicon() override;
|
||||
|
||||
JiebaLexicon(const std::string &lexicon, const std::string &tokens,
|
||||
const std::string &dict_dir, bool debug);
|
||||
|
||||
template <typename Manager>
|
||||
JiebaLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens, const std::string &dict_dir,
|
||||
bool debug);
|
||||
|
||||
std::vector<TokenIDs> ConvertTextToTokenIds(
|
||||
const std::string &text,
|
||||
const std::string &unused_voice = "") const override;
|
||||
|
||||
@@ -327,13 +327,12 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
|
||||
// from assets to disk
|
||||
//
|
||||
// for jieba
|
||||
// we require that you copy tokens.txt, lexicon.txt and dict
|
||||
// from assets to disk
|
||||
// we require that you copy dict from assets to disk
|
||||
const auto &meta_data = model_->GetMetaData();
|
||||
|
||||
if (meta_data.jieba && !meta_data.has_espeak) {
|
||||
frontend_ = std::make_unique<JiebaLexicon>(
|
||||
config_.model.matcha.lexicon, config_.model.matcha.tokens,
|
||||
mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
|
||||
config_.model.matcha.dict_dir, config_.model.debug);
|
||||
} else if (meta_data.has_espeak && !meta_data.jieba) {
|
||||
frontend_ = std::make_unique<PiperPhonemizeLexicon>(
|
||||
|
||||
Reference in New Issue
Block a user