Add HarmonyOS examples for MatchaTTS. (#1678)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
||||||
*/
|
*/
|
||||||
export const HAR_VERSION = '1.10.35';
|
export const HAR_VERSION = '1.10.37';
|
||||||
export const BUILD_MODE_NAME = 'debug';
|
export const BUILD_MODE_NAME = 'debug';
|
||||||
export const DEBUG = true;
|
export const DEBUG = true;
|
||||||
export const TARGET_NAME = 'default';
|
export const TARGET_NAME = 'default';
|
||||||
|
|||||||
@@ -1,11 +1,6 @@
|
|||||||
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
|
export { listRawfileDir, readWave, readWaveFromBinary, } from "libsherpa_onnx.so";
|
||||||
|
|
||||||
export { CircularBuffer,
|
export { CircularBuffer, SileroVadConfig, SpeechSegment, Vad, VadConfig, } from './src/main/ets/components/Vad';
|
||||||
SileroVadConfig,
|
|
||||||
SpeechSegment,
|
|
||||||
Vad,
|
|
||||||
VadConfig,
|
|
||||||
} from './src/main/ets/components/Vad';
|
|
||||||
|
|
||||||
|
|
||||||
export { Samples,
|
export { Samples,
|
||||||
@@ -36,7 +31,8 @@ export { OnlineStream,
|
|||||||
OnlineRecognizer,
|
OnlineRecognizer,
|
||||||
} from './src/main/ets/components/StreamingAsr';
|
} from './src/main/ets/components/StreamingAsr';
|
||||||
|
|
||||||
export { OfflineTtsVitsModelConfig,
|
export { OfflineTtsMatchaModelConfig,
|
||||||
|
OfflineTtsVitsModelConfig,
|
||||||
OfflineTtsModelConfig,
|
OfflineTtsModelConfig,
|
||||||
OfflineTtsConfig,
|
OfflineTtsConfig,
|
||||||
OfflineTts,
|
OfflineTts,
|
||||||
|
|||||||
@@ -17,8 +17,20 @@ export class OfflineTtsVitsModelConfig {
|
|||||||
public lengthScale: number = 1.0;
|
public lengthScale: number = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export class OfflineTtsMatchaModelConfig {
|
||||||
|
public acousticModel: string = '';
|
||||||
|
public vocoder: string = '';
|
||||||
|
public lexicon: string = '';
|
||||||
|
public tokens: string = '';
|
||||||
|
public dataDir: string = '';
|
||||||
|
// Use the primitive 'string' type (not the boxed 'String' wrapper), consistent with the other fields of this class.
public dictDir: string = '';
|
||||||
|
public noiseScale: number = 0.667;
|
||||||
|
public lengthScale: number = 1.0;
|
||||||
|
}
|
||||||
|
|
||||||
export class OfflineTtsModelConfig {
|
export class OfflineTtsModelConfig {
|
||||||
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
|
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
|
||||||
|
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
|
||||||
public numThreads: number = 1;
|
public numThreads: number = 1;
|
||||||
public debug: boolean = false;
|
public debug: boolean = false;
|
||||||
public provider: string = 'cpu';
|
public provider: string = 'cpu';
|
||||||
|
|||||||
@@ -73,7 +73,16 @@ function initTts(context: Context): OfflineTts {
|
|||||||
// for details
|
// for details
|
||||||
|
|
||||||
let modelDir = '';
|
let modelDir = '';
|
||||||
|
|
||||||
|
// for VITS begin
|
||||||
let modelName = '';
|
let modelName = '';
|
||||||
|
// for VITS end
|
||||||
|
|
||||||
|
// for Matcha begin
|
||||||
|
let acousticModelName = '';
|
||||||
|
let vocoder = '';
|
||||||
|
// for Matcha end
|
||||||
|
|
||||||
let ruleFsts = '';
|
let ruleFsts = '';
|
||||||
let ruleFars = '';
|
let ruleFars = '';
|
||||||
let lexicon = '';
|
let lexicon = '';
|
||||||
@@ -134,15 +143,47 @@ function initTts(context: Context): OfflineTts {
|
|||||||
// dictDir = 'dict';
|
// dictDir = 'dict';
|
||||||
// ruleFsts = `date.fst,phone.fst,number.fst`;
|
// ruleFsts = `date.fst,phone.fst,number.fst`;
|
||||||
|
|
||||||
|
// Example 8
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||||
|
// modelDir = 'matcha-icefall-zh-baker'
|
||||||
|
// acousticModelName = 'model-steps-3.onnx'
|
||||||
|
// vocoder = 'hifigan_v2.onnx'
|
||||||
|
// lexicon = 'lexicon.txt'
|
||||||
|
// dictDir = 'dict';
|
||||||
|
// ruleFsts = `date.fst,phone.fst,number.fst`;
|
||||||
|
|
||||||
|
// Example 9
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
|
// modelDir = 'matcha-icefall-en_US-ljspeech'
|
||||||
|
// acousticModelName = 'model-steps-3.onnx'
|
||||||
|
// vocoder = 'hifigan_v2.onnx'
|
||||||
|
// dataDir = 'espeak-ng-data';
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// Please don't change the remaining part of this function
|
// Please don't change the remaining part of this function
|
||||||
// ============================================================
|
// ============================================================
|
||||||
|
|
||||||
if (modelName == '') {
|
if (modelName == '' && acousticModelName == '' && vocoder == '') {
|
||||||
throw new Error('You are supposed to select a model by changing the code before you run the app');
|
throw new Error('You are supposed to select a model by changing the code before you run the app');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (modelName != '' && acousticModelName != '') {
|
||||||
|
throw new Error('Please select either VITS or Matcha, not both');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (acousticModelName != '' && vocoder == '') {
|
||||||
|
throw new Error('Please provide a vocoder for matcha tts models');
|
||||||
|
}
|
||||||
|
|
||||||
|
if (modelName != '') {
|
||||||
modelName = modelDir + '/' + modelName;
|
modelName = modelDir + '/' + modelName;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (acousticModelName != '') {
|
||||||
|
acousticModelName = modelDir + '/' + acousticModelName;
|
||||||
|
}
|
||||||
|
|
||||||
if (ruleFsts != '') {
|
if (ruleFsts != '') {
|
||||||
let fsts = ruleFsts.split(',')
|
let fsts = ruleFsts.split(',')
|
||||||
@@ -186,6 +227,14 @@ function initTts(context: Context): OfflineTts {
|
|||||||
config.model.vits.tokens = tokens;
|
config.model.vits.tokens = tokens;
|
||||||
config.model.vits.dataDir = dataDir;
|
config.model.vits.dataDir = dataDir;
|
||||||
config.model.vits.dictDir = dictDir;
|
config.model.vits.dictDir = dictDir;
|
||||||
|
|
||||||
|
config.model.matcha.acousticModel = acousticModelName;
|
||||||
|
config.model.matcha.vocoder = vocoder;
|
||||||
|
config.model.matcha.lexicon = lexicon;
|
||||||
|
config.model.matcha.tokens = tokens;
|
||||||
|
config.model.matcha.dataDir = dataDir;
|
||||||
|
config.model.matcha.dictDir = dictDir;
|
||||||
|
|
||||||
config.model.numThreads = 2;
|
config.model.numThreads = 2;
|
||||||
config.model.debug = true;
|
config.model.debug = true;
|
||||||
config.ruleFsts = ruleFsts;
|
config.ruleFsts = ruleFsts;
|
||||||
|
|||||||
@@ -2098,7 +2098,7 @@ SherpaOnnxCreateSpeakerEmbeddingExtractorOHOS(
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if SHERPA_ONNX_ENABLE_TTS == 1
|
#if SHERPA_ONNX_ENABLE_TTS == 1
|
||||||
SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||||
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
|
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr) {
|
||||||
if (!mgr) {
|
if (!mgr) {
|
||||||
return SherpaOnnxCreateOfflineTts(config);
|
return SherpaOnnxCreateOfflineTts(config);
|
||||||
|
|||||||
@@ -1618,7 +1618,7 @@ SherpaOnnxCreateVoiceActivityDetectorOHOS(
|
|||||||
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
|
const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds,
|
||||||
NativeResourceManager *mgr);
|
NativeResourceManager *mgr);
|
||||||
|
|
||||||
SHERPA_ONNX_API SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
SHERPA_ONNX_API const SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTtsOHOS(
|
||||||
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
|
const SherpaOnnxOfflineTtsConfig *config, NativeResourceManager *mgr);
|
||||||
|
|
||||||
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
|
SHERPA_ONNX_API const SherpaOnnxSpeakerEmbeddingExtractor *
|
||||||
|
|||||||
@@ -6,12 +6,23 @@
|
|||||||
|
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <regex> // NOLINT
|
#include <regex> // NOLINT
|
||||||
|
#include <strstream>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
|
#if __ANDROID_API__ >= 9
|
||||||
|
#include "android/asset_manager.h"
|
||||||
|
#include "android/asset_manager_jni.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __OHOS__
|
||||||
|
#include "rawfile/raw_file_manager.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "cppjieba/Jieba.hpp"
|
#include "cppjieba/Jieba.hpp"
|
||||||
#include "sherpa-onnx/csrc/file-utils.h"
|
#include "sherpa-onnx/csrc/file-utils.h"
|
||||||
#include "sherpa-onnx/csrc/macros.h"
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
|
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||||
#include "sherpa-onnx/csrc/symbol-table.h"
|
#include "sherpa-onnx/csrc/symbol-table.h"
|
||||||
#include "sherpa-onnx/csrc/text-utils.h"
|
#include "sherpa-onnx/csrc/text-utils.h"
|
||||||
|
|
||||||
@@ -56,6 +67,39 @@ class JiebaLexicon::Impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Constructs the implementation using a platform resource manager `mgr`.
//
// The five jieba dictionary files are located under `dict_dir` and handed to
// cppjieba::Jieba as file paths (each path is first passed to
// AssertFileExists); they are not read through `mgr`.
// The `tokens` and `lexicon` files, in contrast, are loaded with
// ReadFile(mgr, ...) and parsed from the in-memory buffer via InitTokens()
// and InitLexicon().
//
// @param mgr       Resource manager forwarded to ReadFile().
// @param lexicon   Name of the lexicon file, resolved by ReadFile(mgr, ...).
// @param tokens    Name of the tokens file, resolved by ReadFile(mgr, ...).
// @param dict_dir  Directory that contains the jieba *.utf8 files.
// @param debug     Stored in debug_.
//
// NOTE(review): std::istrstream (from <strstream>) is deprecated in the C++
// standard; it only views `buf`, so `buf` must stay alive while `is` is in
// use, which the enclosing braces below guarantee.
template <typename Manager>
|
||||||
|
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||||
|
const std::string &dict_dir, bool debug)
|
||||||
|
: debug_(debug) {
|
||||||
|
std::string dict = dict_dir + "/jieba.dict.utf8";
|
||||||
|
std::string hmm = dict_dir + "/hmm_model.utf8";
|
||||||
|
std::string user_dict = dict_dir + "/user.dict.utf8";
|
||||||
|
std::string idf = dict_dir + "/idf.utf8";
|
||||||
|
std::string stop_word = dict_dir + "/stop_words.utf8";
|
||||||
|
|
||||||
|
AssertFileExists(dict);
|
||||||
|
AssertFileExists(hmm);
|
||||||
|
AssertFileExists(user_dict);
|
||||||
|
AssertFileExists(idf);
|
||||||
|
AssertFileExists(stop_word);
|
||||||
|
|
||||||
|
jieba_ =
|
||||||
|
std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto buf = ReadFile(mgr, tokens);
|
||||||
|
std::istrstream is(buf.data(), buf.size());
|
||||||
|
|
||||||
|
InitTokens(is);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
auto buf = ReadFile(mgr, lexicon);
|
||||||
|
std::istrstream is(buf.data(), buf.size());
|
||||||
|
|
||||||
|
InitLexicon(is);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
|
std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &text) const {
|
||||||
// see
|
// see
|
||||||
// https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
|
// https://github.com/Plachtaa/VITS-fast-fine-tuning/blob/main/text/mandarin.py#L244
|
||||||
@@ -279,9 +323,29 @@ JiebaLexicon::JiebaLexicon(const std::string &lexicon,
|
|||||||
const std::string &dict_dir, bool debug)
|
const std::string &dict_dir, bool debug)
|
||||||
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}
|
: impl_(std::make_unique<Impl>(lexicon, tokens, dict_dir, debug)) {}
|
||||||
|
|
||||||
|
// Resource-manager overload of the constructor: forwards `mgr` and all other
// arguments unchanged to the matching Impl constructor.
template <typename Manager>
|
||||||
|
JiebaLexicon::JiebaLexicon(Manager *mgr, const std::string &lexicon,
|
||||||
|
const std::string &tokens,
|
||||||
|
const std::string &dict_dir, bool debug)
|
||||||
|
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, debug)) {}
|
||||||
|
|
||||||
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
|
std::vector<TokenIDs> JiebaLexicon::ConvertTextToTokenIds(
|
||||||
const std::string &text, const std::string & /*unused_voice = ""*/) const {
|
const std::string &text, const std::string & /*unused_voice = ""*/) const {
|
||||||
return impl_->ConvertTextToTokenIds(text);
|
return impl_->ConvertTextToTokenIds(text);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if __ANDROID_API__ >= 9
|
||||||
|
template JiebaLexicon::JiebaLexicon(AAssetManager *mgr,
|
||||||
|
const std::string &lexicon,
|
||||||
|
const std::string &tokens,
|
||||||
|
const std::string &dict_dir, bool debug);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __OHOS__
|
||||||
|
template JiebaLexicon::JiebaLexicon(NativeResourceManager *mgr,
|
||||||
|
const std::string &lexicon,
|
||||||
|
const std::string &tokens,
|
||||||
|
const std::string &dict_dir, bool debug);
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace sherpa_onnx
|
} // namespace sherpa_onnx
|
||||||
|
|||||||
@@ -17,9 +17,15 @@ namespace sherpa_onnx {
|
|||||||
class JiebaLexicon : public OfflineTtsFrontend {
|
class JiebaLexicon : public OfflineTtsFrontend {
|
||||||
public:
|
public:
|
||||||
~JiebaLexicon() override;
|
~JiebaLexicon() override;
|
||||||
|
|
||||||
JiebaLexicon(const std::string &lexicon, const std::string &tokens,
|
JiebaLexicon(const std::string &lexicon, const std::string &tokens,
|
||||||
const std::string &dict_dir, bool debug);
|
const std::string &dict_dir, bool debug);
|
||||||
|
|
||||||
|
template <typename Manager>
|
||||||
|
JiebaLexicon(Manager *mgr, const std::string &lexicon,
|
||||||
|
const std::string &tokens, const std::string &dict_dir,
|
||||||
|
bool debug);
|
||||||
|
|
||||||
std::vector<TokenIDs> ConvertTextToTokenIds(
|
std::vector<TokenIDs> ConvertTextToTokenIds(
|
||||||
const std::string &text,
|
const std::string &text,
|
||||||
const std::string &unused_voice = "") const override;
|
const std::string &unused_voice = "") const override;
|
||||||
|
|||||||
@@ -327,13 +327,12 @@ class OfflineTtsMatchaImpl : public OfflineTtsImpl {
|
|||||||
// from assets to disk
|
// from assets to disk
|
||||||
//
|
//
|
||||||
// for jieba
|
// for jieba
|
||||||
// we require that you copy tokens.txt, lexicon.txt and dict
|
// we require that you copy dict from assets to disk
|
||||||
// from assets to disk
|
|
||||||
const auto &meta_data = model_->GetMetaData();
|
const auto &meta_data = model_->GetMetaData();
|
||||||
|
|
||||||
if (meta_data.jieba && !meta_data.has_espeak) {
|
if (meta_data.jieba && !meta_data.has_espeak) {
|
||||||
frontend_ = std::make_unique<JiebaLexicon>(
|
frontend_ = std::make_unique<JiebaLexicon>(
|
||||||
config_.model.matcha.lexicon, config_.model.matcha.tokens,
|
mgr, config_.model.matcha.lexicon, config_.model.matcha.tokens,
|
||||||
config_.model.matcha.dict_dir, config_.model.debug);
|
config_.model.matcha.dict_dir, config_.model.debug);
|
||||||
} else if (meta_data.has_espeak && !meta_data.jieba) {
|
} else if (meta_data.has_espeak && !meta_data.jieba) {
|
||||||
frontend_ = std::make_unique<PiperPhonemizeLexicon>(
|
frontend_ = std::make_unique<PiperPhonemizeLexicon>(
|
||||||
|
|||||||
Reference in New Issue
Block a user