Add on-device tex-to-speech (TTS) demo for HarmonyOS (#1590)
This commit is contained in:
@@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
tn_list_.reserve(files.size());
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
|
||||
}
|
||||
@@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
|
||||
fst::FarReader<fst::StdArc>::Open(f));
|
||||
@@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
tn_list_.reserve(files.size());
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
auto buf = ReadFile(mgr, f);
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
@@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
auto buf = ReadFile(mgr, f);
|
||||
@@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
std::string text = _text;
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!tn_list_.empty()) {
|
||||
for (const auto &tn : tn_list_) {
|
||||
text = tn->Normalize(text);
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
int32_t num_batches = x_size / batch_size;
|
||||
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Text is too long. Split it into %{public}d batches. batch size: "
|
||||
"%{public}d. Number of sentences: %{public}d",
|
||||
num_batches, batch_size, x_size);
|
||||
#else
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Text is too long. Split it into %d batches. batch size: %d. Number "
|
||||
"of sentences: %d",
|
||||
num_batches, batch_size, x_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
GeneratedAudio ans;
|
||||
@@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
audio.samples.end());
|
||||
if (callback) {
|
||||
should_continue = callback(audio.samples.data(), audio.samples.size(),
|
||||
b * 1.0 / num_batches);
|
||||
(b + 1) * 1.0 / num_batches);
|
||||
// Caution(fangjun): audio is freed when the callback returns, so users
|
||||
// should copy the data if they want to access the data after
|
||||
// the callback returns to avoid segmentation fault.
|
||||
@@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
if (meta_data.frontend == "characters") {
|
||||
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
|
||||
mgr, config_.model.vits.tokens, meta_data);
|
||||
} else if (meta_data.jieba && !config_.model.vits.dict_dir.empty() &&
|
||||
meta_data.is_melo_tts) {
|
||||
frontend_ = std::make_unique<MeloTtsLexicon>(
|
||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||
config_.model.vits.dict_dir, model_->GetMetaData(),
|
||||
config_.model.debug);
|
||||
} else if (meta_data.is_melo_tts && meta_data.language == "English") {
|
||||
frontend_ = std::make_unique<MeloTtsLexicon>(
|
||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||
model_->GetMetaData(), config_.model.debug);
|
||||
} else if ((meta_data.is_piper || meta_data.is_coqui ||
|
||||
meta_data.is_icefall) &&
|
||||
!config_.model.vits.data_dir.empty()) {
|
||||
|
||||
Reference in New Issue
Block a user