Use piper-phonemize to convert text to token IDs (#453)

This commit is contained in:
Fangjun Kuang
2023-11-30 23:57:43 +08:00
committed by GitHub
parent db41778e99
commit 62dc3c3e46
55 changed files with 1048 additions and 192 deletions

View File

@@ -547,6 +547,8 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
tts_config.model.vits.lexicon =
SHERPA_ONNX_OR(config->model.vits.lexicon, "");
tts_config.model.vits.tokens = SHERPA_ONNX_OR(config->model.vits.tokens, "");
tts_config.model.vits.data_dir =
SHERPA_ONNX_OR(config->model.vits.data_dir, "");
tts_config.model.vits.noise_scale =
SHERPA_ONNX_OR(config->model.vits.noise_scale, 0.667);
tts_config.model.vits.noise_scale_w =
@@ -558,6 +560,7 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
tts_config.model.debug = config->model.debug;
tts_config.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
tts_config.rule_fsts = SHERPA_ONNX_OR(config->rule_fsts, "");
tts_config.max_num_sentences = SHERPA_ONNX_OR(config->max_num_sentences, 2);
if (tts_config.model.debug) {
fprintf(stderr, "%s\n", tts_config.ToString().c_str());

View File

@@ -607,6 +607,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
const char *model;
const char *lexicon;
const char *tokens;
const char *data_dir;
float noise_scale;
float noise_scale_w;
@@ -623,6 +624,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
// Top-level configuration for offline text-to-speech.
// NOTE(review): appears to be the struct read by SherpaOnnxCreateOfflineTts()
// (it accesses config->model, config->rule_fsts and
// config->max_num_sentences) -- confirm against that function's signature.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsConfig {
// Nested model settings (see SherpaOnnxOfflineTtsModelConfig).
SherpaOnnxOfflineTtsModelConfig model;
// Copied into the C++ config via SHERPA_ONNX_OR(config->rule_fsts, ""), so
// an unset value presumably falls back to the empty string.
// NOTE(review): presumably path(s) to rule FST file(s) -- confirm.
const char *rule_fsts;
// Copied via SHERPA_ONNX_OR(config->max_num_sentences, 2), so an unset
// (presumably 0) value falls back to 2 -- confirm the macro's semantics.
int32_t max_num_sentences;
} SherpaOnnxOfflineTtsConfig;
SHERPA_ONNX_API typedef struct SherpaOnnxGeneratedAudio {