Support scaling the duration of a pause in TTS. (#1820)

This commit is contained in:
Fangjun Kuang
2025-02-08 12:47:26 +08:00
committed by GitHub
parent d38cb81014
commit 69f489f0cd
24 changed files with 171 additions and 19 deletions

View File

@@ -21,7 +21,7 @@ function freeConfig(config, Module) {
// The user should free the returned pointers
function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
const modelLen = Module.lengthBytesUTF8(config.model || '')+ 1;
const modelLen = Module.lengthBytesUTF8(config.model || '') + 1;
const lexiconLen = Module.lengthBytesUTF8(config.lexicon || '') + 1;
const tokensLen = Module.lengthBytesUTF8(config.tokens || '') + 1;
const dataDirLen = Module.lengthBytesUTF8(config.dataDir || '') + 1;
@@ -282,7 +282,7 @@ function initSherpaOnnxOfflineTtsModelConfig(config, Module) {
function initSherpaOnnxOfflineTtsConfig(config, Module) {
const modelConfig =
initSherpaOnnxOfflineTtsModelConfig(config.offlineTtsModelConfig, Module);
const len = modelConfig.len + 3 * 4;
const len = modelConfig.len + 4 * 4;
const ptr = Module._malloc(len);
let offset = 0;
@@ -303,6 +303,10 @@ function initSherpaOnnxOfflineTtsConfig(config, Module) {
offset += 4;
Module.setValue(ptr + offset, buffer + ruleFstsLen, 'i8*');
offset += 4;
Module.setValue(ptr + offset, config.silenceScale || 0.2, 'float');
offset += 4;
return {
buffer: buffer, ptr: ptr, len: len, config: modelConfig,

View File

@@ -22,7 +22,7 @@ static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
sizeof(SherpaOnnxOfflineTtsKokoroModelConfig) + 3 * 4,
"");
static_assert(sizeof(SherpaOnnxOfflineTtsConfig) ==
sizeof(SherpaOnnxOfflineTtsModelConfig) + 3 * 4,
sizeof(SherpaOnnxOfflineTtsModelConfig) + 4 * 4,
"");
void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
@@ -68,6 +68,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
fprintf(stdout, "rule_fsts: %s\n", tts_config->rule_fsts);
fprintf(stdout, "rule_fars: %s\n", tts_config->rule_fars);
fprintf(stdout, "max num sentences: %d\n", tts_config->max_num_sentences);
fprintf(stdout, "silence scale: %.3f\n", tts_config->silence_scale);
}
void CopyHeap(const char *src, int32_t num_bytes, char *dst) {