Add dict_dir argument to the C API to support Chinese TTS models that use jieba (#809)
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
|
||||
project(sherpa-onnx)
|
||||
|
||||
set(SHERPA_ONNX_VERSION "1.9.22")
|
||||
set(SHERPA_ONNX_VERSION "1.9.23")
|
||||
|
||||
# Disable warning about
|
||||
#
|
||||
|
||||
@@ -470,6 +470,19 @@ void CNonStreamingTextToSpeechDlg::Init() {
|
||||
} else if (Exists("./lexicon.txt")) {
|
||||
config.model.vits.lexicon = "./lexicon.txt";
|
||||
}
|
||||
|
||||
if (Exists("./dict/jieba.dict.utf8")) {
|
||||
config.model.vits.dict_dir = "./dict";
|
||||
}
|
||||
|
||||
if (Exists("./phone.fst") && Exists("./date.fst") && Exists("./number.fst")) {
|
||||
config.rule_fsts = "./phone.fst,./date.fst,number.fst";
|
||||
}
|
||||
|
||||
if (Exists("./rule.far")) {
|
||||
config.rule_fars = "./rule.far";
|
||||
}
|
||||
|
||||
config.model.vits.tokens = "./tokens.txt";
|
||||
|
||||
tts_ = SherpaOnnxCreateOfflineTts(&config);
|
||||
|
||||
@@ -8,6 +8,7 @@ function createOfflineTts() {
|
||||
lexicon: '',
|
||||
tokens: './vits-piper-en_US-amy-low/tokens.txt',
|
||||
dataDir: './vits-piper-en_US-amy-low/espeak-ng-data',
|
||||
dictDir: '',
|
||||
noiseScale: 0.667,
|
||||
noiseScaleW: 0.8,
|
||||
lengthScale: 1.0,
|
||||
|
||||
@@ -8,6 +8,7 @@ function createOfflineTts() {
|
||||
lexicon: './vits-icefall-zh-aishell3/lexicon.txt',
|
||||
tokens: './vits-icefall-zh-aishell3/tokens.txt',
|
||||
dataDir: '',
|
||||
dictDir: '',
|
||||
noiseScale: 0.667,
|
||||
noiseScaleW: 0.8,
|
||||
lengthScale: 1.0,
|
||||
|
||||
@@ -23,6 +23,8 @@ namespace SherpaOnnx
|
||||
NoiseScale = 0.667F;
|
||||
NoiseScaleW = 0.8F;
|
||||
LengthScale = 1.0F;
|
||||
|
||||
DictDir = "";
|
||||
}
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string Model;
|
||||
@@ -39,6 +41,9 @@ namespace SherpaOnnx
|
||||
public float NoiseScale;
|
||||
public float NoiseScaleW;
|
||||
public float LengthScale;
|
||||
|
||||
[MarshalAs(UnmanagedType.LPStr)]
|
||||
public string DictDir;
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
|
||||
@@ -532,10 +532,11 @@ type OfflineTtsVitsModelConfig struct {
|
||||
Model string // Path to the VITS onnx model
|
||||
Lexicon string // Path to lexicon.txt
|
||||
Tokens string // Path to tokens.txt
|
||||
DataDir string // Path to tokens.txt
|
||||
DataDir string // Path to espeak-ng-data directory
|
||||
NoiseScale float32 // noise scale for vits models. Please use 0.667 in general
|
||||
NoiseScaleW float32 // noise scale for vits models. Please use 0.8 in general
|
||||
LengthScale float32 // Please use 1.0 in general. Smaller -> Faster speech speed. Larger -> Slower speech speed
|
||||
DictDir string // Path to dict directory for jieba (used only in Chinese tts)
|
||||
}
|
||||
|
||||
type OfflineTtsModelConfig struct {
|
||||
@@ -605,6 +606,9 @@ func NewOfflineTts(config *OfflineTtsConfig) *OfflineTts {
|
||||
c.model.vits.noise_scale_w = C.float(config.Model.Vits.NoiseScaleW)
|
||||
c.model.vits.length_scale = C.float(config.Model.Vits.LengthScale)
|
||||
|
||||
c.model.vits.dict_dir = C.CString(config.Model.Vits.DictDir)
|
||||
defer C.free(unsafe.Pointer(c.model.vits.dict_dir))
|
||||
|
||||
c.model.num_threads = C.int(config.Model.NumThreads)
|
||||
c.model.debug = C.int(config.Model.Debug)
|
||||
|
||||
|
||||
@@ -818,6 +818,8 @@ SherpaOnnxOfflineTts *SherpaOnnxCreateOfflineTts(
|
||||
SHERPA_ONNX_OR(config->model.vits.noise_scale_w, 0.8);
|
||||
tts_config.model.vits.length_scale =
|
||||
SHERPA_ONNX_OR(config->model.vits.length_scale, 1.0);
|
||||
tts_config.model.vits.dict_dir =
|
||||
SHERPA_ONNX_OR(config->model.vits.dict_dir, "");
|
||||
|
||||
tts_config.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
|
||||
tts_config.model.debug = config->model.debug;
|
||||
|
||||
@@ -772,6 +772,7 @@ SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsVitsModelConfig {
|
||||
float noise_scale;
|
||||
float noise_scale_w;
|
||||
float length_scale; // < 1, faster in speed; > 1, slower in speed
|
||||
const char *dict_dir;
|
||||
} SherpaOnnxOfflineTtsVitsModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTtsModelConfig {
|
||||
|
||||
@@ -623,7 +623,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
|
||||
dataDir: String = "",
|
||||
noiseScale: Float = 0.667,
|
||||
noiseScaleW: Float = 0.8,
|
||||
lengthScale: Float = 1.0
|
||||
lengthScale: Float = 1.0,
|
||||
dictDir: String = ""
|
||||
) -> SherpaOnnxOfflineTtsVitsModelConfig {
|
||||
return SherpaOnnxOfflineTtsVitsModelConfig(
|
||||
model: toCPointer(model),
|
||||
@@ -632,7 +633,8 @@ func sherpaOnnxOfflineTtsVitsModelConfig(
|
||||
data_dir: toCPointer(dataDir),
|
||||
noise_scale: noiseScale,
|
||||
noise_scale_w: noiseScaleW,
|
||||
length_scale: lengthScale)
|
||||
length_scale: lengthScale,
|
||||
dict_dir: toCPointer(dictDir))
|
||||
}
|
||||
|
||||
func sherpaOnnxOfflineTtsModelConfig(
|
||||
|
||||
@@ -43,6 +43,7 @@ void PrintOfflineTtsConfig(SherpaOnnxOfflineTtsConfig *tts_config) {
|
||||
fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
|
||||
fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
|
||||
fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
|
||||
fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);
|
||||
|
||||
fprintf(stdout, "----------tts model config----------\n");
|
||||
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
||||
|
||||
@@ -18,7 +18,12 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
|
||||
const tokensLen = Module.lengthBytesUTF8(config.tokens) + 1;
|
||||
const dataDirLen = Module.lengthBytesUTF8(config.dataDir) + 1;
|
||||
|
||||
const n = modelLen + lexiconLen + tokensLen + dataDirLen;
|
||||
if (!('dictDir' in config)) {
|
||||
config.dictDir = ''
|
||||
}
|
||||
const dictDirLen = Module.lengthBytesUTF8(config.dictDir) + 1;
|
||||
|
||||
const n = modelLen + lexiconLen + tokensLen + dataDirLen + dictDirLen;
|
||||
|
||||
const buffer = Module._malloc(n);
|
||||
|
||||
@@ -38,6 +43,9 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
|
||||
Module.stringToUTF8(config.dataDir, buffer + offset, dataDirLen);
|
||||
offset += dataDirLen;
|
||||
|
||||
Module.stringToUTF8(config.dictDir, buffer + offset, dictDirLen);
|
||||
offset += dictDirLen;
|
||||
|
||||
offset = 0;
|
||||
Module.setValue(ptr, buffer + offset, 'i8*');
|
||||
offset += modelLen;
|
||||
@@ -54,6 +62,8 @@ function initSherpaOnnxOfflineTtsVitsModelConfig(config, Module) {
|
||||
Module.setValue(ptr + 16, config.noiseScale, 'float');
|
||||
Module.setValue(ptr + 20, config.noiseScaleW, 'float');
|
||||
Module.setValue(ptr + 24, config.lengthScale, 'float');
|
||||
Module.setValue(ptr + 28, buffer + offset, 'i8*');
|
||||
offset += dictDirLen;
|
||||
|
||||
return {
|
||||
buffer: buffer, ptr: ptr, len: len,
|
||||
@@ -184,6 +194,7 @@ function createOfflineTts(Module, myConfig) {
|
||||
lexicon: '',
|
||||
tokens: './tokens.txt',
|
||||
dataDir: './espeak-ng-data',
|
||||
dictDir: '',
|
||||
noiseScale: 0.667,
|
||||
noiseScaleW: 0.8,
|
||||
lengthScale: 1.0,
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
|
||||
extern "C" {
|
||||
|
||||
static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 7 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineTtsVitsModelConfig) == 8 * 4, "");
|
||||
static_assert(sizeof(SherpaOnnxOfflineTtsModelConfig) ==
|
||||
sizeof(SherpaOnnxOfflineTtsVitsModelConfig) + 3 * 4,
|
||||
"");
|
||||
@@ -32,6 +32,7 @@ void MyPrint(SherpaOnnxOfflineTtsConfig *tts_config) {
|
||||
fprintf(stdout, "noise scale: %.3f\n", vits_model_config->noise_scale);
|
||||
fprintf(stdout, "noise scale w: %.3f\n", vits_model_config->noise_scale_w);
|
||||
fprintf(stdout, "length scale: %.3f\n", vits_model_config->length_scale);
|
||||
fprintf(stdout, "dict_dir: %s\n", vits_model_config->dict_dir);
|
||||
|
||||
fprintf(stdout, "----------tts model config----------\n");
|
||||
fprintf(stdout, "num threads: %d\n", tts_model_config->num_threads);
|
||||
|
||||
Reference in New Issue
Block a user