This repository has been archived on 2025-08-26. You can view files and clone it, but cannot push or open issues or pull requests.
Files
enginex_bi_series-sherpa-onnx/scripts/node-addon-api/src/non-streaming-tts.cc

330 lines
8.8 KiB
C++

// scripts/node-addon-api/src/non-streaming-tts.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <algorithm>
#include <cstring>
#include <sstream>
#include <string>

#include "macros.h" // NOLINT
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
// Builds the VITS part of the offline TTS model config from the JS object
// stored under the "vits" property of `obj`.
//
// @param obj JS object that may carry a "vits" sub-object.
// @return A config that is entirely zero-filled when "vits" is absent or is
//         not an object; otherwise the zero-filled config with whichever of
//         the properties below were picked up by the assignment macros.
//
// NOTE: the SHERPA_ONNX_ASSIGN_ATTR_* macros come from macros.h (not visible
// here). They are only handed a C field name and a JS property name, so they
// appear to rely on locals named exactly `c` (destination struct) and `o`
// (source JS object) -- do not rename those two.
static SherpaOnnxOfflineTtsVitsModelConfig GetOfflineTtsVitsModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsVitsModelConfig c;
  memset(&c, 0, sizeof(c));

  const bool has_vits = obj.Has("vits") && obj.Get("vits").IsObject();
  if (!has_vits) {
    return c;
  }

  Napi::Object o = obj.Get("vits").As<Napi::Object>();

  // String-valued settings: (C field, JS property).
  SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
  SHERPA_ONNX_ASSIGN_ATTR_STR(lexicon, lexicon);
  SHERPA_ONNX_ASSIGN_ATTR_STR(tokens, tokens);
  SHERPA_ONNX_ASSIGN_ATTR_STR(data_dir, dataDir);
  SHERPA_ONNX_ASSIGN_ATTR_STR(dict_dir, dictDir);

  // Float-valued settings: (C field, JS property).
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(noise_scale, noiseScale);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(noise_scale_w, noiseScaleW);
  SHERPA_ONNX_ASSIGN_ATTR_FLOAT(length_scale, lengthScale);

  return c;
}
// Builds the model section of the offline TTS config from the JS object
// stored under the "model" property of `obj`.
//
// @param obj JS object that may carry a "model" sub-object.
// @return A zero-filled config when "model" is absent or is not an object;
//         otherwise a config whose `vits` member comes from
//         GetOfflineTtsVitsModelConfig() and whose remaining members are
//         taken from the "numThreads", "debug" and "provider" properties.
//
// NOTE: the SHERPA_ONNX_ASSIGN_ATTR_* macros (macros.h, not visible here) are
// only given a field name and a property name, so they appear to rely on
// locals named exactly `c` and `o` -- do not rename those two.
static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
    Napi::Object obj) {
  SherpaOnnxOfflineTtsModelConfig c;
  memset(&c, 0, sizeof(c));

  const bool has_model = obj.Has("model") && obj.Get("model").IsObject();
  if (!has_model) {
    return c;
  }

  Napi::Object o = obj.Get("model").As<Napi::Object>();

  c.vits = GetOfflineTtsVitsModelConfig(o);

  SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);

  // "debug" is accepted either as a JS boolean or as a JS number; any other
  // type leaves c.debug at its zero default.
  if (o.Has("debug")) {
    Napi::Value debug_value = o.Get("debug");
    if (debug_value.IsBoolean()) {
      c.debug = debug_value.As<Napi::Boolean>().Value();
    } else if (debug_value.IsNumber()) {
      c.debug = debug_value.As<Napi::Number>().Int32Value();
    }
  }

  SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);

  return c;
}
// JS entry point that creates an offline TTS engine.
//
// Expects exactly one argument, a config object. The object is converted to
// a SherpaOnnxOfflineTtsConfig ("model", "ruleFsts", "maxNumSentences",
// "ruleFars") and handed to SherpaOnnxCreateOfflineTts().
//
// @return A Napi::External wrapping the native engine; its finalizer calls
//         SherpaOnnxDestroyOfflineTts(). On error a JS TypeError is raised
//         and an empty External is returned.
//
// NOTE: the SHERPA_ONNX_ASSIGN_ATTR_* macros (macros.h, not visible here) are
// only given a field name and a property name, so they appear to rely on
// locals named exactly `c` and `o` -- do not rename those two.
static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;
    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsObject()) {
    Napi::TypeError::New(env, "Expect an object as the argument")
        .ThrowAsJavaScriptException();
    return {};
  }

  Napi::Object o = info[0].As<Napi::Object>();

  SherpaOnnxOfflineTtsConfig c;
  memset(&c, 0, sizeof(c));

  c.model = GetOfflineTtsModelConfig(o);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fsts, ruleFsts);
  SHERPA_ONNX_ASSIGN_ATTR_INT32(max_num_sentences, maxNumSentences);
  SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);

  SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&c);

  // The string members of the config are heap arrays owned by this function
  // (presumably allocated with new[] by SHERPA_ONNX_ASSIGN_ATTR_STR -- confirm
  // in macros.h). They are released right after the engine has been created,
  // whether or not creation succeeded. Members that were never set are still
  // null from the memset above, and delete[] on a null pointer is a no-op.
  const char *const owned_strings[] = {
      c.model.vits.model,    c.model.vits.lexicon,  c.model.vits.tokens,
      c.model.vits.data_dir, c.model.vits.dict_dir, c.model.provider,
      c.rule_fsts,           c.rule_fars,
  };
  for (const char *str : owned_strings) {
    delete[] str;
  }

  if (tts == nullptr) {
    Napi::TypeError::New(env, "Please check your config!")
        .ThrowAsJavaScriptException();
    return {};
  }

  // Destroys the native engine when the JS wrapper is finalized.
  auto finalizer = [](Napi::Env /*env*/, SherpaOnnxOfflineTts *handle) {
    SherpaOnnxDestroyOfflineTts(handle);
  };
  return Napi::External<SherpaOnnxOfflineTts>::New(env, tts, finalizer);
}
// JS entry point that reports the sample rate of an offline TTS engine.
//
// Expects exactly one argument: the External returned by the engine factory.
//
// @return The value of SherpaOnnxOfflineTtsSampleRate() as a JS number. On a
//         bad argument list a JS TypeError is raised and an empty Number is
//         returned.
static Napi::Number OfflineTtsSampleRateWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;
    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();
    return {};
  }

  Napi::Value handle = info[0];
  if (!handle.IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  SherpaOnnxOfflineTts *tts =
      handle.As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  return Napi::Number::New(env, SherpaOnnxOfflineTtsSampleRate(tts));
}
// JS entry point that reports how many speakers an offline TTS engine has.
//
// Expects exactly one argument: the External returned by the engine factory.
//
// @return The value of SherpaOnnxOfflineTtsNumSpeakers() as a JS number. On a
//         bad argument list a JS TypeError is raised and an empty Number is
//         returned.
static Napi::Number OfflineTtsNumSpeakersWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  const size_t num_args = info.Length();
  if (num_args != 1) {
    std::ostringstream msg;
    msg << "Expect only 1 argument. Given: " << num_args;
    Napi::TypeError::New(env, msg.str()).ThrowAsJavaScriptException();
    return {};
  }

  Napi::Value handle = info[0];
  if (!handle.IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  SherpaOnnxOfflineTts *tts =
      handle.As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  return Napi::Number::New(env, SherpaOnnxOfflineTtsNumSpeakers(tts));
}
// JS entry point that synthesizes speech for a piece of text.
//
// Expects exactly two arguments:
//   0: the External wrapping a SherpaOnnxOfflineTts engine
//   1: an object with the required properties
//        text  (string) - text to synthesize
//        sid   (number) - speaker id, read as int32
//        speed (number) - speed factor, read as float
//      and the optional property
//        enableExternalBuffer (boolean, default true) - when true the returned
//        Float32Array is backed directly by the natively generated samples
//        and the native audio is destroyed by the ArrayBuffer finalizer; when
//        false the samples are copied into a JS-owned buffer and the native
//        audio is destroyed before returning.
//
// @return A JS object { samples: Float32Array, sampleRate: number }. On
//         invalid arguments a JS TypeError is raised, and when the native
//         generator returns no audio a JS Error is raised; in both cases an
//         empty Object is returned.
static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();
    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();
    return {};
  }

  SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();
    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text")) {
    Napi::TypeError::New(env, "The argument object should have a field text")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "The object['text'] should be a string")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Has("sid")) {
    Napi::TypeError::New(env, "The argument object should have a field sid")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Get("sid").IsNumber()) {
    Napi::TypeError::New(env, "The object['sid'] should be a number")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Has("speed")) {
    Napi::TypeError::New(env, "The argument object should have a field speed")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (!obj.Get("speed").IsNumber()) {
    Napi::TypeError::New(env, "The object['speed'] should be a number")
        .ThrowAsJavaScriptException();
    return {};
  }

  // Optional flag; anything that is not a boolean keeps the default.
  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  const std::string text = obj.Get("text").As<Napi::String>().Utf8Value();
  const int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  const float speed = obj.Get("speed").As<Napi::Number>().FloatValue();

  const SherpaOnnxGeneratedAudio *audio =
      SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);

  // Guard against a null result before touching audio->samples / audio->n.
  // NOTE(review): the C API is believed to return a null pointer when no
  // samples were produced -- confirm against c-api.h.
  if (audio == nullptr) {
    Napi::Error::New(env,
                     "Failed to generate audio. Please check the text, sid, "
                     "and speed you passed")
        .ThrowAsJavaScriptException();
    return {};
  }

  if (enable_external_buffer) {
    // Zero-copy path: the ArrayBuffer aliases the native sample array. The
    // audio struct is passed as the finalizer hint so that it is destroyed
    // only when the ArrayBuffer itself is finalized.
    Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
        env, const_cast<float *>(audio->samples), sizeof(float) * audio->n,
        [](Napi::Env /*env*/, void * /*data*/,
           const SherpaOnnxGeneratedAudio *hint) {
          SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
        },
        audio);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);

    Napi::Object ans = Napi::Object::New(env);
    ans.Set(Napi::String::New(env, "samples"), float32Array);
    ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);
    return ans;
  } else {
    // Copying path: don't use an external buffer. The samples are copied
    // into a JS-owned ArrayBuffer, so the native audio can be released
    // before returning.
    Napi::ArrayBuffer arrayBuffer =
        Napi::ArrayBuffer::New(env, sizeof(float) * audio->n);

    Napi::Float32Array float32Array =
        Napi::Float32Array::New(env, audio->n, arrayBuffer, 0);

    std::copy(audio->samples, audio->samples + audio->n, float32Array.Data());

    Napi::Object ans = Napi::Object::New(env);
    ans.Set(Napi::String::New(env, "samples"), float32Array);
    ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate);

    SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio);
    return ans;
  }
}
// Registers the offline (non-streaming) TTS functions on the addon's
// `exports` object under the names createOfflineTts, getOfflineTtsSampleRate,
// getOfflineTtsNumSpeakers and offlineTtsGenerate.
//
// @param env     The N-API environment used to create the JS values.
// @param exports The object that receives the four function properties.
void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
  Napi::Function create_fn =
      Napi::Function::New(env, CreateOfflineTtsWrapper);
  exports.Set(Napi::String::New(env, "createOfflineTts"), create_fn);

  Napi::Function sample_rate_fn =
      Napi::Function::New(env, OfflineTtsSampleRateWrapper);
  exports.Set(Napi::String::New(env, "getOfflineTtsSampleRate"),
              sample_rate_fn);

  Napi::Function num_speakers_fn =
      Napi::Function::New(env, OfflineTtsNumSpeakersWrapper);
  exports.Set(Napi::String::New(env, "getOfflineTtsNumSpeakers"),
              num_speakers_fn);

  Napi::Function generate_fn =
      Napi::Function::New(env, OfflineTtsGenerateWrapper);
  exports.Set(Napi::String::New(env, "offlineTtsGenerate"), generate_fn);
}