Add HarmonyOS support for text-to-speech. (#1584)
This commit is contained in:
@@ -2,8 +2,8 @@
|
||||
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
||||
*/
|
||||
export const HAR_VERSION = '1.10.32';
|
||||
export const BUILD_MODE_NAME = 'release';
|
||||
export const DEBUG = false;
|
||||
export const BUILD_MODE_NAME = 'debug';
|
||||
export const DEBUG = true;
|
||||
export const TARGET_NAME = 'default';
|
||||
|
||||
/**
|
||||
|
||||
@@ -38,3 +38,12 @@ export {
|
||||
OnlineRecognizerResult,
|
||||
OnlineRecognizer,
|
||||
} from './src/main/ets/components/StreamingAsr';
|
||||
|
||||
export {
|
||||
OfflineTtsVitsModelConfig,
|
||||
OfflineTtsModelConfig,
|
||||
OfflineTtsConfig,
|
||||
OfflineTts,
|
||||
TtsOutput,
|
||||
TtsInput,
|
||||
} from './src/main/ets/components/NonStreamingTts';
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
#include <string>
|
||||
|
||||
#if __OHOS__
|
||||
#include "rawfile/raw_file_manager.h"
|
||||
#include "hilog/log.h"
|
||||
#include "rawfile/raw_file_manager.h"
|
||||
|
||||
#undef LOG_DOMAIN
|
||||
#undef LOG_TAG
|
||||
|
||||
@@ -236,7 +236,10 @@ CreateOfflineRecognizerWrapper(const Napi::CallbackInfo &info) {
|
||||
SHERPA_ONNX_ASSIGN_ATTR_FLOAT(blank_penalty, blankPenalty);
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager, decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> mgr (OH_ResourceManager_InitNativeResourceManager(env, info[1]), &OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
|
||||
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
|
||||
&OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
|
||||
const SherpaOnnxOfflineRecognizer *recognizer =
|
||||
SherpaOnnxCreateOfflineRecognizerOHOS(&c, mgr.get());
|
||||
|
||||
@@ -63,6 +63,17 @@ static SherpaOnnxOfflineTtsModelConfig GetOfflineTtsModelConfig(
|
||||
static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
|
||||
const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
#if __OHOS__
|
||||
// the last argument is the NativeResourceManager
|
||||
if (info.Length() != 2) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 2 arguments. Given: " << info.Length();
|
||||
|
||||
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
|
||||
|
||||
return {};
|
||||
}
|
||||
#else
|
||||
if (info.Length() != 1) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 1 argument. Given: " << info.Length();
|
||||
@@ -71,6 +82,7 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
|
||||
|
||||
return {};
|
||||
}
|
||||
#endif
|
||||
|
||||
if (!info[0].IsObject()) {
|
||||
Napi::TypeError::New(env, "Expect an object as the argument")
|
||||
@@ -90,7 +102,15 @@ static Napi::External<SherpaOnnxOfflineTts> CreateOfflineTtsWrapper(
|
||||
SHERPA_ONNX_ASSIGN_ATTR_INT32(max_num_sentences, maxNumSentences);
|
||||
SHERPA_ONNX_ASSIGN_ATTR_STR(rule_fars, ruleFars);
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
|
||||
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
|
||||
&OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTtsOHOS(&c, mgr.get());
|
||||
#else
|
||||
SherpaOnnxOfflineTts *tts = SherpaOnnxCreateOfflineTts(&c);
|
||||
#endif
|
||||
|
||||
if (c.model.vits.model) {
|
||||
delete[] c.model.vits.model;
|
||||
|
||||
@@ -211,7 +211,10 @@ static Napi::External<SherpaOnnxOnlineRecognizer> CreateOnlineRecognizerWrapper(
|
||||
c.ctc_fst_decoder_config = GetCtcFstDecoderConfig(o);
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager, decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> mgr (OH_ResourceManager_InitNativeResourceManager(env, info[1]), &OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
|
||||
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]),
|
||||
&OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
|
||||
const SherpaOnnxOnlineRecognizer *recognizer =
|
||||
SherpaOnnxCreateOnlineRecognizerOHOS(&c, mgr.get());
|
||||
|
||||
@@ -33,3 +33,8 @@ export const decodeOnlineStream: (handle: object, streamHandle: object) => void;
|
||||
export const isEndpoint: (handle: object, streamHandle: object) => boolean;
|
||||
export const reset: (handle: object, streamHandle: object) => void;
|
||||
export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object) => string;
|
||||
|
||||
export const createOfflineTts: (config: object, mgr?: object) => object;
|
||||
export const getOfflineTtsNumSpeakers: (handle: object) => number;
|
||||
export const getOfflineTtsSampleRate: (handle: object) => number;
|
||||
export const offlineTtsGenerate: (handle: object, input: object) => object;
|
||||
|
||||
@@ -70,8 +70,10 @@ static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
#if __OHOS__
|
||||
// Note(fangjun): Normally, we don't need to divide it by sizeof(float).
|
||||
// However, data.ElementLength() here returns number of bytes, not number of elements.
|
||||
SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength() / sizeof(float));
|
||||
// However, data.ElementLength() here returns number of bytes, not number of
|
||||
// elements.
|
||||
SherpaOnnxCircularBufferPush(buf, data.Data(),
|
||||
data.ElementLength() / sizeof(float));
|
||||
#else
|
||||
SherpaOnnxCircularBufferPush(buf, data.Data(), data.ElementLength());
|
||||
#endif
|
||||
@@ -353,10 +355,14 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) {
|
||||
float buffer_size_in_seconds = info[1].As<Napi::Number>().FloatValue();
|
||||
|
||||
#if __OHOS__
|
||||
std::unique_ptr<NativeResourceManager, decltype(&OH_ResourceManager_ReleaseNativeResourceManager)> mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]), &OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
std::unique_ptr<NativeResourceManager,
|
||||
decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
|
||||
mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]),
|
||||
&OH_ResourceManager_ReleaseNativeResourceManager);
|
||||
|
||||
SherpaOnnxVoiceActivityDetector *vad =
|
||||
SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds, mgr.get());
|
||||
SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds,
|
||||
mgr.get());
|
||||
#else
|
||||
SherpaOnnxVoiceActivityDetector *vad =
|
||||
SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds);
|
||||
@@ -410,9 +416,10 @@ static void VoiceActivityDetectorAcceptWaveformWrapper(
|
||||
Napi::Float32Array samples = info[1].As<Napi::Float32Array>();
|
||||
|
||||
#if __OHOS__
|
||||
// Note(fangjun): For unknown reasons, we need to use `/sizeof(float)` here for Huawei
|
||||
SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(),
|
||||
samples.ElementLength() / sizeof(float));
|
||||
// Note(fangjun): For unknown reasons, we need to use `/sizeof(float)` here
|
||||
// for Huawei
|
||||
SherpaOnnxVoiceActivityDetectorAcceptWaveform(
|
||||
vad, samples.Data(), samples.ElementLength() / sizeof(float));
|
||||
#else
|
||||
SherpaOnnxVoiceActivityDetectorAcceptWaveform(vad, samples.Data(),
|
||||
samples.ElementLength());
|
||||
|
||||
@@ -102,10 +102,11 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
Napi::Uint8Array data = info[0].As<Napi::Uint8Array>();
|
||||
int32_t n = data.ElementLength();
|
||||
const SherpaOnnxWave *wave = SherpaOnnxReadWaveFromBinaryData(reinterpret_cast<const char*>(data.Data()), n);
|
||||
const SherpaOnnxWave *wave = SherpaOnnxReadWaveFromBinaryData(
|
||||
reinterpret_cast<const char *>(data.Data()), n);
|
||||
if (!wave) {
|
||||
std::ostringstream os;
|
||||
os << "Failed to read wave";
|
||||
@@ -113,7 +114,7 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) {
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
bool enable_external_buffer = true;
|
||||
if (info.Length() == 2) {
|
||||
if (info[1].IsBoolean()) {
|
||||
@@ -165,7 +166,7 @@ static Napi::Object ReadWaveFromBinaryWrapper(const Napi::CallbackInfo &info) {
|
||||
void InitWaveReader(Napi::Env env, Napi::Object exports) {
|
||||
exports.Set(Napi::String::New(env, "readWave"),
|
||||
Napi::Function::New(env, ReadWaveWrapper));
|
||||
|
||||
|
||||
exports.Set(Napi::String::New(env, "readWaveFromBinary"),
|
||||
Napi::Function::New(env, ReadWaveFromBinaryWrapper));
|
||||
}
|
||||
Napi::Function::New(env, ReadWaveFromBinaryWrapper));
|
||||
}
|
||||
|
||||
@@ -79,7 +79,7 @@ export class OfflineModelConfig {
|
||||
public tokens: string = '';
|
||||
public numThreads: number = 1;
|
||||
public debug: boolean = false;
|
||||
public provider: string = "cpu";
|
||||
public provider: string = 'cpu';
|
||||
public modelType: string = '';
|
||||
public modelingUnit: string = "cjkchar";
|
||||
public bpeVocab: string = '';
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
import {
|
||||
createOfflineTts,
|
||||
getOfflineTtsNumSpeakers,
|
||||
getOfflineTtsSampleRate,
|
||||
offlineTtsGenerate,
|
||||
} from "libsherpa_onnx.so";
|
||||
|
||||
/**
 * Settings for the VITS model used by the offline (non-streaming) TTS engine.
 *
 * An instance is stored in the `vits` field of `OfflineTtsModelConfig`.
 * All string fields default to the empty string.
 *
 * NOTE(review): `model`, `lexicon`, `tokens`, `dataDir` and `dictDir` are
 * presumably file/directory paths understood by the native library —
 * confirm against the sherpa-onnx documentation.
 */
export class OfflineTtsVitsModelConfig {
  public model: string = '';
  public lexicon: string = '';
  public tokens: string = '';
  public dataDir: string = '';
  // Primitive `string`, matching the sibling fields. The boxed `String`
  // wrapper type is not assignable to `string` and must not be used here.
  public dictDir: string = '';
  public noiseScale: number = 0.667;
  public noiseScaleW: number = 0.8;
  public lengthScale: number = 1.0;
}
|
||||
|
||||
/**
 * Model-level settings for the offline TTS engine.
 *
 * Bundles the VITS model settings with the runtime options below.
 * Defaults: one thread, debug off, provider `'cpu'`.
 */
export class OfflineTtsModelConfig {
  /** VITS model settings; a fresh default instance is created per config. */
  public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();

  public numThreads: number = 1;

  public debug: boolean = false;

  public provider: string = 'cpu';
}
|
||||
|
||||
/**
 * Top-level configuration object handed to `OfflineTts`.
 *
 * Defaults: a default `OfflineTtsModelConfig`, empty `ruleFsts` / `ruleFars`,
 * and `maxNumSentences` of 1.
 */
export class OfflineTtsConfig {
  /** Model settings; a fresh default instance is created per config. */
  public model: OfflineTtsModelConfig = new OfflineTtsModelConfig();

  public ruleFsts: string = '';

  public ruleFars: string = '';

  public maxNumSentences: number = 1;
}
|
||||
|
||||
/**
 * Result of a text-to-speech generation call.
 *
 * Defaults describe "no audio": an empty sample buffer and a sample rate of 0.
 */
export class TtsOutput {
  /** Generated audio samples. */
  public samples: Float32Array = new Float32Array(0);

  /** Sample rate associated with `samples`. */
  public sampleRate: number = 0;
}
|
||||
|
||||
/**
 * Request passed to `OfflineTts.generate`.
 *
 * Defaults: empty text, `sid` 0, `speed` 1.0.
 */
export class TtsInput {
  /** Text to synthesize. */
  public text: string = '';

  /** Integer id (int32 on the native side). NOTE(review): presumably the speaker id — confirm. */
  public sid: number = 0;

  /** Floating-point speed value; 1.0 is the default. */
  public speed: number = 1.0;
}
|
||||
|
||||
/**
 * Non-streaming text-to-speech engine backed by the native bindings imported
 * from `libsherpa_onnx.so`.
 *
 * The constructor creates the native object once and caches its speaker count
 * and sample rate; `generate` synthesizes audio for a single request.
 */
export class OfflineTts {
  // Opaque native handle returned by createOfflineTts(). It is readonly so the
  // cached numSpeakers / sampleRate below can never describe a different
  // native object than the one generate() uses.
  private readonly handle: object;
  public config: OfflineTtsConfig;
  public numSpeakers: number;
  public sampleRate: number;

  /**
   * @param config Configuration forwarded to the native `createOfflineTts`.
   * @param mgr Optional object forwarded unchanged as the second argument of
   *   `createOfflineTts`. NOTE(review): presumably the HarmonyOS resource
   *   manager used to load bundled model files — confirm with the native side.
   */
  constructor(config: OfflineTtsConfig, mgr?: object) {
    this.handle = createOfflineTts(config, mgr);
    this.config = config;

    this.numSpeakers = getOfflineTtsNumSpeakers(this.handle);
    this.sampleRate = getOfflineTtsSampleRate(this.handle);
  }

  /**
   * Synthesize audio for one request.
   *
   * @param input An object of the form `{text: "xxxx", sid: 0, speed: 1.0}`,
   *   where `text` is a string, `sid` is an int32 and `speed` is a float.
   * @returns An object `{samples: Float32Array, sampleRate: <a number>}`.
   */
  generate(input: TtsInput): TtsOutput {
    // The native binding is declared as returning `object`; the assertion
    // narrows it to the documented result shape.
    return offlineTtsGenerate(this.handle, input) as TtsOutput;
  }
}
|
||||
@@ -52,7 +52,7 @@ export class OnlineModelConfig {
|
||||
public zipformer2_ctc: OnlineZipformer2CtcModelConfig = new OnlineZipformer2CtcModelConfig();
|
||||
public tokens: string = '';
|
||||
public numThreads: number = 1;
|
||||
public provider: string = "cpu";
|
||||
public provider: string = 'cpu';
|
||||
public debug: boolean = false;
|
||||
public modelType: string = '';
|
||||
public modelingUnit: string = "cjkchar";
|
||||
@@ -67,7 +67,7 @@ export class OnlineCtcFstDecoderConfig {
|
||||
export class OnlineRecognizerConfig {
|
||||
public featConfig: FeatureConfig = new FeatureConfig();
|
||||
public modelConfig: OnlineModelConfig = new OnlineModelConfig();
|
||||
public decodingMethod: string = "greedy_search";
|
||||
public decodingMethod: string = 'greedy_search';
|
||||
public maxActivePaths: number = 4;
|
||||
public enableEndpoint: boolean = false;
|
||||
public rule1MinTrailingSilence: number = 2.4;
|
||||
|
||||
Reference in New Issue
Block a user