Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)
This commit is contained in:
1
harmony-os/.gitignore
vendored
1
harmony-os/.gitignore
vendored
@@ -1 +1,2 @@
|
||||
!build-profile.json5
|
||||
*.har
|
||||
|
||||
@@ -1,4 +1,8 @@
|
||||
export { readWave, readWaveFromBinary } from "libsherpa_onnx.so";
|
||||
export {
|
||||
listRawfileDir,
|
||||
readWave,
|
||||
readWaveFromBinary,
|
||||
} from "libsherpa_onnx.so";
|
||||
|
||||
export {
|
||||
CircularBuffer,
|
||||
|
||||
@@ -4,7 +4,7 @@
|
||||
"externalNativeOptions": {
|
||||
"path": "./src/main/cpp/CMakeLists.txt",
|
||||
"arguments": "",
|
||||
"cppFlags": "",
|
||||
"cppFlags": "-std=c++17",
|
||||
"abiFilters": [
|
||||
"arm64-v8a",
|
||||
"x86_64",
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
cmake_minimum_required(VERSION 3.13.0)
|
||||
project(myNpmLib)
|
||||
|
||||
if (NOT CMAKE_CXX_STANDARD)
|
||||
set(CMAKE_CXX_STANDARD 17 CACHE STRING "The C++ version to use")
|
||||
endif()
|
||||
|
||||
# Disable warning about
|
||||
#
|
||||
# "The DOWNLOAD_EXTRACT_TIMESTAMP option was not given and policy CMP0135 is
|
||||
@@ -46,6 +50,7 @@ add_library(sherpa_onnx SHARED
|
||||
speaker-identification.cc
|
||||
spoken-language-identification.cc
|
||||
streaming-asr.cc
|
||||
utils.cc
|
||||
vad.cc
|
||||
wave-reader.cc
|
||||
wave-writer.cc
|
||||
|
||||
@@ -213,12 +213,13 @@ static Napi::Number OfflineTtsNumSpeakersWrapper(
|
||||
return Napi::Number::New(env, num_speakers);
|
||||
}
|
||||
|
||||
// synchronous version
|
||||
static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
|
||||
Napi::Env env = info.Env();
|
||||
|
||||
if (info.Length() != 2) {
|
||||
std::ostringstream os;
|
||||
os << "Expect only 1 argument. Given: " << info.Length();
|
||||
os << "Expect only 2 arguments. Given: " << info.Length();
|
||||
|
||||
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
|
||||
|
||||
@@ -298,8 +299,8 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
|
||||
int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
|
||||
float speed = obj.Get("speed").As<Napi::Number>().FloatValue();
|
||||
|
||||
const SherpaOnnxGeneratedAudio *audio =
|
||||
SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
|
||||
const SherpaOnnxGeneratedAudio *audio;
|
||||
audio = SherpaOnnxOfflineTtsGenerate(tts, text.c_str(), sid, speed);
|
||||
|
||||
if (enable_external_buffer) {
|
||||
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
|
||||
@@ -334,6 +335,256 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) {
|
||||
}
|
||||
}
|
||||
|
||||
// Payload for one chunk of generated audio. It is filled in by the TTS
// progress callback and consumed by InvokeJsCallback(), which also records
// the outcome of the JavaScript call in `processed` and `cancelled`.
struct TtsCallbackData {
  // Samples of this chunk.
  std::vector<float> samples;

  // Progress value reported together with this chunk.
  float progress = 0.0f;

  // True once the JavaScript callback has been invoked for this chunk.
  bool processed = false;

  // True when the JavaScript callback did not return a non-zero number,
  // i.e., it asked to stop the generation.
  bool cancelled = false;
};
|
||||
|
||||
// see
|
||||
// https://github.com/nodejs/node-addon-examples/blob/main/src/6-threadsafe-function/typed_threadsafe_function/node-addon-api/clock.cc
|
||||
void InvokeJsCallback(Napi::Env env, Napi::Function callback,
|
||||
Napi::Reference<Napi::Value> *context,
|
||||
TtsCallbackData *data) {
|
||||
if (env != nullptr) {
|
||||
if (callback != nullptr) {
|
||||
Napi::ArrayBuffer arrayBuffer =
|
||||
Napi::ArrayBuffer::New(env, sizeof(float) * data->samples.size());
|
||||
|
||||
Napi::Float32Array float32Array =
|
||||
Napi::Float32Array::New(env, data->samples.size(), arrayBuffer, 0);
|
||||
|
||||
std::copy(data->samples.begin(), data->samples.end(),
|
||||
float32Array.Data());
|
||||
|
||||
Napi::Object arg = Napi::Object::New(env);
|
||||
arg.Set(Napi::String::New(env, "samples"), float32Array);
|
||||
arg.Set(Napi::String::New(env, "progress"), data->progress);
|
||||
|
||||
auto v = callback.Call(context->Value(), {arg});
|
||||
data->processed = true;
|
||||
if (v.IsNumber() && v.As<Napi::Number>().Int32Value()) {
|
||||
data->cancelled = false;
|
||||
} else {
|
||||
data->cancelled = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
using TSFN = Napi::TypedThreadSafeFunction<Napi::Reference<Napi::Value>,
|
||||
TtsCallbackData, InvokeJsCallback>;
|
||||
|
||||
class TtsGenerateWorker : public Napi::AsyncWorker {
|
||||
public:
|
||||
TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts,
|
||||
const std::string &text, float speed, int32_t sid,
|
||||
bool use_external_buffer)
|
||||
: tsfn_(tsfn),
|
||||
Napi::AsyncWorker{env, "TtsGenerateWorker"},
|
||||
deferred_(env),
|
||||
tts_(tts),
|
||||
text_(text),
|
||||
speed_(speed),
|
||||
sid_(sid),
|
||||
use_external_buffer_(use_external_buffer) {}
|
||||
|
||||
Napi::Promise Promise() { return deferred_.Promise(); }
|
||||
|
||||
~TtsGenerateWorker() {
|
||||
for (auto d : data_list_) {
|
||||
delete d;
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
void Execute() override {
|
||||
auto callback = [](const float *samples, int32_t n, float progress,
|
||||
void *arg) -> int32_t {
|
||||
TtsGenerateWorker *_this = reinterpret_cast<TtsGenerateWorker *>(arg);
|
||||
|
||||
for (auto d : _this->data_list_) {
|
||||
if (d->cancelled) {
|
||||
OH_LOG_INFO(LOG_APP, "TtsGenerate is cancelled");
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
auto data = new TtsCallbackData;
|
||||
data->samples = std::vector<float>{samples, samples + n};
|
||||
data->progress = progress;
|
||||
_this->data_list_.push_back(data);
|
||||
|
||||
_this->tsfn_.NonBlockingCall(data);
|
||||
|
||||
return 1;
|
||||
};
|
||||
audio_ = SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
|
||||
tts_, text_.c_str(), sid_, speed_, callback, this);
|
||||
|
||||
tsfn_.Release();
|
||||
}
|
||||
|
||||
void OnOK() override {
|
||||
Napi::Env env = deferred_.Env();
|
||||
Napi::Object ans = Napi::Object::New(env);
|
||||
if (use_external_buffer_) {
|
||||
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New(
|
||||
env, const_cast<float *>(audio_->samples), sizeof(float) * audio_->n,
|
||||
[](Napi::Env /*env*/, void * /*data*/,
|
||||
const SherpaOnnxGeneratedAudio *hint) {
|
||||
SherpaOnnxDestroyOfflineTtsGeneratedAudio(hint);
|
||||
},
|
||||
audio_);
|
||||
Napi::Float32Array float32Array =
|
||||
Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
|
||||
|
||||
ans.Set(Napi::String::New(env, "samples"), float32Array);
|
||||
ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
|
||||
} else {
|
||||
// don't use external buffer
|
||||
Napi::ArrayBuffer arrayBuffer =
|
||||
Napi::ArrayBuffer::New(env, sizeof(float) * audio_->n);
|
||||
|
||||
Napi::Float32Array float32Array =
|
||||
Napi::Float32Array::New(env, audio_->n, arrayBuffer, 0);
|
||||
|
||||
std::copy(audio_->samples, audio_->samples + audio_->n,
|
||||
float32Array.Data());
|
||||
|
||||
ans.Set(Napi::String::New(env, "samples"), float32Array);
|
||||
ans.Set(Napi::String::New(env, "sampleRate"), audio_->sample_rate);
|
||||
SherpaOnnxDestroyOfflineTtsGeneratedAudio(audio_);
|
||||
}
|
||||
|
||||
deferred_.Resolve(ans);
|
||||
}
|
||||
|
||||
private:
|
||||
TSFN tsfn_;
|
||||
Napi::Promise::Deferred deferred_;
|
||||
SherpaOnnxOfflineTts *tts_;
|
||||
std::string text_;
|
||||
float speed_;
|
||||
int32_t sid_;
|
||||
bool use_external_buffer_;
|
||||
|
||||
const SherpaOnnxGeneratedAudio *audio_;
|
||||
|
||||
std::vector<TtsCallbackData *> data_list_;
|
||||
};
|
||||
|
||||
// Asynchronous version of the TTS generate function.
//
// Expects exactly 2 arguments:
//  - info[0]: an external wrapping a SherpaOnnxOfflineTts pointer
//  - info[1]: an object with the fields
//      - text (string), required
//      - sid (number), required
//      - speed (number), required
//      - enableExternalBuffer (boolean), optional, defaults to true
//      - callback (function), optional; invoked for every generated chunk
//
// On invalid input, a TypeError is thrown and an empty object is returned.
// Otherwise, a TtsGenerateWorker is queued and its promise is returned.
static Napi::Object OfflineTtsGenerateAsyncWrapper(
    const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  if (!info[0].IsExternal()) {
    Napi::TypeError::New(env, "Argument 0 should be an offline tts pointer.")
        .ThrowAsJavaScriptException();

    return {};
  }

  SherpaOnnxOfflineTts *tts =
      info[0].As<Napi::External<SherpaOnnxOfflineTts>>().Data();

  if (!info[1].IsObject()) {
    Napi::TypeError::New(env, "Argument 1 should be an object")
        .ThrowAsJavaScriptException();

    return {};
  }

  Napi::Object obj = info[1].As<Napi::Object>();

  if (!obj.Has("text")) {
    Napi::TypeError::New(env, "The argument object should have a field text")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("text").IsString()) {
    Napi::TypeError::New(env, "The object['text'] should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("sid")) {
    Napi::TypeError::New(env, "The argument object should have a field sid")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("sid").IsNumber()) {
    Napi::TypeError::New(env, "The object['sid'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Has("speed")) {
    Napi::TypeError::New(env, "The argument object should have a field speed")
        .ThrowAsJavaScriptException();

    return {};
  }

  if (!obj.Get("speed").IsNumber()) {
    Napi::TypeError::New(env, "The object['speed'] should be a number")
        .ThrowAsJavaScriptException();

    return {};
  }

  bool enable_external_buffer = true;
  if (obj.Has("enableExternalBuffer") &&
      obj.Get("enableExternalBuffer").IsBoolean()) {
    enable_external_buffer =
        obj.Get("enableExternalBuffer").As<Napi::Boolean>().Value();
  }

  Napi::String _text = obj.Get("text").As<Napi::String>();
  std::string text = _text.Utf8Value();
  int32_t sid = obj.Get("sid").As<Napi::Number>().Int32Value();
  float speed = obj.Get("speed").As<Napi::Number>().FloatValue();

  // cb stays empty if the caller did not provide a callback
  Napi::Function cb;
  if (obj.Has("callback") && obj.Get("callback").IsFunction()) {
    cb = obj.Get("callback").As<Napi::Function>();
  }

  // The context is deleted by the finalizer passed to TSFN::New() below.
  auto context =
      new Napi::Reference<Napi::Value>(Napi::Persistent(info.This()));

  TSFN tsfn = TSFN::New(
      env,
      cb,                 // JavaScript function called asynchronously
      "TtsGenerateFunc",  // Name
      0,                  // Unlimited queue
      1,                  // Only one thread will use this initially
      context,
      [](Napi::Env, void *, Napi::Reference<Napi::Value> *ctx) { delete ctx; });

  // NOTE(review): the worker is not deleted here; presumably it deletes
  // itself after completion, as Napi::AsyncWorker does by default - confirm.
  TtsGenerateWorker *worker = new TtsGenerateWorker(
      env, tsfn, tts, text, speed, sid, enable_external_buffer);
  worker->Queue();
  return worker->Promise();
}
|
||||
|
||||
void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
|
||||
exports.Set(Napi::String::New(env, "createOfflineTts"),
|
||||
Napi::Function::New(env, CreateOfflineTtsWrapper));
|
||||
@@ -346,4 +597,7 @@ void InitNonStreamingTts(Napi::Env env, Napi::Object exports) {
|
||||
|
||||
exports.Set(Napi::String::New(env, "offlineTtsGenerate"),
|
||||
Napi::Function::New(env, OfflineTtsGenerateWrapper));
|
||||
|
||||
exports.Set(Napi::String::New(env, "offlineTtsGenerateAsync"),
|
||||
Napi::Function::New(env, OfflineTtsGenerateAsyncWrapper));
|
||||
}
|
||||
|
||||
@@ -27,6 +27,10 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports);
|
||||
|
||||
void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports);
|
||||
|
||||
#if __OHOS__
|
||||
void InitUtils(Napi::Env env, Napi::Object exports);
|
||||
#endif
|
||||
|
||||
Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
||||
InitStreamingAsr(env, exports);
|
||||
InitNonStreamingAsr(env, exports);
|
||||
@@ -41,7 +45,15 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
|
||||
InitKeywordSpotting(env, exports);
|
||||
InitNonStreamingSpeakerDiarization(env, exports);
|
||||
|
||||
#if __OHOS__
|
||||
InitUtils(env, exports);
|
||||
#endif
|
||||
|
||||
return exports;
|
||||
}
|
||||
|
||||
#if __OHOS__
|
||||
NODE_API_MODULE(sherpa_onnx, Init)
|
||||
#else
|
||||
NODE_API_MODULE(addon, Init)
|
||||
#endif
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
export const listRawfileDir: (mgr: object, dir: string) => Array<string>;
|
||||
|
||||
export const readWave: (filename: string, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
|
||||
export const readWaveFromBinary: (data: Uint8Array, enableExternalBuffer: boolean = true) => {samples: Float32Array, sampleRate: number};
|
||||
export const createCircularBuffer: (capacity: number) => object;
|
||||
@@ -37,4 +39,11 @@ export const getOnlineStreamResultAsJson: (handle: object, streamHandle: object)
|
||||
export const createOfflineTts: (config: object, mgr?: object) => object;
|
||||
export const getOfflineTtsNumSpeakers: (handle: object) => number;
|
||||
export const getOfflineTtsSampleRate: (handle: object) => number;
|
||||
export const offlineTtsGenerate: (handle: object, input: object) => object;
|
||||
|
||||
export type TtsOutput = {
|
||||
samples: Float32Array;
|
||||
sampleRate: number;
|
||||
};
|
||||
|
||||
export const offlineTtsGenerate: (handle: object, input: object) => TtsOutput;
|
||||
export const offlineTtsGenerateAsync: (handle: object, input: object) => Promise<TtsOutput>;
|
||||
|
||||
76
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc
Normal file
76
harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/utils.cc
Normal file
@@ -0,0 +1,76 @@
|
||||
// Copyright (c) 2024 Xiaomi Corporation
|
||||
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "macros.h" // NOLINT
|
||||
#include "napi.h" // NOLINT
|
||||
|
||||
// Recursively collects the files below the rawfile directory `d`.
//
// @param mgr The native resource manager used to access rawfiles.
// @param d   The directory to list. An empty string means that entry names
//            are used as they are, without a "<d>/" prefix.
// @return Paths of all non-directory entries. Each path is prefixed with
//         "<d>/" unless d is empty. Sub-directories are visited recursively.
static std::vector<std::string> GetFilenames(NativeResourceManager *mgr,
                                             const std::string &d) {
  using RawDirPtr =
      std::unique_ptr<RawDir, decltype(&OH_ResourceManager_CloseRawDir)>;

  RawDirPtr raw_dir(OH_ResourceManager_OpenRawDir(mgr, d.c_str()),
                    &OH_ResourceManager_CloseRawDir);

  int num_entries = OH_ResourceManager_GetRawFileCount(raw_dir.get());

  std::vector<std::string> ans;
  ans.reserve(num_entries);

  for (int32_t k = 0; k < num_entries; ++k) {
    std::string name = OH_ResourceManager_GetRawFileName(raw_dir.get(), k);

    // path of the entry relative to the rawfile root
    std::string path = d.empty() ? name : d + "/" + name;

    if (!OH_ResourceManager_IsRawDir(mgr, path.c_str())) {
      ans.push_back(std::move(path));
      continue;
    }

    // a sub-directory: append everything found below it
    auto nested = GetFilenames(mgr, path);
    for (auto &p : nested) {
      ans.push_back(std::move(p));
    }
  }

  return ans;
}
|
||||
|
||||
// Lists all files below a rawfile directory.
//
// Expects exactly 2 arguments:
//  - info[0]: the JavaScript resource manager object
//  - info[1]: the directory to list, as a string
//
// Returns a JavaScript array of strings, as produced by GetFilenames().
// On invalid input, a TypeError is thrown and an empty value is returned.
static Napi::Array ListRawFileDir(const Napi::CallbackInfo &info) {
  Napi::Env env = info.Env();

  if (info.Length() != 2) {
    std::ostringstream os;
    os << "Expect only 2 arguments. Given: " << info.Length();

    Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();

    return {};
  }

  // Validate all arguments before acquiring the native resource manager.
  if (!info[1].IsString()) {
    Napi::TypeError::New(env, "Argument 1 should be a string")
        .ThrowAsJavaScriptException();

    return {};
  }

  std::unique_ptr<NativeResourceManager,
                  decltype(&OH_ResourceManager_ReleaseNativeResourceManager)>
      mgr(OH_ResourceManager_InitNativeResourceManager(env, info[0]),
          &OH_ResourceManager_ReleaseNativeResourceManager);

  std::string dir = info[1].As<Napi::String>().Utf8Value();

  auto files = GetFilenames(mgr.get(), dir);
  Napi::Array ans = Napi::Array::New(env, files.size());

  // Use an unsigned index to avoid a signed/unsigned comparison with size().
  uint32_t i = 0;
  for (const auto &f : files) {
    ans[i] = Napi::String::New(env, f);
    ++i;
  }

  return ans;
}
|
||||
// Registers the utility functions of this file on `exports`.
// ListRawFileDir is exported to JavaScript as "listRawfileDir".
void InitUtils(Napi::Env env, Napi::Object exports) {
  Napi::Function list_raw_file_dir = Napi::Function::New(env, ListRawFileDir);

  exports.Set(Napi::String::New(env, "listRawfileDir"), list_raw_file_dir);
}
|
||||
@@ -3,6 +3,7 @@ import {
|
||||
getOfflineTtsNumSpeakers,
|
||||
getOfflineTtsSampleRate,
|
||||
offlineTtsGenerate,
|
||||
offlineTtsGenerateAsync,
|
||||
} from "libsherpa_onnx.so";
|
||||
|
||||
export class OfflineTtsVitsModelConfig {
|
||||
@@ -16,14 +17,14 @@ export class OfflineTtsVitsModelConfig {
|
||||
public lengthScale: number = 1.0;
|
||||
}
|
||||
|
||||
export class OfflineTtsModelConfig{
|
||||
export class OfflineTtsModelConfig {
|
||||
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
|
||||
public numThreads: number = 1;
|
||||
public debug: boolean = false;
|
||||
public provider: string = 'cpu';
|
||||
}
|
||||
|
||||
export class OfflineTtsConfig{
|
||||
export class OfflineTtsConfig {
|
||||
public model: OfflineTtsModelConfig = new OfflineTtsModelConfig();
|
||||
public ruleFsts: string = '';
|
||||
public ruleFars: string = '';
|
||||
@@ -35,17 +36,24 @@ export class TtsOutput {
|
||||
public sampleRate: number = 0;
|
||||
}
|
||||
|
||||
/**
 * Argument passed to the progress callback of a TTS request.
 */
interface TtsCallbackData {
  /** Audio samples of the current chunk. */
  samples: Float32Array;

  /** Progress value reported together with the current chunk. */
  progress: number;
}
|
||||
|
||||
/**
 * Input of a TTS request.
 */
export class TtsInput {
  /** The text to synthesize. */
  public text: string = '';

  /** Speaker ID. */
  public sid: number = 0;

  /** Speech speed. */
  public speed: number = 1.0;

  /**
   * Optional callback invoked with each generated chunk.
   * Return a non-zero number to continue generating; any other
   * return value cancels the generation.
   */
  public callback?: (data: TtsCallbackData) => number;
}
|
||||
|
||||
export class OfflineTts {
|
||||
private handle: object;
|
||||
public config: OfflineTtsConfig;
|
||||
public numSpeakers: number;
|
||||
public sampleRate: number;
|
||||
private handle: object;
|
||||
|
||||
constructor(config: OfflineTtsConfig, mgr?: object) {
|
||||
this.handle = createOfflineTts(config, mgr);
|
||||
this.config = config;
|
||||
@@ -63,4 +71,8 @@ export class OfflineTts {
|
||||
generate(input: TtsInput): TtsOutput {
|
||||
return offlineTtsGenerate(this.handle, input) as TtsOutput;
|
||||
}
|
||||
|
||||
generateAsync(input: TtsInput): Promise<TtsOutput> {
|
||||
return offlineTtsGenerateAsync(this.handle, input);
|
||||
}
|
||||
}
|
||||
@@ -57,7 +57,6 @@ export class CircularBuffer {
|
||||
|
||||
// samples is a float32 array
|
||||
push(samples: Float32Array) {
|
||||
console.log(`here samples: ${samples}`);
|
||||
circularBufferPush(this.handle, samples);
|
||||
}
|
||||
|
||||
|
||||
12
harmony-os/SherpaOnnxTts/.gitignore
vendored
Normal file
12
harmony-os/SherpaOnnxTts/.gitignore
vendored
Normal file
@@ -0,0 +1,12 @@
|
||||
/node_modules
|
||||
/oh_modules
|
||||
/local.properties
|
||||
/.idea
|
||||
**/build
|
||||
/.hvigor
|
||||
.cxx
|
||||
/.clangd
|
||||
/.clang-format
|
||||
/.clang-tidy
|
||||
**/.test
|
||||
/.appanalyzer
|
||||
10
harmony-os/SherpaOnnxTts/AppScope/app.json5
Normal file
10
harmony-os/SherpaOnnxTts/AppScope/app.json5
Normal file
@@ -0,0 +1,10 @@
|
||||
{
|
||||
"app": {
|
||||
"bundleName": "com.k2fsa.sherpa.onnx.tts",
|
||||
"vendor": "next-gen Kaldi",
|
||||
"versionCode": 1000000,
|
||||
"versionName": "1.0.0",
|
||||
"icon": "$media:app_icon",
|
||||
"label": "$string:app_name"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"string": [
|
||||
{
|
||||
"name": "app_name",
|
||||
"value": "SherpaOnnxTts"
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 2.7 KiB |
40
harmony-os/SherpaOnnxTts/build-profile.json5
Normal file
40
harmony-os/SherpaOnnxTts/build-profile.json5
Normal file
@@ -0,0 +1,40 @@
|
||||
{
|
||||
"app": {
|
||||
"signingConfigs": [],
|
||||
"products": [
|
||||
{
|
||||
"name": "default",
|
||||
"signingConfig": "default",
|
||||
"compatibleSdkVersion": "4.0.0(10)",
|
||||
"runtimeOS": "HarmonyOS",
|
||||
"buildOption": {
|
||||
"strictMode": {
|
||||
"caseSensitiveCheck": true,
|
||||
}
|
||||
}
|
||||
}
|
||||
],
|
||||
"buildModeSet": [
|
||||
{
|
||||
"name": "debug",
|
||||
},
|
||||
{
|
||||
"name": "release"
|
||||
}
|
||||
]
|
||||
},
|
||||
"modules": [
|
||||
{
|
||||
"name": "entry",
|
||||
"srcPath": "./entry",
|
||||
"targets": [
|
||||
{
|
||||
"name": "default",
|
||||
"applyToProducts": [
|
||||
"default"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
20
harmony-os/SherpaOnnxTts/code-linter.json5
Normal file
20
harmony-os/SherpaOnnxTts/code-linter.json5
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
"files": [
|
||||
"**/*.ets"
|
||||
],
|
||||
"ignore": [
|
||||
"**/src/ohosTest/**/*",
|
||||
"**/src/test/**/*",
|
||||
"**/src/mock/**/*",
|
||||
"**/node_modules/**/*",
|
||||
"**/oh_modules/**/*",
|
||||
"**/build/**/*",
|
||||
"**/.preview/**/*"
|
||||
],
|
||||
"ruleSet": [
|
||||
"plugin:@performance/recommended",
|
||||
"plugin:@typescript-eslint/recommended"
|
||||
],
|
||||
"rules": {
|
||||
}
|
||||
}
|
||||
6
harmony-os/SherpaOnnxTts/entry/.gitignore
vendored
Normal file
6
harmony-os/SherpaOnnxTts/entry/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/node_modules
|
||||
/oh_modules
|
||||
/.preview
|
||||
/build
|
||||
/.cxx
|
||||
/.test
|
||||
33
harmony-os/SherpaOnnxTts/entry/build-profile.json5
Normal file
33
harmony-os/SherpaOnnxTts/entry/build-profile.json5
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"apiType": "stageMode",
|
||||
"buildOption": {
|
||||
"sourceOption": {
|
||||
"workers": [
|
||||
"./src/main/ets/workers/NonStreamingTtsWorker.ets"
|
||||
]
|
||||
}
|
||||
},
|
||||
"buildOptionSet": [
|
||||
{
|
||||
"name": "release",
|
||||
"arkOptions": {
|
||||
"obfuscation": {
|
||||
"ruleOptions": {
|
||||
"enable": false,
|
||||
"files": [
|
||||
"./obfuscation-rules.txt"
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
],
|
||||
"targets": [
|
||||
{
|
||||
"name": "default"
|
||||
},
|
||||
{
|
||||
"name": "ohosTest",
|
||||
}
|
||||
]
|
||||
}
|
||||
6
harmony-os/SherpaOnnxTts/entry/hvigorfile.ts
Normal file
6
harmony-os/SherpaOnnxTts/entry/hvigorfile.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
import { hapTasks } from '@ohos/hvigor-ohos-plugin';
|
||||
|
||||
export default {
|
||||
system: hapTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
|
||||
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
|
||||
}
|
||||
23
harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt
Normal file
23
harmony-os/SherpaOnnxTts/entry/obfuscation-rules.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
# Define project specific obfuscation rules here.
|
||||
# You can include the obfuscation configuration files in the current module's build-profile.json5.
|
||||
#
|
||||
# For more details, see
|
||||
# https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/source-obfuscation-V5
|
||||
|
||||
# Obfuscation options:
|
||||
# -disable-obfuscation: disable all obfuscations
|
||||
# -enable-property-obfuscation: obfuscate the property names
|
||||
# -enable-toplevel-obfuscation: obfuscate the names in the global scope
|
||||
# -compact: remove unnecessary blank spaces and all line feeds
|
||||
# -remove-log: remove all console.* statements
|
||||
# -print-namecache: print the name cache that contains the mapping from the old names to new names
|
||||
# -apply-namecache: reuse the given cache file
|
||||
|
||||
# Keep options:
|
||||
# -keep-property-name: specifies property names that you want to keep
|
||||
# -keep-global-name: specifies names that you want to keep in the global scope
|
||||
|
||||
-enable-property-obfuscation
|
||||
-enable-toplevel-obfuscation
|
||||
-enable-filename-obfuscation
|
||||
-enable-export-obfuscation
|
||||
29
harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5
Normal file
29
harmony-os/SherpaOnnxTts/entry/oh-package-lock.json5
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"meta": {
|
||||
"stableOrder": true
|
||||
},
|
||||
"lockfileVersion": 3,
|
||||
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
|
||||
"specifiers": {
|
||||
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
|
||||
"sherpa_onnx@1.10.32": "sherpa_onnx@1.10.32"
|
||||
},
|
||||
"packages": {
|
||||
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
|
||||
"name": "libsherpa_onnx.so",
|
||||
"version": "1.0.0",
|
||||
"resolved": "../oh_modules/.ohpm/sherpa_onnx@1.10.32/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
|
||||
"registryType": "local"
|
||||
},
|
||||
"sherpa_onnx@1.10.32": {
|
||||
"name": "sherpa_onnx",
|
||||
"version": "1.10.32",
|
||||
"integrity": "sha512-yHYmWoeqhrunOqGr9gxPJJH/8+rdwcKFOW6onYByVObQVpbqypslg301IjGm9xpnc5bJEkO3S9sra2zQTpPA/w==",
|
||||
"resolved": "https://ohpm.openharmony.cn/ohpm/sherpa_onnx/-/sherpa_onnx-1.10.32.har",
|
||||
"registryType": "ohpm",
|
||||
"dependencies": {
|
||||
"libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
12
harmony-os/SherpaOnnxTts/entry/oh-package.json5
Normal file
12
harmony-os/SherpaOnnxTts/entry/oh-package.json5
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "entry",
|
||||
"version": "1.0.0",
|
||||
"description": "Please describe the basic information.",
|
||||
"main": "",
|
||||
"author": "",
|
||||
"license": "",
|
||||
"dependencies": {
|
||||
"sherpa_onnx": "1.10.32",
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,43 @@
|
||||
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
|
||||
import hilog from '@ohos.hilog';
|
||||
import UIAbility from '@ohos.app.ability.UIAbility';
|
||||
import Want from '@ohos.app.ability.Want';
|
||||
import window from '@ohos.window';
|
||||
|
||||
/**
 * Entry ability of the app. Every lifecycle hook logs a message;
 * onWindowStageCreate additionally loads the main page 'pages/Index'.
 */
export default class EntryAbility extends UIAbility {
  /** Logs that the ability has been created. */
  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onCreate');
  }

  /** Logs that the ability is being destroyed. */
  onDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onDestroy');
  }

  /** The main window has been created: load the main page into it. */
  onWindowStageCreate(windowStage: window.WindowStage): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (!err.code) {
        hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
        return;
      }

      hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
    });
  }

  /** The main window has been destroyed. */
  onWindowStageDestroy(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onWindowStageDestroy');
  }

  /** The ability has been brought to the foreground. */
  onForeground(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onForeground');
  }

  /** The ability has been moved to the background. */
  onBackground(): void {
    hilog.info(0x0000, 'testTag', '%{public}s', 'Ability onBackground');
  }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
import hilog from '@ohos.hilog';
|
||||
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
|
||||
|
||||
/**
 * Backup extension ability of the app. Both hooks only log a message.
 */
export default class EntryBackupAbility extends BackupExtensionAbility {
  /** Logs that a backup has been requested. */
  async onBackup(): Promise<void> {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  /** Logs that a restore has been requested, together with the bundle version. */
  async onRestore(bundleVersion: BundleVersion): Promise<void> {
    const version = JSON.stringify(bundleVersion);
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', version);
  }
}
|
||||
409
harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
Normal file
409
harmony-os/SherpaOnnxTts/entry/src/main/ets/pages/Index.ets
Normal file
@@ -0,0 +1,409 @@
|
||||
import { CircularBuffer } from 'sherpa_onnx';
|
||||
import worker, { MessageEvents } from '@ohos.worker';
|
||||
import { audio } from '@kit.AudioKit';
|
||||
import picker from '@ohos.file.picker';
|
||||
import fs from '@ohos.file.fs';
|
||||
import systemTime from '@ohos.systemTime';
|
||||
|
||||
|
||||
/**
 * Saves 16-bit mono PCM samples to a wave file.
 *
 * The file consists of a 44-byte header followed by the raw samples in
 * little-endian order. See http://soundfile.sapp.org/doc/WaveFormat/
 *
 * @param filename Path of the file to write. It is created if it does not exist.
 * @param samples 16-bit PCM samples of a single channel.
 * @param sampleRate Sample rate of the samples in Hz.
 */
function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const numChannels = 1;
  const bytesPerSample = 2;
  const dataSize = samples.length * bytesPerSample;

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  // The chunk IDs below are 4 ASCII characters read as little-endian integers.
  view.setUint32(0, 0x46464952, true); // chunkID, 'RIFF'
  view.setUint32(4, 36 + dataSize, true); // chunkSize
  view.setUint32(8, 0x45564157, true); // format, 'WAVE'
  view.setUint32(12, 0x20746d66, true); // subchunk1ID, 'fmt '
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint16(20, 1, true); // audioFormat (a 16-bit field), 1 for PCM
  view.setUint16(22, numChannels, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * numChannels * bytesPerSample, true); // byteRate
  view.setUint16(32, numChannels * bytesPerSample, true); // blockAlign
  view.setUint16(34, 8 * bytesPerSample, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // subchunk2ID, 'data'
  view.setUint32(40, dataSize, true); // subchunk2Size

  // Write only the bytes covered by `samples`; it may be a view into a
  // larger buffer, in which case the header above would not match the data.
  const coversWholeBuffer = samples.byteOffset === 0 && samples.byteLength === samples.buffer.byteLength;
  const pcm = coversWholeBuffer ? samples.buffer :
    samples.buffer.slice(samples.byteOffset, samples.byteOffset + samples.byteLength);

  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE);
  try {
    fs.writeSync(fp.fd, header, { length: header.byteLength });
    fs.writeSync(fp.fd, pcm, { length: pcm.byteLength });
  } finally {
    // Close the file even if one of the writes throws.
    fs.closeSync(fp.fd);
  }
}
|
||||
|
||||
/**
 * Converts float samples to 16-bit integer samples.
 *
 * Each sample is multiplied by 32767 and clamped to [-32768, 32767] before
 * it is stored in the returned Int16Array.
 *
 * @param samples Float samples to convert.
 * @returns A new Int16Array with the same length as the input.
 */
function toInt16Samples(samples: Float32Array): Int16Array {
  const numSamples = samples.length;
  const converted = new Int16Array(numSamples);

  for (let k = 0; k < numSamples; ++k) {
    const scaled = samples[k] * 32767;
    converted[k] = Math.max(-32768, Math.min(32767, scaled));
  }

  return converted;
}
|
||||
|
||||
|
||||
@Entry
|
||||
@Component
|
||||
struct Index {
|
||||
@State currentIndex: number = 0;
|
||||
@State title: string = 'Next-gen Kaldi: Text-to-speech';
|
||||
@State info: string = '';
|
||||
@State btnStartCaption: string = 'Start';
|
||||
@State btnStartEnabled: boolean = false;
|
||||
@State btnStopCaption: string = 'Stop';
|
||||
@State btnStopEnabled: boolean = false;
|
||||
@State btnSaveCaption: string = 'Save';
|
||||
@State btnSaveEnabled: boolean = false;
|
||||
@State progress: number = 0;
|
||||
@State sid: string = '0';
|
||||
@State speechSpeed: string = '1.0';
|
||||
@State isGenerating: boolean = false;
|
||||
@State initTtsDone: boolean = false;
|
||||
@State ttsGeneratedDone: boolean = true;
|
||||
@State numSpeakers: number = 1;
|
||||
@State initAudioDone: boolean = false;
|
||||
private controller: TabsController = new TabsController();
|
||||
private cancelled: boolean = false;
|
||||
private sampleRate: number = 0;
|
||||
private startTime: number = 0;
|
||||
private stopTime: number = 0;
|
||||
private inputText: string = '';
|
||||
// it specifies only the initial capacity.
|
||||
private workerInstance?: worker.ThreadWorker
|
||||
private readonly scriptURL: string = 'entry/ets/workers/NonStreamingTtsWorker.ets'
|
||||
// note that circular buffer can automatically resize.
|
||||
private sampleBuffer: CircularBuffer = new CircularBuffer(16000 * 5);
|
||||
private finalSamples: Float32Array | null = null;
|
||||
private audioRenderer: audio.AudioRenderer | null = null;
|
||||
|
||||
/**
 * Creates the audio renderer that plays the generated samples.
 *
 * The renderer is configured for mono, 16-bit little-endian raw PCM at
 * `this.sampleRate`. Creation is asynchronous: on success the renderer is
 * stored in `this.audioRenderer`, `this.audioPlayCallback` is registered for
 * 'writeData', and playback starts right away if samples are already queued.
 *
 * It is safe to call this method repeatedly; it does nothing if a renderer
 * already exists or if the sample rate is not known yet.
 *
 * See https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/using-audiorenderer-for-playback-V5
 */
initAudioRenderer() {
  if (this.audioRenderer) {
    console.log(`Audio renderer has already been created. Skip creating`);
    return;
  }

  if (this.sampleRate <= 0) {
    // The sample rate is reported by the worker with 'init-tts-done'. Creating
    // a renderer before that would use an invalid sampling rate, so wait for
    // a later call.
    console.log('Sample rate is not known yet. Skip creating audio renderer');
    return;
  }

  console.log('Initializing audio renderer');

  const audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: this.sampleRate,
    channels: audio.AudioChannel.CHANNEL_1, // number of channels (mono)
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW
  };

  const audioRendererInfo: audio.AudioRendererInfo = {
    usage: audio.StreamUsage.STREAM_USAGE_MUSIC, rendererFlags: 0
  };

  const audioRendererOptions: audio.AudioRendererOptions = {
    streamInfo: audioStreamInfo, rendererInfo: audioRendererInfo
  };

  audio.createAudioRenderer(audioRendererOptions, (err, renderer) => {
    if (err) {
      console.log(`Failed to initialize audio renderer. error message: ${err.message}, error code: ${err.code}`);
      return;
    }

    if (!renderer) {
      // Do not mark initialization as done, so that a later call can retry.
      console.log(`returned audio renderer is ${renderer}`);
      return;
    }

    if (this.audioRenderer) {
      // Another creation request finished first. Keep a single renderer so
      // that only one 'writeData' callback consumes the sample buffer.
      renderer.release();
      return;
    }

    console.log('audio renderer initialized successfully');
    this.audioRenderer = renderer;
    this.initAudioDone = true;

    renderer.on("writeData", this.audioPlayCallback);
    if (this.sampleBuffer.size()) {
      renderer.start();
    }
  });
}
|
||||
|
||||
/**
 * Lifecycle hook: sets up audio playback, starts the TTS worker thread and
 * asks it to load the model.
 *
 * Messages received from the worker:
 *  - 'init-tts-done': the model is ready; carries `sampleRate` and `numSpeakers`.
 *  - 'tts-generate-partial': a chunk of samples plus the current progress.
 *    The chunk is queued for playback unless the request was cancelled.
 *  - 'tts-generate-done': all samples of the request. They are kept for the
 *    Save button and timing statistics are displayed.
 */
async aboutToAppear() {
  this.initAudioRenderer();

  this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
    name: 'NonStreaming TTS worker'
  });

  this.workerInstance.onmessage = (e: MessageEvents) => {
    const msgType = e.data['msgType'] as string;
    console.log(`received msg from worker: ${msgType}`);

    if (msgType == 'init-tts-done') {
      this.info = 'Model initialized!\nPlease enter text and press start.';
      this.sampleRate = e.data['sampleRate'] as number;
      this.numSpeakers = e.data['numSpeakers'] as number;

      this.initTtsDone = true;
    }

    if (msgType == 'tts-generate-partial') {
      if (this.cancelled) {
        return;
      }

      const samples: Float32Array = e.data['samples'] as Float32Array;
      const progress: number = e.data['progress'] as number;
      this.progress = progress;

      this.sampleBuffer.push(samples);

      if (!this.initAudioDone) {
        this.initAudioRenderer();
      }

      if (this.audioRenderer && this.audioRenderer?.state != audio.AudioState.STATE_RUNNING) {
        this.audioRenderer.start();
      }
    }

    if (msgType == 'tts-generate-done') {
      this.isGenerating = false;
      const samples: Float32Array = e.data['samples'] as Float32Array;

      systemTime.getRealTime((err, data) => {
        if (err) {
          console.log(`Failed to get stop time`)
        } else {
          this.stopTime = data;

          const audioDuration = samples.length / this.sampleRate;
          const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
          const RTF = elapsedSeconds / audioDuration;

          this.info = `Audio duration: ${audioDuration} s
Elapsed: ${elapsedSeconds} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
`;
          if (this.cancelled) {
            this.info += '\nCancelled.';
          }
        }
      });

      this.finalSamples = samples;
      this.ttsGeneratedDone = true;
      this.btnSaveEnabled = true;

      // If nothing has been queued or played so far, play the complete result
      // now. Skip this after a cancellation: the Stop button stops the
      // renderer and empties the buffer, which would otherwise satisfy this
      // condition and start playing audio the user has just cancelled.
      if (!this.cancelled && this.audioRenderer &&
        this.audioRenderer?.state != audio.AudioState.STATE_RUNNING &&
        this.sampleBuffer.size() == 0) {
        this.sampleBuffer.push(samples);
        this.progress = 1;
        this.audioRenderer.start();
      }

      if (!this.initAudioDone) {
        // Without a renderer the playback callback never runs, so the buttons
        // have to be reset here.
        this.btnStartEnabled = true;
        this.btnStopEnabled = false;
        this.info += '\nAudio renderer is not initialized. Disable playing audio.';
      }
    }
  }

  this.info = 'Initializing TTS model ...';
  this.workerInstance.postMessage({ msgType: 'init-tts', context: getContext() });
}
|
||||
|
||||
/**
 * Builds one item of the tab bar: an icon with a caption below it.
 *
 * @param title Caption of the tab.
 * @param targetIndex Index of the tab this item switches to when clicked.
 * @param selectedImg Icon shown while this tab is the current one.
 * @param normalImg Icon shown otherwise.
 */
@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
  Column() {
    Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
    // The caption is highlighted while its tab is selected.
    Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
  }.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
    // Remember the selection and switch the Tabs container to it.
    this.currentIndex = targetIndex;
    this.controller.changeIndex(this.currentIndex);
  })
}
|
||||
|
||||
/**
 * Declares the UI: a 'TTS' tab with the speaker/speed inputs, the
 * Start/Stop/Save buttons, a status area, a progress bar and the text input,
 * plus a static 'Help' tab.
 */
build() {
  Column() {
    Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
      TabContent() {
        Column({ space: 10 }) {
          Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
          // The speaker ID input is only useful for multi-speaker models.
          if (this.numSpeakers > 1) {
            Row({ space: 10 }) {
              Text(`Speaker ID (0-${this.numSpeakers - 1})`).width('60%')

              TextInput({ text: this.sid }).onChange((text) => {
                this.sid = text.trim();
              }).width('20%')
            }.justifyContent(FlexAlign.Center)
          }

          Row() {
            Text('Speech speed').width('60%');

            TextInput({ text: this.speechSpeed }).onChange((text) => {
              this.speechSpeed = text.trim();
            }).width('20%')
          }

          Row({ space: 10 }) {
            Button(this.btnStartCaption).enabled(this.btnStartEnabled).onClick(async () => {
              // Comparing the round-tripped value rejects inputs such as '1a' or '1.5'.
              // The range check rejects negative and too large IDs. A model may
              // report 0 speakers, in which case only ID 0 is accepted.
              let sid = parseInt(this.sid);
              const maxSid = Math.max(this.numSpeakers, 1) - 1;
              if (sid.toString() != this.sid || sid < 0 || sid > maxSid) {
                this.info = 'Please input a valid speaker ID';
                return;
              }

              let speed = parseFloat(this.speechSpeed);
              if (isNaN(speed)) {
                this.info = 'Please enter a valid speech speed';
                return;
              }

              if (speed <= 0) {
                this.info = 'Please enter a positive speech speed';
                return;
              }

              if (this.workerInstance && this.initTtsDone) {
                // Reset everything left over from the previous request.
                this.info = 'Generating...';
                this.cancelled = false;
                this.finalSamples = null;
                this.sampleBuffer.reset();
                this.ttsGeneratedDone = false;
                this.progress = 0;

                this.btnStartEnabled = false;
                this.btnStopEnabled = true;
                this.btnSaveEnabled = false;
                console.log(`sending ${this.inputText}`)
                // The start time is used later to compute the real-time factor.
                this.startTime = await systemTime.getRealTime();
                this.workerInstance?.postMessage({
                  msgType: 'tts-generate',
                  text: this.inputText,
                  sid: sid,
                  speed: speed,
                });
                this.isGenerating = true;
                this.info = '';
              } else {
                this.info = 'Failed to initialize tts model';
                this.btnStartEnabled = false;
              }
            });

            Button(this.btnStopCaption).enabled(this.btnStopEnabled).onClick(() => {
              this.ttsGeneratedDone = true;
              this.btnStartEnabled = true;
              this.btnStopEnabled = false;
              // Drop the samples that have not been played yet.
              this.sampleBuffer.reset();
              this.cancelled = true;
              this.isGenerating = false;

              if (this.workerInstance && this.initTtsDone) {
                this.workerInstance.postMessage({ msgType: 'tts-generate-cancel' });
              }
              this.audioRenderer?.stop();
            })

            Button(this.btnSaveCaption).enabled(this.btnSaveEnabled).onClick(() => {
              if (!this.finalSamples || this.finalSamples.length == 0) {
                this.btnSaveEnabled = false;
                return;
              }

              const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];

              const audioViewPicker = new picker.AudioViewPicker();

              audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                if (audioSelectResult.length == 0) {
                  // Nothing was selected, so there is nowhere to save to.
                  console.log('No file selected. Skip saving');
                  return;
                }

                const uri: string = audioSelectResult[0];
                if (this.finalSamples) {
                  savePcmToWav(uri, toInt16Samples(this.finalSamples), this.sampleRate);
                  console.log(`Saved to ${uri}`);
                  this.info += `\nSaved to ${uri}`;
                }
              }).catch((err: Error) => {
                console.log(`Failed to save audio: ${err.message}`);
                this.info += `\nFailed to save audio: ${err.message}`;
              });
            });
          }

          if (this.info != '') {
            TextArea({ text: this.info }).focusable(false);
          }
          if (this.progress > 0) {
            Row() {
              Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                .width('80%')
                .height(20)
                .value(this.progress * 100);

              Text(`${(this.progress * 100).toFixed(2)}%`).width('15%')
            }.width('100%').justifyContent(FlexAlign.Center)
          }

          TextArea({ placeholder: 'Input text for TTS and click the start button' })
            .width('100%')
            .height('100%')
            .focusable(this.isGenerating == false && this.initTtsDone)
            .onChange((text) => {
              this.inputText = text;
              // Start is only available when there is something to synthesize.
              if (text.trim() == '') {
                this.btnStartEnabled = false;
                return;
              }
              this.btnStartEnabled = true;
            })
        }.width('100%')

        // see https://composeicons.com/
      }.tabBar(this.TabBuilder('TTS', 0, $r('app.media.home'), $r('app.media.home')))

      TabContent() {
        Column({space: 10}) {
          Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
          TextArea({text: `
Everything is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
`}).width('100%')
            .height('100%')
            .focusable(false)
        }.justifyContent(FlexAlign.Start)
      }.tabBar(this.TabBuilder('Help', 1, $r('app.media.info'), $r('app.media.info')))
    }.scrollable(false)
  }
}
|
||||
|
||||
/**
 * 'writeData' callback of the audio renderer: fills `buffer` with the next
 * 16-bit PCM samples to play.
 *
 * While generation is still running, data is only written when a complete
 * buffer is available; otherwise silence is written and the queued samples
 * wait for the next chunk. Once generation is done, the remaining samples
 * are flushed even if they do not fill the buffer (the rest is padded with
 * silence), so the end of the audio is not lost. When nothing is left, the
 * renderer is stopped and the Start/Stop buttons are reset.
 *
 * @param buffer Output buffer provided by the renderer.
 */
private audioPlayCallback = (buffer: ArrayBuffer) => {
  const int16Samples = new Int16Array(buffer);
  const numSamples = int16Samples.length;
  const available = this.sampleBuffer.size();

  if (available >= numSamples || (this.ttsGeneratedDone && available > 0)) {
    const numToPlay = Math.min(available, numSamples);
    const samples: Float32Array = this.sampleBuffer.get(this.sampleBuffer.head(), numToPlay);

    for (let i = 0; i < numToPlay; ++i) {
      // Scale float samples to int16 and clip to the int16 range.
      let s = samples[i] * 32767;
      s = s > 32767 ? 32767 : s;
      s = s < -32768 ? -32768 : s;
      int16Samples[i] = s;
    }

    // Pad a partially filled buffer with silence.
    int16Samples.fill(0, numToPlay);
    this.sampleBuffer.pop(numToPlay);
    return;
  }

  int16Samples.fill(0);
  if (this.ttsGeneratedDone) {
    this.audioRenderer?.stop();
    this.btnStartEnabled = true;
    this.btnStopEnabled = false;
  }
};
|
||||
}
|
||||
@@ -0,0 +1,284 @@
|
||||
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
|
||||
|
||||
import { fileIo as fs } from '@kit.CoreFileKit';
|
||||
|
||||
import {OfflineTtsConfig, OfflineTts, listRawfileDir, TtsInput, TtsOutput} from 'sherpa_onnx';
|
||||
import { buffer } from '@kit.ArkTS';
|
||||
|
||||
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
|
||||
|
||||
let tts: OfflineTts;
|
||||
let cancelled = false;
|
||||
|
||||
/**
 * Creates a directory, including missing parent directories, inside the
 * application's sandbox files directory.
 *
 * @param context Used to look up the sandbox files directory.
 * @param parts Path components of the directory, relative to the sandbox
 *              files directory, e.g. ['a', 'b'] for '<filesDir>/a/b'.
 */
function mkdir(context: Context, parts: string[]) {
  const sandboxPath: string = context.getApplicationContext().filesDir;

  // Fast path: the whole directory already exists. The check has to use the
  // path inside the sandbox, which is where the directories are created.
  if (fs.accessSync(sandboxPath + '/' + parts.join('/'))) {
    return;
  }

  // Create the missing directories one level at a time.
  let d = sandboxPath;
  for (const p of parts) {
    d = d + '/' + p;

    if (!fs.accessSync(d)) {
      fs.mkdirSync(d);
    }
  }
}
|
||||
|
||||
/**
 * Copies every file that listRawfileDir() reports for `srcDir` into the
 * sandbox, using the same relative path and creating directories as needed.
 *
 * @param context Provides the resource manager and the sandbox location.
 * @param srcDir Rawfile directory to copy.
 */
function copyRawFileDirToSandbox(context: Context, srcDir: string) {
  const resourceManager = context.resourceManager;
  const files: string[] = listRawfileDir(resourceManager, srcDir);

  for (const file of files) {
    // Everything except the last component is the directory of the file.
    const dirParts: string[] = file.split('/').slice(0, -1);
    if (dirParts.length != 0) {
      mkdir(context, dirParts);
    }

    copyRawFileToSandbox(context, file, file);
  }
}
|
||||
|
||||
/**
 * Copies a single rawfile into the application's sandbox files directory.
 *
 * The copy is skipped if the destination already exists and has the same
 * size as the source.
 *
 * See https://blog.csdn.net/weixin_44640245/article/details/142634846
 * and https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
 *
 * @param context Provides the resource manager and the sandbox location.
 * @param src Path of the rawfile.
 * @param dst Destination path, relative to the sandbox files directory.
 */
function copyRawFileToSandbox(context: Context, src: string, dst: string) {
  let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);

  // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
  let sandboxPath: string = context.getApplicationContext().filesDir;
  let filepath = sandboxPath + '/' + dst;

  if (fs.accessSync(filepath)) {
    // if the destination exists and has the expected file size,
    // then we skip copying it
    let stat = fs.statSync(filepath);
    if (stat.size == uint8Array.length) {
      return;
    }
  }

  const fp = fs.openSync(filepath, fs.OpenMode.WRITE_ONLY | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  try {
    fs.writeSync(fp.fd, buffer.from(uint8Array).buffer)
  } finally {
    // Close synchronously so that the file is closed before the caller uses
    // it, and so that the descriptor is released even if the write fails.
    fs.closeSync(fp);
  }
}
|
||||
|
||||
/**
 * Builds the configuration for the model selected below and creates the
 * OfflineTts object.
 *
 * All model files are given relative to `modelDir`. The `dataDir` and
 * `dictDir` directories are additionally copied into the sandbox and passed
 * as sandbox paths.
 *
 * @param context Provides the resource manager and the sandbox location.
 * @returns The initialized OfflineTts instance.
 * @throws Error if no model has been selected in the code below.
 */
function initTts(context: Context): OfflineTts {
  // Such a design is to make it easier to build flutter APPs with
  // github actions for a variety of tts models
  //
  // See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
  // for details

  let modelDir = '';
  let modelName = '';
  let ruleFsts = '';
  let ruleFars = '';
  let lexicon = '';
  let dataDir = '';
  let dictDir = '';
  // You can select an example below and change it to match your
  // selected tts model

  // ============================================================
  // Your change starts here
  // ============================================================

  // Example 1:
  // modelDir = 'vits-vctk';
  // modelName = 'vits-vctk.onnx';
  // lexicon = 'lexicon.txt';

  // Example 2:
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-amy-low.tar.bz2
  // modelDir = 'vits-piper-en_US-amy-low';
  // modelName = 'en_US-amy-low.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 3:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-icefall-zh-aishell3.tar.bz2
  // modelDir = 'vits-icefall-zh-aishell3';
  // modelName = 'model.onnx';
  // ruleFsts = 'phone.fst,date.fst,number.fst,new_heteronym.fst';
  // ruleFars = 'rule.far';
  // lexicon = 'lexicon.txt';

  // Example 4:
  // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/vits.html#csukuangfj-vits-zh-hf-fanchen-c-chinese-187-speakers
  // modelDir = 'vits-zh-hf-fanchen-C';
  // modelName = 'vits-zh-hf-fanchen-C.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';

  // Example 5:
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-coqui-de-css10.tar.bz2
  // modelDir = 'vits-coqui-de-css10';
  // modelName = 'model.onnx';

  // Example 6
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-piper-en_US-libritts_r-medium.tar.bz2
  // modelDir = 'vits-piper-en_US-libritts_r-medium';
  // modelName = 'en_US-libritts_r-medium.onnx';
  // dataDir = 'espeak-ng-data';

  // Example 7
  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
  // https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/vits-melo-tts-zh_en.tar.bz2
  // modelDir = 'vits-melo-tts-zh_en';
  // modelName = 'model.onnx';
  // lexicon = 'lexicon.txt';
  // dictDir = 'dict';
  // ruleFsts = `date.fst,phone.fst,number.fst`;

  // ============================================================
  // Please don't change the remaining part of this function
  // ============================================================

  if (modelName == '') {
    throw new Error('You are supposed to select a model by changing the code before you run the app');
  }

  const prefix = modelDir + '/';
  modelName = prefix + modelName;

  // The rule files are comma-separated lists; every entry lives in modelDir.
  if (ruleFsts != '') {
    ruleFsts = ruleFsts.split(',').map((name: string) => prefix + name).join(',');
  }

  if (ruleFars != '') {
    ruleFars = ruleFars.split(',').map((name: string) => prefix + name).join(',');
  }

  if (lexicon != '') {
    lexicon = prefix + lexicon;
  }

  // These directories are copied into the sandbox and referenced by their
  // sandbox path.
  if (dataDir != '') {
    const rawDataDir = prefix + dataDir;
    copyRawFileDirToSandbox(context, rawDataDir)
    dataDir = context.getApplicationContext().filesDir + '/' + rawDataDir;
  }

  if (dictDir != '') {
    const rawDictDir = prefix + dictDir;
    copyRawFileDirToSandbox(context, rawDictDir)
    dictDir = context.getApplicationContext().filesDir + '/' + rawDictDir;
  }

  const config: OfflineTtsConfig = new OfflineTtsConfig();

  config.model.vits.model = modelName;
  config.model.vits.lexicon = lexicon;
  config.model.vits.tokens = prefix + 'tokens.txt';
  config.model.vits.dataDir = dataDir;
  config.model.vits.dictDir = dictDir;

  config.model.numThreads = 2;
  config.model.debug = true;

  config.ruleFsts = ruleFsts;
  config.ruleFars = ruleFars;

  return new OfflineTts(config, context.resourceManager);
}
|
||||
|
||||
/**
 * Argument passed to the generation callback: a chunk of generated samples
 * together with the current progress. Both are forwarded unchanged to the
 * host thread in a 'tts-generate-partial' message.
 */
interface TtsCallbackData {
  // Generated audio samples of this chunk.
  samples: Float32Array;
  // Progress value of the generation. NOTE(review): the UI multiplies it by
  // 100 to show a percentage, so it is presumably in [0, 1] - confirm.
  progress: number;
}
|
||||
|
||||
/**
 * Generation callback: forwards a chunk of samples and the progress to the
 * host thread and tells the generator whether it should go on.
 *
 * @param data The samples generated so far for this chunk and the progress.
 * @returns 0 to stop generating in C++, 1 to continue generating in C++.
 */
function callback(data: TtsCallbackData): number {
  const chunk: Float32Array = Float32Array.from(data.samples);

  workerPort.postMessage({
    'msgType': 'tts-generate-partial',
    samples: chunk,
    progress: data.progress,
  });

  if (cancelled) {
    return 0;
  }

  return 1;
}
|
||||
|
||||
/**
 * Defines the event handler to be called when the worker thread receives a message sent by the host thread.
 * The event handler is executed in the worker thread.
 *
 * Supported messages:
 *  - 'init-tts': creates the TTS object (only once) and replies with
 *    'init-tts-done' carrying the sample rate and the number of speakers.
 *  - 'tts-generate-cancel': makes the generation callback ask for a stop.
 *  - 'tts-generate': generates speech for `text` with the given `sid` and
 *    `speed`. Chunks are reported via 'tts-generate-partial' and the complete
 *    result via 'tts-generate-done'.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;
  console.log(`msg-type: ${msgType}`);

  if (msgType == 'init-tts' && !tts) {
    const context = e.data['context'] as Context;
    tts = initTts(context);
    workerPort.postMessage({ 'msgType': 'init-tts-done',
      sampleRate: tts.sampleRate,
      numSpeakers: tts.numSpeakers,
    });
  }

  if (msgType == 'tts-generate-cancel') {
    cancelled = true;
  }

  if (msgType == 'tts-generate') {
    if (!tts) {
      // Nothing can be generated before 'init-tts' has succeeded.
      console.log('tts is not initialized. Ignore the tts-generate request');
      return;
    }

    const text = e.data['text'] as string;
    console.log(`recevied text ${text}`);

    const input: TtsInput = new TtsInput();
    input.text = text;
    input.sid = e.data['sid'] as number;
    input.speed = e.data['speed'] as number;
    input.callback = callback;

    cancelled = false;

    // generateAsync() is used instead of the synchronous tts.generate(input)
    // so that this handler returns immediately and a later
    // 'tts-generate-cancel' message can be received during generation.
    tts.generateAsync(input).then((ttsOutput: TtsOutput) => {
      console.log(`sampleRate: ${ttsOutput.sampleRate}`);

      workerPort.postMessage({
        'msgType': 'tts-generate-done',
        samples: Float32Array.from(ttsOutput.samples),
      });
    }).catch((err: Error) => {
      console.log(`Failed to generate speech: ${err.message}`);
    });
  }
}
|
||||
|
||||
/**
 * Defines the event handler to be called when the worker receives a message that cannot be deserialized.
 * The event handler is executed in the worker thread.
 *
 * The problem is logged so that a dropped request does not go unnoticed.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
  console.error('TTS worker received a message that cannot be deserialized');
}
|
||||
|
||||
/**
 * Defines the event handler to be called when an exception occurs during worker execution.
 * The event handler is executed in the worker thread.
 *
 * The exception is logged. Otherwise failures such as the error thrown by
 * initTts() when no model has been selected would be invisible.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
  console.error(`Error in TTS worker: ${e.message}`);
}
|
||||
52
harmony-os/SherpaOnnxTts/entry/src/main/module.json5
Normal file
52
harmony-os/SherpaOnnxTts/entry/src/main/module.json5
Normal file
@@ -0,0 +1,52 @@
|
||||
{
|
||||
"module": {
|
||||
"name": "entry",
|
||||
"type": "entry",
|
||||
"description": "$string:module_desc",
|
||||
"mainElement": "EntryAbility",
|
||||
"deviceTypes": [
|
||||
"phone",
|
||||
"tablet",
|
||||
"2in1"
|
||||
],
|
||||
"deliveryWithInstall": true,
|
||||
"installationFree": false,
|
||||
"pages": "$profile:main_pages",
|
||||
"abilities": [
|
||||
{
|
||||
"name": "EntryAbility",
|
||||
"srcEntry": "./ets/entryability/EntryAbility.ets",
|
||||
"description": "$string:EntryAbility_desc",
|
||||
"icon": "$media:layered_image",
|
||||
"label": "$string:EntryAbility_label",
|
||||
"startWindowIcon": "$media:startIcon",
|
||||
"startWindowBackground": "$color:start_window_background",
|
||||
"exported": true,
|
||||
"skills": [
|
||||
{
|
||||
"entities": [
|
||||
"entity.system.home"
|
||||
],
|
||||
"actions": [
|
||||
"action.system.home"
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"extensionAbilities": [
|
||||
{
|
||||
"name": "EntryBackupAbility",
|
||||
"srcEntry": "./ets/entrybackupability/EntryBackupAbility.ets",
|
||||
"type": "backup",
|
||||
"exported": false,
|
||||
"metadata": [
|
||||
{
|
||||
"name": "ohos.extension.backup",
|
||||
"resource": "$profile:backup_config"
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"color": [
|
||||
{
|
||||
"name": "start_window_background",
|
||||
"value": "#FFFFFF"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "On-device text-to-speech with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "On-device text-to-speech with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "TTS"
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 56 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 12 KiB |
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="m480-840 440 330-48 64-72-54v380H160v-380l-72 54-48-64zM294-478q0 53 57 113t129 125q72-65 129-125t57-113q0-44-30-73t-72-29q-26 0-47.5 10.5T480-542q-15-17-37.5-27.5T396-580q-42 0-72 29t-30 73m426 278v-360L480-740 240-560v360zm0 0H240z"/></svg>
|
||||
|
After Width: | Height: | Size: 339 B |
@@ -0,0 +1 @@
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 -960 960 960"><path d="M440-280h80v-240h-80zm40-320q17 0 28.5-11.5T520-640t-11.5-28.5T480-680t-28.5 11.5T440-640t11.5 28.5T480-600m0 520q-83 0-156-31.5T197-197t-85.5-127T80-480t31.5-156T197-763t127-85.5T480-880t156 31.5T763-763t85.5 127T880-480t-31.5 156T763-197t-127 85.5T480-80m0-80q134 0 227-93t93-227-93-227-227-93-227 93-93 227 93 227 227 93m0-320"/></svg>
|
||||
|
After Width: | Height: | Size: 435 B |
@@ -0,0 +1,7 @@
|
||||
{
|
||||
"layered-image":
|
||||
{
|
||||
"background" : "$media:background",
|
||||
"foreground" : "$media:foreground"
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 20 KiB |
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"allowToBackupRestore": true
|
||||
}
|
||||
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"src": [
|
||||
"pages/Index"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "On-device text-to-speech with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "On-device text-to-speech with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "TTS"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "使用新一代Kaldi进行本地离线语音合成"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "使用新一代Kaldi进行本地离线语音合成"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "本地语音合成"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
import hilog from '@ohos.hilog';
|
||||
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
|
||||
|
||||
/**
 * Registers the 'ActsAbilityTest' suite. It contains empty setup/teardown
 * hooks and a single sample test case.
 */
export default function abilityTest() {
  describe('ActsAbilityTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    })
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    })
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    })
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    })
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      hilog.info(0x0000, 'testTag', '%{public}s', 'it begin');
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    })
  })
}
|
||||
@@ -0,0 +1,5 @@
|
||||
import abilityTest from './Ability.test';
|
||||
|
||||
/**
 * Entry point of the test list: registers the suites defined in
 * './Ability.test'.
 */
export default function testsuite() {
  abilityTest();
}
|
||||
13
harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5
Normal file
13
harmony-os/SherpaOnnxTts/entry/src/ohosTest/module.json5
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"module": {
|
||||
"name": "entry_test",
|
||||
"type": "feature",
|
||||
"deviceTypes": [
|
||||
"phone",
|
||||
"tablet",
|
||||
"2in1"
|
||||
],
|
||||
"deliveryWithInstall": true,
|
||||
"installationFree": false
|
||||
}
|
||||
}
|
||||
5
harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets
Normal file
5
harmony-os/SherpaOnnxTts/entry/src/test/List.test.ets
Normal file
@@ -0,0 +1,5 @@
|
||||
import localUnitTest from './LocalUnit.test';
|
||||
|
||||
/**
 * Entry point of the local test list: registers the suites defined in
 * './LocalUnit.test'.
 */
export default function testsuite() {
  localUnitTest();
}
|
||||
33
harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets
Normal file
33
harmony-os/SherpaOnnxTts/entry/src/test/LocalUnit.test.ets
Normal file
@@ -0,0 +1,33 @@
|
||||
import { describe, beforeAll, beforeEach, afterEach, afterAll, it, expect } from '@ohos/hypium';
|
||||
|
||||
/**
 * Registers the 'localUnitTest' suite. It contains empty setup/teardown
 * hooks and a single sample test case.
 */
export default function localUnitTest() {
  describe('localUnitTest', () => {
    // Defines a test suite. Two parameters are supported: test suite name and test suite function.
    beforeAll(() => {
      // Presets an action, which is performed only once before all test cases of the test suite start.
      // This API supports only one parameter: preset action function.
    });
    beforeEach(() => {
      // Presets an action, which is performed before each unit test case starts.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: preset action function.
    });
    afterEach(() => {
      // Presets a clear action, which is performed after each unit test case ends.
      // The number of execution times is the same as the number of test cases defined by **it**.
      // This API supports only one parameter: clear action function.
    });
    afterAll(() => {
      // Presets a clear action, which is performed after all test cases of the test suite end.
      // This API supports only one parameter: clear action function.
    });
    it('assertContain', 0, () => {
      // Defines a test case. This API supports three parameters: test case name, filter parameter, and test case function.
      let a = 'abc';
      let b = 'b';
      // Defines a variety of assertion methods, which are used to declare expected boolean conditions.
      expect(a).assertContain(b);
      expect(a).assertEqual(a);
    });
  });
}
|
||||
22
harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5
Normal file
22
harmony-os/SherpaOnnxTts/hvigor/hvigor-config.json5
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"modelVersion": "5.0.0",
|
||||
"dependencies": {
|
||||
},
|
||||
"execution": {
|
||||
// "analyze": "normal", /* Define the build analyze mode. Value: [ "normal" | "advanced" | false ]. Default: "normal" */
|
||||
// "daemon": true, /* Enable daemon compilation. Value: [ true | false ]. Default: true */
|
||||
// "incremental": true, /* Enable incremental compilation. Value: [ true | false ]. Default: true */
|
||||
// "parallel": true, /* Enable parallel compilation. Value: [ true | false ]. Default: true */
|
||||
// "typeCheck": false, /* Enable typeCheck. Value: [ true | false ]. Default: false */
|
||||
},
|
||||
"logging": {
|
||||
// "level": "info" /* Define the log level. Value: [ "debug" | "info" | "warn" | "error" ]. Default: "info" */
|
||||
},
|
||||
"debugging": {
|
||||
// "stacktrace": false /* Disable stacktrace compilation. Value: [ true | false ]. Default: false */
|
||||
},
|
||||
"nodeOptions": {
|
||||
// "maxOldSpaceSize": 8192 /* Enable nodeOptions maxOldSpaceSize compilation. Unit M. Used for the daemon process. Default: 8192*/
|
||||
// "exposeGC": true /* Enable to trigger garbage collection explicitly. Default: true*/
|
||||
}
|
||||
}
|
||||
6
harmony-os/SherpaOnnxTts/hvigorfile.ts
Normal file
6
harmony-os/SherpaOnnxTts/hvigorfile.ts
Normal file
@@ -0,0 +1,6 @@
|
||||
import { appTasks } from '@ohos/hvigor-ohos-plugin';
|
||||
|
||||
export default {
|
||||
system: appTasks, /* Built-in plugin of Hvigor. It cannot be modified. */
|
||||
plugins:[] /* Custom plugin to extend the functionality of Hvigor. */
|
||||
}
|
||||
19
harmony-os/SherpaOnnxTts/oh-package-lock.json5
Normal file
19
harmony-os/SherpaOnnxTts/oh-package-lock.json5
Normal file
@@ -0,0 +1,19 @@
|
||||
{
|
||||
"meta": {
|
||||
"stableOrder": true
|
||||
},
|
||||
"lockfileVersion": 3,
|
||||
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
|
||||
"specifiers": {
|
||||
"@ohos/hypium@1.0.19": "@ohos/hypium@1.0.19"
|
||||
},
|
||||
"packages": {
|
||||
"@ohos/hypium@1.0.19": {
|
||||
"name": "@ohos/hypium",
|
||||
"version": "1.0.19",
|
||||
"integrity": "sha512-cEjDgLFCm3cWZDeRXk7agBUkPqjWxUo6AQeiu0gEkb3J8ESqlduQLSIXeo3cCsm8U/asL7iKjF85ZyOuufAGSQ==",
|
||||
"resolved": "https://ohpm.openharmony.cn/ohpm/@ohos/hypium/-/hypium-1.0.19.har",
|
||||
"registryType": "ohpm"
|
||||
}
|
||||
}
|
||||
}
|
||||
9
harmony-os/SherpaOnnxTts/oh-package.json5
Normal file
9
harmony-os/SherpaOnnxTts/oh-package.json5
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"modelVersion": "5.0.0",
|
||||
"description": "Please describe the basic information.",
|
||||
"dependencies": {
|
||||
},
|
||||
"devDependencies": {
|
||||
"@ohos/hypium": "1.0.19"
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit';
|
||||
@Entry
|
||||
@Component
|
||||
struct Index {
|
||||
@State title: string = 'Next-gen Kaldi: VAD + ASR';
|
||||
@State currentIndex: number = 0;
|
||||
@State resultForFile: string = '';
|
||||
@State progressForFile: number = 0;
|
||||
@@ -73,13 +74,11 @@ struct Index {
|
||||
};
|
||||
|
||||
const audioCapturerInfo: audio.AudioCapturerInfo = {
|
||||
source: audio.SourceType.SOURCE_TYPE_MIC,
|
||||
capturerFlags: 0
|
||||
source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
|
||||
};
|
||||
|
||||
const audioCapturerOptions: audio.AudioCapturerOptions = {
|
||||
streamInfo: audioStreamInfo,
|
||||
capturerInfo: audioCapturerInfo
|
||||
streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo
|
||||
|
||||
};
|
||||
audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
|
||||
@@ -162,15 +161,9 @@ struct Index {
|
||||
@Builder
|
||||
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
|
||||
Column() {
|
||||
Image(this.currentIndex == targetIndex ? selectedImg : normalImg)
|
||||
.size({ width: 25, height: 25 })
|
||||
Text(title)
|
||||
.fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
|
||||
}
|
||||
.width('100%')
|
||||
.height(50)
|
||||
.justifyContent(FlexAlign.Center)
|
||||
.onClick(() => {
|
||||
Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
|
||||
Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
|
||||
}.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
|
||||
this.currentIndex = targetIndex;
|
||||
this.controller.changeIndex(this.currentIndex);
|
||||
})
|
||||
@@ -181,11 +174,7 @@ struct Index {
|
||||
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
|
||||
TabContent() {
|
||||
Column({ space: 10 }) {
|
||||
Text('Next-gen Kaldi: VAD + ASR')
|
||||
.fontColor('#182431')
|
||||
.fontSize(25)
|
||||
.lineHeight(41)
|
||||
.fontWeight(500)
|
||||
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
|
||||
|
||||
Button('Select .wav file (16kHz) ')
|
||||
.enabled(this.selectFileBtnEnabled)
|
||||
@@ -211,8 +200,7 @@ struct Index {
|
||||
|
||||
if (this.workerInstance) {
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'non-streaming-asr-vad-decode',
|
||||
filename: result[0],
|
||||
msgType: 'non-streaming-asr-vad-decode', filename: result[0],
|
||||
});
|
||||
} else {
|
||||
console.log(`this worker instance is undefined ${this.workerInstance}`);
|
||||
@@ -236,80 +224,86 @@ struct Index {
|
||||
}.width('100%').justifyContent(FlexAlign.Center)
|
||||
}
|
||||
|
||||
TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
|
||||
|
||||
}
|
||||
.alignItems(HorizontalAlign.Center)
|
||||
.justifyContent(FlexAlign.Start)
|
||||
TextArea({ text: this.resultForFile })
|
||||
.width('100%')
|
||||
.lineSpacing({ value: 10, unit: LengthUnit.VP })
|
||||
.height('100%');
|
||||
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
|
||||
}.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default')))
|
||||
|
||||
TabContent() {
|
||||
Column() {
|
||||
Button(this.message)
|
||||
.enabled(this.micInitDone)
|
||||
.onClick(() => {
|
||||
console.log('clicked mic button');
|
||||
this.resultForMic = '';
|
||||
if (this.mic) {
|
||||
if (this.micStarted) {
|
||||
this.mic.stop();
|
||||
this.message = "Start recording";
|
||||
this.micStarted = false;
|
||||
console.log('mic stopped');
|
||||
Column({ space: 10 }) {
|
||||
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
|
||||
Button(this.message).enabled(this.micInitDone).onClick(() => {
|
||||
console.log('clicked mic button');
|
||||
this.resultForMic = '';
|
||||
if (this.mic) {
|
||||
if (this.micStarted) {
|
||||
this.mic.stop();
|
||||
this.message = "Start recording";
|
||||
this.micStarted = false;
|
||||
console.log('mic stopped');
|
||||
|
||||
const samples = this.flatten(this.sampleList);
|
||||
let s = 0;
|
||||
for (let i = 0; i < samples.length; ++i) {
|
||||
s += samples[i];
|
||||
}
|
||||
console.log(`samples ${samples.length}, sum: ${s}`);
|
||||
|
||||
if (this.workerInstance) {
|
||||
console.log('decode mic');
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'non-streaming-asr-vad-mic',
|
||||
samples,
|
||||
});
|
||||
} else {
|
||||
console.log(`this worker instance is undefined ${this.workerInstance}`);
|
||||
}
|
||||
} else {
|
||||
this.sampleList = [];
|
||||
this.mic.start();
|
||||
this.message = "Stop recording";
|
||||
this.micStarted = true;
|
||||
console.log('mic started');
|
||||
const samples = this.flatten(this.sampleList);
|
||||
let s = 0;
|
||||
for (let i = 0; i < samples.length; ++i) {
|
||||
s += samples[i];
|
||||
}
|
||||
console.log(`samples ${samples.length}, sum: ${s}`);
|
||||
|
||||
if (this.workerInstance) {
|
||||
console.log('decode mic');
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'non-streaming-asr-vad-mic', samples,
|
||||
});
|
||||
} else {
|
||||
console.log(`this worker instance is undefined ${this.workerInstance}`);
|
||||
}
|
||||
} else {
|
||||
this.sampleList = [];
|
||||
this.mic.start();
|
||||
this.message = "Stop recording";
|
||||
this.micStarted = true;
|
||||
console.log('mic started');
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
Text(`Supported languages: ${this.lang}`)
|
||||
|
||||
TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
|
||||
}
|
||||
.alignItems(HorizontalAlign.Center)
|
||||
.justifyContent(FlexAlign.Start)
|
||||
TextArea({ text: this.resultForMic })
|
||||
.width('100%')
|
||||
.lineSpacing({ value: 10, unit: LengthUnit.VP })
|
||||
.width('100%')
|
||||
.height('100%');
|
||||
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
|
||||
}
|
||||
.tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
|
||||
$r('app.media.ic_public_input_voice_default')))
|
||||
|
||||
TabContent() {
|
||||
Column() {
|
||||
Text("Everything is open-sourced");
|
||||
Divider();
|
||||
Text("It runs locally, without accessing the network");
|
||||
Divider();
|
||||
Text("See also https://github.com/k2-fsa/sherpa-onnx");
|
||||
Divider();
|
||||
Text("and https://k2-fsa.github.io/sherpa/social-groups.html");
|
||||
Column({ space: 10 }) {
|
||||
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
|
||||
TextArea({
|
||||
text: `
|
||||
Everything is open-sourced.
|
||||
|
||||
It runs locally, without accessing the network
|
||||
|
||||
See also https://github.com/k2-fsa/sherpa-onnx
|
||||
|
||||
新一代 Kaldi QQ 和微信交流群: 请看
|
||||
|
||||
https://k2-fsa.github.io/sherpa/social-groups.html
|
||||
|
||||
微信公众号: 新一代 Kaldi
|
||||
`
|
||||
}).width('100%').height('100%').focusable(false)
|
||||
}.justifyContent(FlexAlign.Start)
|
||||
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'),
|
||||
$r('app.media.info_circle_default')))
|
||||
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), $r('app.media.info_circle_default')))
|
||||
|
||||
}.scrollable(false)
|
||||
}
|
||||
.width('100%')
|
||||
.justifyContent(FlexAlign.Start)
|
||||
}.width('100%').justifyContent(FlexAlign.Start)
|
||||
}
|
||||
|
||||
private micCallback = (buffer: ArrayBuffer) => {
|
||||
|
||||
@@ -2,19 +2,19 @@
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "VAD+ASR with Next-gen Kaldi"
|
||||
"value": "On-device VAD+ASR with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "VAD+ASR"
|
||||
"value": "On-device VAD+ASR with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "VAD_ASR"
|
||||
"value": "On-device speech recognition"
|
||||
},
|
||||
{
|
||||
"name": "mic_reason",
|
||||
"value": "access the microhone for speech recognition"
|
||||
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2,15 +2,19 @@
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "module description"
|
||||
"value": "On-device VAD+ASR with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "description"
|
||||
"value": "On-device VAD+ASR with Next-gen Kaldi"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "label"
|
||||
"value": "On-device speech recognition"
|
||||
},
|
||||
{
|
||||
"name": "mic_reason",
|
||||
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -2,15 +2,19 @@
|
||||
"string": [
|
||||
{
|
||||
"name": "module_desc",
|
||||
"value": "模块描述"
|
||||
"value": "基于新一代Kaldi的本地语音识别"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_desc",
|
||||
"value": "description"
|
||||
"value": "基于新一代Kaldi的本地语音识别"
|
||||
},
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "label"
|
||||
"value": "本地语音识别"
|
||||
},
|
||||
{
|
||||
"name": "mic_reason",
|
||||
"value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1169,6 +1169,17 @@ SherpaOnnxOfflineTtsGenerateWithProgressCallback(
|
||||
return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed, wrapper);
|
||||
}
|
||||
|
||||
// Variant of SherpaOnnxOfflineTtsGenerateWithProgressCallback() whose user
// callback additionally receives the opaque `arg` pointer given here.
const SherpaOnnxGeneratedAudio *
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
    const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
    SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg) {
  // Bind `arg` so the generator can invoke the callback with only
  // (samples, n, progress); the callback's return value is passed through.
  auto bound_callback = [callback, arg](const float *chunk,
                                        int32_t num_samples,
                                        float fraction_done) {
    return callback(chunk, num_samples, fraction_done, arg);
  };

  return SherpaOnnxOfflineTtsGenerateInternal(tts, text, sid, speed,
                                              bound_callback);
}
|
||||
|
||||
const SherpaOnnxGeneratedAudio *SherpaOnnxOfflineTtsGenerateWithCallbackWithArg(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||
SherpaOnnxGeneratedAudioCallbackWithArg callback, void *arg) {
|
||||
|
||||
@@ -930,6 +930,9 @@ typedef int32_t (*SherpaOnnxGeneratedAudioCallbackWithArg)(const float *samples,
|
||||
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallback)(
|
||||
const float *samples, int32_t n, float p);
|
||||
|
||||
// Same as SherpaOnnxGeneratedAudioProgressCallback, but the callback takes an
// additional `void *arg` parameter.
typedef int32_t (*SherpaOnnxGeneratedAudioProgressCallbackWithArg)(
|
||||
const float *samples, int32_t n, float p, void *arg);
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineTts SherpaOnnxOfflineTts;
|
||||
|
||||
// Create an instance of offline TTS. The user has to use DestroyOfflineTts()
|
||||
@@ -964,11 +967,19 @@ SherpaOnnxOfflineTtsGenerateWithCallback(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||
SherpaOnnxGeneratedAudioCallback callback);
|
||||
|
||||
SHERPA_ONNX_API
|
||||
const SherpaOnnxGeneratedAudio *
|
||||
SherpaOnnxOfflineTtsGenerateWithProgressCallback(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||
|
||||
SherpaOnnxGeneratedAudioProgressCallback callback);
|
||||
|
||||
// Same as SherpaOnnxOfflineTtsGenerateWithProgressCallback, but takes a
// callback with an extra `void *arg` parameter together with the `arg` value
// for it.
SHERPA_ONNX_API
|
||||
const SherpaOnnxGeneratedAudio *
|
||||
SherpaOnnxOfflineTtsGenerateWithProgressCallbackWithArg(
|
||||
const SherpaOnnxOfflineTts *tts, const char *text, int32_t sid, float speed,
|
||||
SherpaOnnxGeneratedAudioProgressCallbackWithArg callback, void *arg);
|
||||
|
||||
// Same as SherpaOnnxGeneratedAudioCallback but you can pass an additional
|
||||
// `void* arg` to the callback.
|
||||
SHERPA_ONNX_API const SherpaOnnxGeneratedAudio *
|
||||
|
||||
@@ -22,8 +22,14 @@ CircularBuffer::CircularBuffer(int32_t capacity) {
|
||||
void CircularBuffer::Resize(int32_t new_capacity) {
|
||||
int32_t capacity = static_cast<int32_t>(buffer_.size());
|
||||
if (new_capacity <= capacity) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE(
|
||||
"new_capacity (%{public}d) <= original capacity (%{public}d). Skip it.",
|
||||
new_capacity, capacity);
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("new_capacity (%d) <= original capacity (%d). Skip it.",
|
||||
new_capacity, capacity);
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -90,10 +96,18 @@ void CircularBuffer::Push(const float *p, int32_t n) {
|
||||
int32_t size = Size();
|
||||
if (n + size > capacity) {
|
||||
int32_t new_capacity = std::max(capacity * 2, n + size);
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Overflow! n: %{public}d, size: %{public}d, n+size: %{public}d, "
|
||||
"capacity: %{public}d. Increase "
|
||||
"capacity to: %{public}d. (Original data is copied. No data loss!)",
|
||||
n, size, n + size, capacity, new_capacity);
|
||||
#else
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Overflow! n: %d, size: %d, n+size: %d, capacity: %d. Increase "
|
||||
"capacity to: %d",
|
||||
"capacity to: %d. (Original data is copied. No data loss!)",
|
||||
n, size, n + size, capacity, new_capacity);
|
||||
#endif
|
||||
Resize(new_capacity);
|
||||
|
||||
capacity = new_capacity;
|
||||
|
||||
@@ -7,6 +7,7 @@
|
||||
#include <algorithm>
|
||||
#include <cctype>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <strstream>
|
||||
@@ -159,17 +160,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsChinese(
|
||||
words = ProcessHeteronyms(words);
|
||||
|
||||
if (debug_) {
|
||||
fprintf(stderr, "Input text in string: %s\n", text.c_str());
|
||||
fprintf(stderr, "Input text in bytes:");
|
||||
std::ostringstream os;
|
||||
|
||||
os << "Input text in string: " << text << "\n";
|
||||
os << "Input text in bytes:";
|
||||
for (uint8_t c : text) {
|
||||
fprintf(stderr, " %02x", c);
|
||||
// Cast to an integer before streaming: a uint8_t is inserted into an ostream
// as a raw character, so std::hex/std::setw would not produce a hex byte.
os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
   << static_cast<int32_t>(c);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "After splitting to words:");
|
||||
os << "\n";
|
||||
os << "After splitting to words:";
|
||||
for (const auto &w : words) {
|
||||
fprintf(stderr, " %s", w.c_str());
|
||||
os << " " << w;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
os << "\n";
|
||||
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("%s", os.str().c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
std::vector<TokenIDs> ans;
|
||||
@@ -259,17 +269,26 @@ std::vector<TokenIDs> Lexicon::ConvertTextToTokenIdsNotChinese(
|
||||
std::vector<std::string> words = SplitUtf8(text);
|
||||
|
||||
if (debug_) {
|
||||
fprintf(stderr, "Input text (lowercase) in string: %s\n", text.c_str());
|
||||
fprintf(stderr, "Input text in bytes:");
|
||||
std::ostringstream os;
|
||||
|
||||
os << "Input text (lowercase) in string: " << text << "\n";
|
||||
os << "Input text in bytes:";
|
||||
for (uint8_t c : text) {
|
||||
fprintf(stderr, " %02x", c);
|
||||
// Cast to an integer before streaming: a uint8_t is inserted into an ostream
// as a raw character, so std::hex/std::setw would not produce a hex byte.
os << " 0x" << std::setfill('0') << std::setw(2) << std::right << std::hex
   << static_cast<int32_t>(c);
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "After splitting to words:");
|
||||
os << "\n";
|
||||
os << "After splitting to words:";
|
||||
for (const auto &w : words) {
|
||||
fprintf(stderr, " %s", w.c_str());
|
||||
os << " " << w;
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
os << "\n";
|
||||
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("%{public}s", os.str().c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("%s", os.str().c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
int32_t blank = token2id_.at(" ");
|
||||
|
||||
@@ -6,11 +6,21 @@
|
||||
|
||||
#include <fstream>
|
||||
#include <regex> // NOLINT
|
||||
#include <strstream>
|
||||
#include <utility>
|
||||
#if __ANDROID_API__ >= 9
|
||||
#include "android/asset_manager.h"
|
||||
#include "android/asset_manager_jni.h"
|
||||
#endif
|
||||
|
||||
#if __OHOS__
|
||||
#include "rawfile/raw_file_manager.h"
|
||||
#endif
|
||||
|
||||
#include "cppjieba/Jieba.hpp"
|
||||
#include "sherpa-onnx/csrc/file-utils.h"
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/onnx-utils.h"
|
||||
#include "sherpa-onnx/csrc/symbol-table.h"
|
||||
#include "sherpa-onnx/csrc/text-utils.h"
|
||||
|
||||
@@ -62,6 +72,60 @@ class MeloTtsLexicon::Impl {
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Manager>
|
||||
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||
const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug)
|
||||
: meta_data_(meta_data), debug_(debug) {
|
||||
std::string dict = dict_dir + "/jieba.dict.utf8";
|
||||
std::string hmm = dict_dir + "/hmm_model.utf8";
|
||||
std::string user_dict = dict_dir + "/user.dict.utf8";
|
||||
std::string idf = dict_dir + "/idf.utf8";
|
||||
std::string stop_word = dict_dir + "/stop_words.utf8";
|
||||
|
||||
AssertFileExists(dict);
|
||||
AssertFileExists(hmm);
|
||||
AssertFileExists(user_dict);
|
||||
AssertFileExists(idf);
|
||||
AssertFileExists(stop_word);
|
||||
|
||||
jieba_ =
|
||||
std::make_unique<cppjieba::Jieba>(dict, hmm, user_dict, idf, stop_word);
|
||||
|
||||
{
|
||||
auto buf = ReadFile(mgr, tokens);
|
||||
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
InitTokens(is);
|
||||
}
|
||||
|
||||
{
|
||||
auto buf = ReadFile(mgr, lexicon);
|
||||
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
InitLexicon(is);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Manager>
|
||||
Impl(Manager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug)
|
||||
: meta_data_(meta_data), debug_(debug) {
|
||||
{
|
||||
auto buf = ReadFile(mgr, tokens);
|
||||
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
InitTokens(is);
|
||||
}
|
||||
|
||||
{
|
||||
auto buf = ReadFile(mgr, lexicon);
|
||||
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
InitLexicon(is);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<TokenIDs> ConvertTextToTokenIds(const std::string &_text) const {
|
||||
std::string text = ToLowerCase(_text);
|
||||
// see
|
||||
@@ -84,17 +148,24 @@ class MeloTtsLexicon::Impl {
|
||||
jieba_->Cut(text, words, is_hmm);
|
||||
|
||||
if (debug_) {
|
||||
SHERPA_ONNX_LOGE("input text: %s", text.c_str());
|
||||
SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());
|
||||
|
||||
std::ostringstream os;
|
||||
std::string sep = "";
|
||||
for (const auto &w : words) {
|
||||
os << sep << w;
|
||||
sep = "_";
|
||||
}
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("input text: %{public}s", text.c_str());
|
||||
SHERPA_ONNX_LOGE("after replacing punctuations: %{public}s", s.c_str());
|
||||
|
||||
SHERPA_ONNX_LOGE("after jieba processing: %{public}s",
|
||||
os.str().c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("input text: %s", text.c_str());
|
||||
SHERPA_ONNX_LOGE("after replacing punctuations: %s", s.c_str());
|
||||
|
||||
SHERPA_ONNX_LOGE("after jieba processing: %s", os.str().c_str());
|
||||
#endif
|
||||
}
|
||||
} else {
|
||||
words = SplitUtf8(text);
|
||||
@@ -102,7 +173,7 @@ class MeloTtsLexicon::Impl {
|
||||
if (debug_) {
|
||||
fprintf(stderr, "Input text in string (lowercase): %s\n", text.c_str());
|
||||
fprintf(stderr, "Input text in bytes (lowercase):");
|
||||
for (uint8_t c : text) {
|
||||
// Iterate as uint8_t: a signed byte >= 0x80 is sign-extended when promoted to
// int for fprintf, so "%02x" would print it as ffffffXX instead of XX.
for (uint8_t c : text) {
  fprintf(stderr, " %02x", c);
}
|
||||
fprintf(stderr, "\n");
|
||||
@@ -307,9 +378,48 @@ MeloTtsLexicon::MeloTtsLexicon(const std::string &lexicon,
|
||||
bool debug)
|
||||
: impl_(std::make_unique<Impl>(lexicon, tokens, meta_data, debug)) {}
|
||||
|
||||
// Resource-manager overload with a jieba dictionary directory: forwards all
// arguments unchanged to the matching Impl constructor.
template <typename Manager>
|
||||
MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data,
|
||||
bool debug)
|
||||
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, dict_dir, meta_data,
|
||||
debug)) {}
|
||||
|
||||
// Resource-manager overload without a dictionary directory: forwards all
// arguments unchanged to the matching Impl constructor.
template <typename Manager>
|
||||
MeloTtsLexicon::MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const OfflineTtsVitsModelMetaData &meta_data,
|
||||
bool debug)
|
||||
: impl_(std::make_unique<Impl>(mgr, lexicon, tokens, meta_data, debug)) {}
|
||||
|
||||
// Converts `text` to token IDs by delegating to the implementation object.
// The second (voice) parameter is ignored.
std::vector<TokenIDs> MeloTtsLexicon::ConvertTextToTokenIds(
|
||||
const std::string &text, const std::string & /*unused_voice = ""*/) const {
|
||||
return impl_->ConvertTextToTokenIds(text);
|
||||
}
|
||||
|
||||
// Explicit instantiations of both templated constructors for Android's
// AAssetManager; the template definitions live in this translation unit.
#if __ANDROID_API__ >= 9
|
||||
template MeloTtsLexicon::MeloTtsLexicon(
|
||||
AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||
const std::string &dict_dir, const OfflineTtsVitsModelMetaData &meta_data,
|
||||
bool debug);
|
||||
|
||||
template MeloTtsLexicon::MeloTtsLexicon(
|
||||
AAssetManager *mgr, const std::string &lexicon, const std::string &tokens,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
|
||||
#endif
|
||||
|
||||
// Explicit instantiations of both templated constructors for
// NativeResourceManager, compiled only when __OHOS__ is set.
#if __OHOS__
|
||||
template MeloTtsLexicon::MeloTtsLexicon(
|
||||
NativeResourceManager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens, const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
|
||||
|
||||
template MeloTtsLexicon::MeloTtsLexicon(
|
||||
NativeResourceManager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens, const OfflineTtsVitsModelMetaData &meta_data,
|
||||
bool debug);
|
||||
#endif
|
||||
|
||||
} // namespace sherpa_onnx
|
||||
|
||||
@@ -25,6 +25,16 @@ class MeloTtsLexicon : public OfflineTtsFrontend {
|
||||
MeloTtsLexicon(const std::string &lexicon, const std::string &tokens,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
|
||||
|
||||
// Overload taking a resource manager `mgr` plus a jieba dictionary directory
// `dict_dir` in addition to the lexicon and tokens file names.
template <typename Manager>
|
||||
MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens, const std::string &dict_dir,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
|
||||
|
||||
// Overload taking a resource manager `mgr` and only the lexicon and tokens
// file names (no dictionary directory).
template <typename Manager>
|
||||
MeloTtsLexicon(Manager *mgr, const std::string &lexicon,
|
||||
const std::string &tokens,
|
||||
const OfflineTtsVitsModelMetaData &meta_data, bool debug);
|
||||
|
||||
std::vector<TokenIDs> ConvertTextToTokenIds(
|
||||
const std::string &text,
|
||||
const std::string &unused_voice = "") const override;
|
||||
|
||||
@@ -40,7 +40,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
tn_list_.reserve(files.size());
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
tn_list_.push_back(std::make_unique<kaldifst::TextNormalizer>(f));
|
||||
}
|
||||
@@ -57,7 +61,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
std::unique_ptr<fst::FarReader<fst::StdArc>> reader(
|
||||
fst::FarReader<fst::StdArc>::Open(f));
|
||||
@@ -88,7 +96,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
tn_list_.reserve(files.size());
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule fst: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule fst: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
auto buf = ReadFile(mgr, f);
|
||||
std::istrstream is(buf.data(), buf.size());
|
||||
@@ -103,7 +115,11 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
for (const auto &f : files) {
|
||||
if (config.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("rule far: %{public}s", f.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("rule far: %s", f.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
auto buf = ReadFile(mgr, f);
|
||||
@@ -156,14 +172,22 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
|
||||
std::string text = _text;
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("Raw text: %{public}s", text.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("Raw text: %s", text.c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
if (!tn_list_.empty()) {
|
||||
for (const auto &tn : tn_list_) {
|
||||
text = tn->Normalize(text);
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("After normalizing: %{public}s", text.c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("After normalizing: %s", text.c_str());
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -226,10 +250,17 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
int32_t num_batches = x_size / batch_size;
|
||||
|
||||
if (config_.model.debug) {
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Text is too long. Split it into %{public}d batches. batch size: "
|
||||
"%{public}d. Number of sentences: %{public}d",
|
||||
num_batches, batch_size, x_size);
|
||||
#else
|
||||
SHERPA_ONNX_LOGE(
|
||||
"Text is too long. Split it into %d batches. batch size: %d. Number "
|
||||
"of sentences: %d",
|
||||
num_batches, batch_size, x_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
GeneratedAudio ans;
|
||||
@@ -255,7 +286,7 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
audio.samples.end());
|
||||
if (callback) {
|
||||
should_continue = callback(audio.samples.data(), audio.samples.size(),
|
||||
b * 1.0 / num_batches);
|
||||
(b + 1) * 1.0 / num_batches);
|
||||
// Caution(fangjun): audio is freed when the callback returns, so users
|
||||
// should copy the data if they want to access the data after
|
||||
// the callback returns to avoid segmentation fault.
|
||||
@@ -297,6 +328,16 @@ class OfflineTtsVitsImpl : public OfflineTtsImpl {
|
||||
if (meta_data.frontend == "characters") {
|
||||
frontend_ = std::make_unique<OfflineTtsCharacterFrontend>(
|
||||
mgr, config_.model.vits.tokens, meta_data);
|
||||
} else if (meta_data.jieba && !config_.model.vits.dict_dir.empty() &&
|
||||
meta_data.is_melo_tts) {
|
||||
frontend_ = std::make_unique<MeloTtsLexicon>(
|
||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||
config_.model.vits.dict_dir, model_->GetMetaData(),
|
||||
config_.model.debug);
|
||||
} else if (meta_data.is_melo_tts && meta_data.language == "English") {
|
||||
frontend_ = std::make_unique<MeloTtsLexicon>(
|
||||
mgr, config_.model.vits.lexicon, config_.model.vits.tokens,
|
||||
model_->GetMetaData(), config_.model.debug);
|
||||
} else if ((meta_data.is_piper || meta_data.is_coqui ||
|
||||
meta_data.is_icefall) &&
|
||||
!config_.model.vits.data_dir.empty()) {
|
||||
|
||||
@@ -144,7 +144,11 @@ class OfflineTtsVitsModel::Impl {
|
||||
++i;
|
||||
}
|
||||
|
||||
#if __OHOS__
|
||||
SHERPA_ONNX_LOGE("%{public}s\n", os.str().c_str());
|
||||
#else
|
||||
SHERPA_ONNX_LOGE("%s\n", os.str().c_str());
|
||||
#endif
|
||||
}
|
||||
|
||||
Ort::AllocatorWithDefaultOptions allocator; // used in the macro below
|
||||
|
||||
Reference in New Issue
Block a user