Support Kokoro TTS for HarmonyOS. (#1743)
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
/**
|
/**
|
||||||
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
* Use these variables when you tailor your ArkTS code. They must be of the const type.
|
||||||
*/
|
*/
|
||||||
export const HAR_VERSION = '1.10.37';
|
export const HAR_VERSION = '1.10.40';
|
||||||
export const BUILD_MODE_NAME = 'debug';
|
export const BUILD_MODE_NAME = 'debug';
|
||||||
export const DEBUG = true;
|
export const DEBUG = true;
|
||||||
export const TARGET_NAME = 'default';
|
export const TARGET_NAME = 'default';
|
||||||
|
|||||||
@@ -31,7 +31,8 @@ export { OnlineStream,
|
|||||||
OnlineRecognizer,
|
OnlineRecognizer,
|
||||||
} from './src/main/ets/components/StreamingAsr';
|
} from './src/main/ets/components/StreamingAsr';
|
||||||
|
|
||||||
export { OfflineTtsMatchaModelConfig,
|
export { OfflineTtsKokoroModelConfig,
|
||||||
|
OfflineTtsMatchaModelConfig,
|
||||||
OfflineTtsVitsModelConfig,
|
OfflineTtsVitsModelConfig,
|
||||||
OfflineTtsModelConfig,
|
OfflineTtsModelConfig,
|
||||||
OfflineTtsConfig,
|
OfflineTtsConfig,
|
||||||
|
|||||||
@@ -28,9 +28,18 @@ export class OfflineTtsMatchaModelConfig {
|
|||||||
public lengthScale: number = 1.0;
|
public lengthScale: number = 1.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Configuration for a Kokoro TTS model: locations of the model files plus a
 * length scale. All string fields default to '' and lengthScale defaults to 1.0.
 * NOTE(review): an empty `model` presumably means "Kokoro not selected" to the
 * consumer of this config - confirm against the native binding.
 */
export class OfflineTtsKokoroModelConfig {
|
||||||
|
public model: string = ''; // Kokoro model file (the Kokoro example uses 'model.onnx')
|
||||||
|
public voices: string = ''; // voices file (the Kokoro example uses 'voices.bin')
|
||||||
|
public tokens: string = ''; // tokens file (the example app passes '<modelDir>/tokens.txt')
|
||||||
|
public dataDir: string = ''; // data directory (the Kokoro example uses 'espeak-ng-data')
|
||||||
|
public lengthScale: number = 1.0; // same name and default as the Matcha config's lengthScale; presumably a speech-duration scale - TODO confirm
|
||||||
|
}
|
||||||
|
|
||||||
export class OfflineTtsModelConfig {
|
export class OfflineTtsModelConfig {
|
||||||
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
|
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
|
||||||
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
|
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
|
||||||
|
public kokoro: OfflineTtsKokoroModelConfig = new OfflineTtsKokoroModelConfig();
|
||||||
public numThreads: number = 1;
|
public numThreads: number = 1;
|
||||||
public debug: boolean = false;
|
public debug: boolean = false;
|
||||||
public provider: string = 'cpu';
|
public provider: string = 'cpu';
|
||||||
|
|||||||
@@ -66,6 +66,7 @@ struct Index {
|
|||||||
@State initTtsDone: boolean = false;
|
@State initTtsDone: boolean = false;
|
||||||
@State ttsGeneratedDone: boolean = true;
|
@State ttsGeneratedDone: boolean = true;
|
||||||
@State numSpeakers: number = 1;
|
@State numSpeakers: number = 1;
|
||||||
|
@State numThreads: number = 1;
|
||||||
@State initAudioDone: boolean = false;
|
@State initAudioDone: boolean = false;
|
||||||
private controller: TabsController = new TabsController();
|
private controller: TabsController = new TabsController();
|
||||||
private cancelled: boolean = false;
|
private cancelled: boolean = false;
|
||||||
@@ -135,6 +136,7 @@ struct Index {
|
|||||||
this.info = 'Model initialized!\nPlease enter text and press start.';
|
this.info = 'Model initialized!\nPlease enter text and press start.';
|
||||||
this.sampleRate = e.data['sampleRate'] as number;
|
this.sampleRate = e.data['sampleRate'] as number;
|
||||||
this.numSpeakers = e.data['numSpeakers'] as number;
|
this.numSpeakers = e.data['numSpeakers'] as number;
|
||||||
|
this.numThreads = e.data['numThreads'] as number;
|
||||||
|
|
||||||
this.initTtsDone = true;
|
this.initTtsDone = true;
|
||||||
}
|
}
|
||||||
@@ -177,6 +179,7 @@ struct Index {
|
|||||||
this.info = `Audio duration: ${audioDuration} s
|
this.info = `Audio duration: ${audioDuration} s
|
||||||
Elapsed: ${elapsedSeconds} s
|
Elapsed: ${elapsedSeconds} s
|
||||||
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
|
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
|
||||||
|
Number of threads: ${this.numThreads}
|
||||||
`;
|
`;
|
||||||
if (this.cancelled) {
|
if (this.cancelled) {
|
||||||
this.info += '\nCancelled.';
|
this.info += '\nCancelled.';
|
||||||
|
|||||||
@@ -42,9 +42,12 @@ function copyRawFileDirToSandbox(context: Context, srcDir: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function copyRawFileToSandbox(context: Context, src: string, dst: string) {
|
function copyRawFileToSandbox(context: Context, src: string,
|
||||||
// see https://blog.csdn.net/weixin_44640245/article/details/142634846
|
dst: string) {
|
||||||
// https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
|
/* see
|
||||||
|
https://blog.csdn.net/weixin_44640245/article/details/142634846
|
||||||
|
https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
|
||||||
|
*/
|
||||||
let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
|
let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
|
||||||
|
|
||||||
// https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
|
// https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
|
||||||
@@ -52,8 +55,9 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
|
|||||||
let filepath = sandboxPath + '/' + dst;
|
let filepath = sandboxPath + '/' + dst;
|
||||||
|
|
||||||
if (fs.accessSync(filepath)) {
|
if (fs.accessSync(filepath)) {
|
||||||
// if the destination exists and has the expected file size,
|
/* If the destination exists and already has the expected file size,
|
||||||
// then we skip copying it
|
skip copying it.
|
||||||
|
*/
|
||||||
let stat = fs.statSync(filepath);
|
let stat = fs.statSync(filepath);
|
||||||
if (stat.size == uint8Array.length) {
|
if (stat.size == uint8Array.length) {
|
||||||
return;
|
return;
|
||||||
@@ -66,11 +70,12 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function initTts(context: Context): OfflineTts {
|
function initTts(context: Context): OfflineTts {
|
||||||
// Such a design is to make it easier to build flutter APPs with
|
/* Such a design is to make it easier to build flutter APPs with
|
||||||
// github actions for a variety of tts models
|
github actions for a variety of tts models
|
||||||
//
|
|
||||||
// See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
|
See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
|
||||||
// for details
|
for details
|
||||||
|
*/
|
||||||
|
|
||||||
let modelDir = '';
|
let modelDir = '';
|
||||||
|
|
||||||
@@ -83,13 +88,19 @@ function initTts(context: Context): OfflineTts {
|
|||||||
let vocoder = '';
|
let vocoder = '';
|
||||||
// for Matcha end
|
// for Matcha end
|
||||||
|
|
||||||
|
// for Kokoro begin
|
||||||
|
let voices = '';
|
||||||
|
// for Kokoro end
|
||||||
|
|
||||||
let ruleFsts = '';
|
let ruleFsts = '';
|
||||||
let ruleFars = '';
|
let ruleFars = '';
|
||||||
let lexicon = '';
|
let lexicon = '';
|
||||||
let dataDir = '';
|
let dataDir = '';
|
||||||
let dictDir = '';
|
let dictDir = '';
|
||||||
// You can select an example below and change it according to match your
|
/*
|
||||||
// selected tts model
|
You can select an example below and adapt it to match your
|
||||||
|
selected TTS model.
|
||||||
|
*/
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
// Your change starts here
|
// Your change starts here
|
||||||
@@ -146,19 +157,26 @@ function initTts(context: Context): OfflineTts {
|
|||||||
// Example 8
|
// Example 8
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
|
||||||
// modelDir = 'matcha-icefall-zh-baker'
|
// modelDir = 'matcha-icefall-zh-baker';
|
||||||
// acousticModelName = 'model-steps-3.onnx'
|
// acousticModelName = 'model-steps-3.onnx';
|
||||||
// vocoder = 'hifigan_v2.onnx'
|
// vocoder = 'hifigan_v2.onnx';
|
||||||
// lexicon = 'lexicon.txt'
|
// lexicon = 'lexicon.txt';
|
||||||
// dictDir = 'dict';
|
// dictDir = 'dict';
|
||||||
// ruleFsts = `date.fst,phone.fst,number.fst`;
|
// ruleFsts = `date.fst,phone.fst,number.fst`;
|
||||||
|
|
||||||
// Example 9
|
// Example 9
|
||||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
|
||||||
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
|
||||||
// modelDir = 'matcha-icefall-en_US-ljspeech'
|
// modelDir = 'matcha-icefall-en_US-ljspeech';
|
||||||
// acousticModelName = 'model-steps-3.onnx'
|
// acousticModelName = 'model-steps-3.onnx';
|
||||||
// vocoder = 'hifigan_v2.onnx'
|
// vocoder = 'hifigan_v2.onnx';
|
||||||
|
// dataDir = 'espeak-ng-data';
|
||||||
|
|
||||||
|
// Example 10
|
||||||
|
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
|
||||||
|
// modelDir = 'kokoro-en-v0_19';
|
||||||
|
// modelName = 'model.onnx';
|
||||||
|
// voices = 'voices.bin';
|
||||||
// dataDir = 'espeak-ng-data';
|
// dataDir = 'espeak-ng-data';
|
||||||
|
|
||||||
// ============================================================
|
// ============================================================
|
||||||
@@ -185,6 +203,10 @@ function initTts(context: Context): OfflineTts {
|
|||||||
acousticModelName = modelDir + '/' + acousticModelName;
|
acousticModelName = modelDir + '/' + acousticModelName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (voices != '') {
|
||||||
|
voices = modelDir + '/' + voices;
|
||||||
|
}
|
||||||
|
|
||||||
if (ruleFsts != '') {
|
if (ruleFsts != '') {
|
||||||
let fsts = ruleFsts.split(',')
|
let fsts = ruleFsts.split(',')
|
||||||
let tmp: string[] = [];
|
let tmp: string[] = [];
|
||||||
@@ -222,7 +244,12 @@ function initTts(context: Context): OfflineTts {
|
|||||||
const tokens = modelDir + '/tokens.txt';
|
const tokens = modelDir + '/tokens.txt';
|
||||||
|
|
||||||
const config: OfflineTtsConfig = new OfflineTtsConfig();
|
const config: OfflineTtsConfig = new OfflineTtsConfig();
|
||||||
|
if (voices != '') {
|
||||||
|
config.model.vits.model = '';
|
||||||
|
} else {
|
||||||
config.model.vits.model = modelName;
|
config.model.vits.model = modelName;
|
||||||
|
}
|
||||||
|
|
||||||
config.model.vits.lexicon = lexicon;
|
config.model.vits.lexicon = lexicon;
|
||||||
config.model.vits.tokens = tokens;
|
config.model.vits.tokens = tokens;
|
||||||
config.model.vits.dataDir = dataDir;
|
config.model.vits.dataDir = dataDir;
|
||||||
@@ -235,6 +262,15 @@ function initTts(context: Context): OfflineTts {
|
|||||||
config.model.matcha.dataDir = dataDir;
|
config.model.matcha.dataDir = dataDir;
|
||||||
config.model.matcha.dictDir = dictDir;
|
config.model.matcha.dictDir = dictDir;
|
||||||
|
|
||||||
|
if (voices != '') {
|
||||||
|
config.model.kokoro.model = modelName;
|
||||||
|
} else {
|
||||||
|
config.model.kokoro.model = '';
|
||||||
|
}
|
||||||
|
config.model.kokoro.voices = voices;
|
||||||
|
config.model.kokoro.tokens = tokens;
|
||||||
|
config.model.kokoro.dataDir = dataDir;
|
||||||
|
|
||||||
config.model.numThreads = 2;
|
config.model.numThreads = 2;
|
||||||
config.model.debug = true;
|
config.model.debug = true;
|
||||||
config.ruleFsts = ruleFsts;
|
config.ruleFsts = ruleFsts;
|
||||||
@@ -250,9 +286,7 @@ interface TtsCallbackData {
|
|||||||
|
|
||||||
function callback(data: TtsCallbackData): number {
|
function callback(data: TtsCallbackData): number {
|
||||||
workerPort.postMessage({
|
workerPort.postMessage({
|
||||||
'msgType': 'tts-generate-partial',
|
'msgType': 'tts-generate-partial', samples: Float32Array.from(data.samples), progress: data.progress,
|
||||||
samples: Float32Array.from(data.samples),
|
|
||||||
progress: data.progress,
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// 0 means to stop generating in C++
|
// 0 means to stop generating in C++
|
||||||
@@ -272,9 +306,11 @@ workerPort.onmessage = (e: MessageEvents) => {
|
|||||||
if (msgType == 'init-tts' && !tts) {
|
if (msgType == 'init-tts' && !tts) {
|
||||||
const context = e.data['context'] as Context;
|
const context = e.data['context'] as Context;
|
||||||
tts = initTts(context);
|
tts = initTts(context);
|
||||||
workerPort.postMessage({ 'msgType': 'init-tts-done',
|
workerPort.postMessage({
|
||||||
|
'msgType': 'init-tts-done',
|
||||||
sampleRate: tts.sampleRate,
|
sampleRate: tts.sampleRate,
|
||||||
numSpeakers: tts.numSpeakers,
|
numSpeakers: tts.numSpeakers,
|
||||||
|
numThreads: tts.config.model.numThreads,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -297,16 +333,14 @@ workerPort.onmessage = (e: MessageEvents) => {
|
|||||||
console.log(`sampleRate: ${ttsOutput.sampleRate}`);
|
console.log(`sampleRate: ${ttsOutput.sampleRate}`);
|
||||||
|
|
||||||
workerPort.postMessage({
|
workerPort.postMessage({
|
||||||
'msgType': 'tts-generate-done',
|
'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
|
||||||
samples: Float32Array.from(ttsOutput.samples),
|
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
const ttsOutput: TtsOutput = tts.generate(input);
|
const ttsOutput: TtsOutput = tts.generate(input);
|
||||||
workerPort.postMessage({
|
workerPort.postMessage({
|
||||||
'msgType': 'tts-generate-done',
|
'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
|
||||||
samples: Float32Array.from(ttsOutput.samples),
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user