Support Kokoro TTS for HarmonyOS. (#1743)

This commit is contained in:
Fangjun Kuang
2025-01-22 11:14:42 +08:00
committed by GitHub
parent 5bcd7e100a
commit bc3322e5a6
5 changed files with 82 additions and 35 deletions

View File

@@ -1,7 +1,7 @@
/** /**
* Use these variables when you tailor your ArkTS code. They must be of the const type. * Use these variables when you tailor your ArkTS code. They must be of the const type.
*/ */
export const HAR_VERSION = '1.10.37'; export const HAR_VERSION = '1.10.40';
export const BUILD_MODE_NAME = 'debug'; export const BUILD_MODE_NAME = 'debug';
export const DEBUG = true; export const DEBUG = true;
export const TARGET_NAME = 'default'; export const TARGET_NAME = 'default';

View File

@@ -31,7 +31,8 @@ export { OnlineStream,
OnlineRecognizer, OnlineRecognizer,
} from './src/main/ets/components/StreamingAsr'; } from './src/main/ets/components/StreamingAsr';
export { OfflineTtsMatchaModelConfig, export { OfflineTtsKokoroModelConfig,
OfflineTtsMatchaModelConfig,
OfflineTtsVitsModelConfig, OfflineTtsVitsModelConfig,
OfflineTtsModelConfig, OfflineTtsModelConfig,
OfflineTtsConfig, OfflineTtsConfig,

View File

@@ -28,9 +28,18 @@ export class OfflineTtsMatchaModelConfig {
public lengthScale: number = 1.0; public lengthScale: number = 1.0;
} }
/**
 * Configuration for a Kokoro text-to-speech model.
 *
 * All string fields default to the empty string and lengthScale defaults
 * to 1.0, so a default-constructed instance names no model files.
 * NOTE(review): presumably an empty `model` means "Kokoro is not selected"
 * to the consumer of this config - confirm against the native binding.
 */
export class OfflineTtsKokoroModelConfig {
// Path to the Kokoro model file, e.g. '<modelDir>/model.onnx'.
public model: string = '';
// Path to the voices file, e.g. '<modelDir>/voices.bin'.
public voices: string = '';
// Path to the tokens file, e.g. '<modelDir>/tokens.txt'.
public tokens: string = '';
// Path to the data directory, e.g. an 'espeak-ng-data' directory.
public dataDir: string = '';
// NOTE(review): presumably a speech-speed factor where 1.0 leaves the
// speed unchanged - confirm against the sherpa-onnx Kokoro documentation.
public lengthScale: number = 1.0;
}
export class OfflineTtsModelConfig { export class OfflineTtsModelConfig {
public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig(); public vits: OfflineTtsVitsModelConfig = new OfflineTtsVitsModelConfig();
public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig(); public matcha: OfflineTtsMatchaModelConfig = new OfflineTtsMatchaModelConfig();
public kokoro: OfflineTtsKokoroModelConfig = new OfflineTtsKokoroModelConfig();
public numThreads: number = 1; public numThreads: number = 1;
public debug: boolean = false; public debug: boolean = false;
public provider: string = 'cpu'; public provider: string = 'cpu';

View File

@@ -66,6 +66,7 @@ struct Index {
@State initTtsDone: boolean = false; @State initTtsDone: boolean = false;
@State ttsGeneratedDone: boolean = true; @State ttsGeneratedDone: boolean = true;
@State numSpeakers: number = 1; @State numSpeakers: number = 1;
@State numThreads: number = 1;
@State initAudioDone: boolean = false; @State initAudioDone: boolean = false;
private controller: TabsController = new TabsController(); private controller: TabsController = new TabsController();
private cancelled: boolean = false; private cancelled: boolean = false;
@@ -135,6 +136,7 @@ struct Index {
this.info = 'Model initialized!\nPlease enter text and press start.'; this.info = 'Model initialized!\nPlease enter text and press start.';
this.sampleRate = e.data['sampleRate'] as number; this.sampleRate = e.data['sampleRate'] as number;
this.numSpeakers = e.data['numSpeakers'] as number; this.numSpeakers = e.data['numSpeakers'] as number;
this.numThreads = e.data['numThreads'] as number;
this.initTtsDone = true; this.initTtsDone = true;
} }
@@ -177,6 +179,7 @@ struct Index {
this.info = `Audio duration: ${audioDuration} s this.info = `Audio duration: ${audioDuration} s
Elapsed: ${elapsedSeconds} s Elapsed: ${elapsedSeconds} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)} RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
Number of threads: ${this.numThreads}
`; `;
if (this.cancelled) { if (this.cancelled) {
this.info += '\nCancelled.'; this.info += '\nCancelled.';

View File

@@ -42,9 +42,12 @@ function copyRawFileDirToSandbox(context: Context, srcDir: string) {
} }
} }
function copyRawFileToSandbox(context: Context, src: string, dst: string) { function copyRawFileToSandbox(context: Context, src: string,
// see https://blog.csdn.net/weixin_44640245/article/details/142634846 dst: string) {
// https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5 /* see
https://blog.csdn.net/weixin_44640245/article/details/142634846
https://developer.huawei.com/consumer/cn/doc/harmonyos-guides-V5/rawfile-guidelines-V5
*/
let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src); let uint8Array: Uint8Array = context.resourceManager.getRawFileContentSync(src);
// https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir // https://developer.huawei.com/consumer/cn/doc/harmonyos-references-V5/js-apis-file-fs-V5#fsmkdir
@@ -52,8 +55,9 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
let filepath = sandboxPath + '/' + dst; let filepath = sandboxPath + '/' + dst;
if (fs.accessSync(filepath)) { if (fs.accessSync(filepath)) {
// if the destination exists and has the expected file size, /* if the destination exists and has the expected file size
// then we skip copying it then we skip copying it
*/
let stat = fs.statSync(filepath); let stat = fs.statSync(filepath);
if (stat.size == uint8Array.length) { if (stat.size == uint8Array.length) {
return; return;
@@ -66,11 +70,12 @@ function copyRawFileToSandbox(context: Context, src: string, dst: string) {
} }
function initTts(context: Context): OfflineTts { function initTts(context: Context): OfflineTts {
// Such a design is to make it easier to build flutter APPs with /* Such a design is to make it easier to build flutter APPs with
// github actions for a variety of tts models github actions for a variety of tts models
//
// See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py See https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/flutter/generate-tts.py
// for details for details
*/
let modelDir = ''; let modelDir = '';
@@ -83,13 +88,19 @@ function initTts(context: Context): OfflineTts {
let vocoder = ''; let vocoder = '';
// for Matcha end // for Matcha end
// for Kokoro begin
let voices = '';
// for Kokoro end
let ruleFsts = ''; let ruleFsts = '';
let ruleFars = ''; let ruleFars = '';
let lexicon = ''; let lexicon = '';
let dataDir = ''; let dataDir = '';
let dictDir = ''; let dictDir = '';
// You can select an example below and change it according to match your /*
// selected tts model You can select an example below and change it to match your
selected tts model
*/
// ============================================================ // ============================================================
// Your change starts here // Your change starts here
@@ -146,19 +157,26 @@ function initTts(context: Context): OfflineTts {
// Example 8 // Example 8
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-zh-baker-chinese-1-female-speaker
// modelDir = 'matcha-icefall-zh-baker' // modelDir = 'matcha-icefall-zh-baker';
// acousticModelName = 'model-steps-3.onnx' // acousticModelName = 'model-steps-3.onnx';
// vocoder = 'hifigan_v2.onnx' // vocoder = 'hifigan_v2.onnx';
// lexicon = 'lexicon.txt' // lexicon = 'lexicon.txt';
// dictDir = 'dict'; // dictDir = 'dict';
// ruleFsts = `date.fst,phone.fst,number.fst`; // ruleFsts = `date.fst,phone.fst,number.fst`;
// Example 9 // Example 9
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models // https://github.com/k2-fsa/sherpa-onnx/releases/tag/tts-models
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker // https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/matcha.html#matcha-icefall-en-us-ljspeech-american-english-1-female-speaker
// modelDir = 'matcha-icefall-en_US-ljspeech' // modelDir = 'matcha-icefall-en_US-ljspeech';
// acousticModelName = 'model-steps-3.onnx' // acousticModelName = 'model-steps-3.onnx';
// vocoder = 'hifigan_v2.onnx' // vocoder = 'hifigan_v2.onnx';
// dataDir = 'espeak-ng-data';
// Example 10
// https://k2-fsa.github.io/sherpa/onnx/tts/pretrained_models/kokoro.html#kokoro-en-v0-19-english-11-speakers
// modelDir = 'kokoro-en-v0_19';
// modelName = 'model.onnx';
// voices = 'voices.bin';
// dataDir = 'espeak-ng-data'; // dataDir = 'espeak-ng-data';
// ============================================================ // ============================================================
@@ -185,6 +203,10 @@ function initTts(context: Context): OfflineTts {
acousticModelName = modelDir + '/' + acousticModelName; acousticModelName = modelDir + '/' + acousticModelName;
} }
if (voices != '') {
voices = modelDir + '/' + voices;
}
if (ruleFsts != '') { if (ruleFsts != '') {
let fsts = ruleFsts.split(',') let fsts = ruleFsts.split(',')
let tmp: string[] = []; let tmp: string[] = [];
@@ -222,7 +244,12 @@ function initTts(context: Context): OfflineTts {
const tokens = modelDir + '/tokens.txt'; const tokens = modelDir + '/tokens.txt';
const config: OfflineTtsConfig = new OfflineTtsConfig(); const config: OfflineTtsConfig = new OfflineTtsConfig();
if (voices != '') {
config.model.vits.model = '';
} else {
config.model.vits.model = modelName; config.model.vits.model = modelName;
}
config.model.vits.lexicon = lexicon; config.model.vits.lexicon = lexicon;
config.model.vits.tokens = tokens; config.model.vits.tokens = tokens;
config.model.vits.dataDir = dataDir; config.model.vits.dataDir = dataDir;
@@ -235,6 +262,15 @@ function initTts(context: Context): OfflineTts {
config.model.matcha.dataDir = dataDir; config.model.matcha.dataDir = dataDir;
config.model.matcha.dictDir = dictDir; config.model.matcha.dictDir = dictDir;
if (voices != '') {
config.model.kokoro.model = modelName;
} else {
config.model.kokoro.model = '';
}
config.model.kokoro.voices = voices;
config.model.kokoro.tokens = tokens;
config.model.kokoro.dataDir = dataDir;
config.model.numThreads = 2; config.model.numThreads = 2;
config.model.debug = true; config.model.debug = true;
config.ruleFsts = ruleFsts; config.ruleFsts = ruleFsts;
@@ -250,9 +286,7 @@ interface TtsCallbackData {
function callback(data: TtsCallbackData): number { function callback(data: TtsCallbackData): number {
workerPort.postMessage({ workerPort.postMessage({
'msgType': 'tts-generate-partial', 'msgType': 'tts-generate-partial', samples: Float32Array.from(data.samples), progress: data.progress,
samples: Float32Array.from(data.samples),
progress: data.progress,
}); });
// 0 means to stop generating in C++ // 0 means to stop generating in C++
@@ -272,9 +306,11 @@ workerPort.onmessage = (e: MessageEvents) => {
if (msgType == 'init-tts' && !tts) { if (msgType == 'init-tts' && !tts) {
const context = e.data['context'] as Context; const context = e.data['context'] as Context;
tts = initTts(context); tts = initTts(context);
workerPort.postMessage({ 'msgType': 'init-tts-done', workerPort.postMessage({
'msgType': 'init-tts-done',
sampleRate: tts.sampleRate, sampleRate: tts.sampleRate,
numSpeakers: tts.numSpeakers, numSpeakers: tts.numSpeakers,
numThreads: tts.config.model.numThreads,
}); });
} }
@@ -297,16 +333,14 @@ workerPort.onmessage = (e: MessageEvents) => {
console.log(`sampleRate: ${ttsOutput.sampleRate}`); console.log(`sampleRate: ${ttsOutput.sampleRate}`);
workerPort.postMessage({ workerPort.postMessage({
'msgType': 'tts-generate-done', 'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
samples: Float32Array.from(ttsOutput.samples),
}); });
}); });
} else { } else {
const ttsOutput: TtsOutput = tts.generate(input); const ttsOutput: TtsOutput = tts.generate(input);
workerPort.postMessage({ workerPort.postMessage({
'msgType': 'tts-generate-done', 'msgType': 'tts-generate-done', samples: Float32Array.from(ttsOutput.samples),
samples: Float32Array.from(ttsOutput.samples),
}); });
} }