Add on-device text-to-speech (TTS) demo for HarmonyOS (#1590)

This commit is contained in:
Fangjun Kuang
2024-12-04 14:27:12 +08:00
committed by GitHub
parent 47a2dd4cf8
commit 74a8735f7a
61 changed files with 1930 additions and 117 deletions

View File

@@ -11,6 +11,7 @@ import { audio } from '@kit.AudioKit';
@Entry
@Component
struct Index {
@State title: string = 'Next-gen Kaldi: VAD + ASR';
@State currentIndex: number = 0;
@State resultForFile: string = '';
@State progressForFile: number = 0;
@@ -73,13 +74,11 @@ struct Index {
};
const audioCapturerInfo: audio.AudioCapturerInfo = {
source: audio.SourceType.SOURCE_TYPE_MIC,
capturerFlags: 0
source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
};
const audioCapturerOptions: audio.AudioCapturerOptions = {
streamInfo: audioStreamInfo,
capturerInfo: audioCapturerInfo
streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo
};
audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
@@ -162,15 +161,9 @@ struct Index {
@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
Column() {
Image(this.currentIndex == targetIndex ? selectedImg : normalImg)
.size({ width: 25, height: 25 })
Text(title)
.fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
}
.width('100%')
.height(50)
.justifyContent(FlexAlign.Center)
.onClick(() => {
Image(this.currentIndex == targetIndex ? selectedImg : normalImg).size({ width: 25, height: 25 })
Text(title).fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
}.width('100%').height(50).justifyContent(FlexAlign.Center).onClick(() => {
this.currentIndex = targetIndex;
this.controller.changeIndex(this.currentIndex);
})
@@ -181,11 +174,7 @@ struct Index {
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
TabContent() {
Column({ space: 10 }) {
Text('Next-gen Kaldi: VAD + ASR')
.fontColor('#182431')
.fontSize(25)
.lineHeight(41)
.fontWeight(500)
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
Button('Select .wav file (16kHz) ')
.enabled(this.selectFileBtnEnabled)
@@ -211,8 +200,7 @@ struct Index {
if (this.workerInstance) {
this.workerInstance.postMessage({
msgType: 'non-streaming-asr-vad-decode',
filename: result[0],
msgType: 'non-streaming-asr-vad-decode', filename: result[0],
});
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
@@ -236,80 +224,86 @@ struct Index {
}.width('100%').justifyContent(FlexAlign.Center)
}
TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
}
.alignItems(HorizontalAlign.Center)
.justifyContent(FlexAlign.Start)
TextArea({ text: this.resultForFile })
.width('100%')
.lineSpacing({ value: 10, unit: LengthUnit.VP })
.height('100%');
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
}.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default')))
TabContent() {
Column() {
Button(this.message)
.enabled(this.micInitDone)
.onClick(() => {
console.log('clicked mic button');
this.resultForMic = '';
if (this.mic) {
if (this.micStarted) {
this.mic.stop();
this.message = "Start recording";
this.micStarted = false;
console.log('mic stopped');
Column({ space: 10 }) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
Button(this.message).enabled(this.micInitDone).onClick(() => {
console.log('clicked mic button');
this.resultForMic = '';
if (this.mic) {
if (this.micStarted) {
this.mic.stop();
this.message = "Start recording";
this.micStarted = false;
console.log('mic stopped');
const samples = this.flatten(this.sampleList);
let s = 0;
for (let i = 0; i < samples.length; ++i) {
s += samples[i];
}
console.log(`samples ${samples.length}, sum: ${s}`);
if (this.workerInstance) {
console.log('decode mic');
this.workerInstance.postMessage({
msgType: 'non-streaming-asr-vad-mic',
samples,
});
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
}
} else {
this.sampleList = [];
this.mic.start();
this.message = "Stop recording";
this.micStarted = true;
console.log('mic started');
const samples = this.flatten(this.sampleList);
let s = 0;
for (let i = 0; i < samples.length; ++i) {
s += samples[i];
}
console.log(`samples ${samples.length}, sum: ${s}`);
if (this.workerInstance) {
console.log('decode mic');
this.workerInstance.postMessage({
msgType: 'non-streaming-asr-vad-mic', samples,
});
} else {
console.log(`this worker instance is undefined ${this.workerInstance}`);
}
} else {
this.sampleList = [];
this.mic.start();
this.message = "Stop recording";
this.micStarted = true;
console.log('mic started');
}
});
}
});
Text(`Supported languages: ${this.lang}`)
TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
}
.alignItems(HorizontalAlign.Center)
.justifyContent(FlexAlign.Start)
TextArea({ text: this.resultForMic })
.width('100%')
.lineSpacing({ value: 10, unit: LengthUnit.VP })
.width('100%')
.height('100%');
}.alignItems(HorizontalAlign.Center).justifyContent(FlexAlign.Start)
}
.tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
$r('app.media.ic_public_input_voice_default')))
TabContent() {
Column() {
Text("Everything is open-sourced");
Divider();
Text("It runs locally, without accessing the network");
Divider();
Text("See also https://github.com/k2-fsa/sherpa-onnx");
Divider();
Text("and https://k2-fsa.github.io/sherpa/social-groups.html");
Column({ space: 10 }) {
Text(this.title).fontSize(20).fontWeight(FontWeight.Bold);
TextArea({
text: `
Everything is open-sourced.
It runs locally, without accessing the network
See also https://github.com/k2-fsa/sherpa-onnx
新一代 Kaldi QQ 和微信交流群: 请看
https://k2-fsa.github.io/sherpa/social-groups.html
微信公众号: 新一代 Kaldi
`
}).width('100%').height('100%').focusable(false)
}.justifyContent(FlexAlign.Start)
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'),
$r('app.media.info_circle_default')))
}.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'), $r('app.media.info_circle_default')))
}.scrollable(false)
}
.width('100%')
.justifyContent(FlexAlign.Start)
}.width('100%').justifyContent(FlexAlign.Start)
}
private micCallback = (buffer: ArrayBuffer) => {

View File

@@ -2,19 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "VAD+ASR with Next-gen Kaldi"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "VAD+ASR"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "VAD_ASR"
"value": "On-device speech recognition"
},
{
"name": "mic_reason",
"value": "access the microphone for speech recognition"
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
}
]
}

View File

@@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "module description"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "On-device VAD+ASR with Next-gen Kaldi"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "On-device speech recognition"
},
{
"name": "mic_reason",
"value": "access the microphone for on-device speech recognition with Next-gen Kaldi"
}
]
}

View File

@@ -2,15 +2,19 @@
"string": [
{
"name": "module_desc",
"value": "模块描述"
"value": "基于新一代Kaldi的本地语音识别"
},
{
"name": "EntryAbility_desc",
"value": "description"
"value": "基于新一代Kaldi的本地语音识别"
},
{
"name": "EntryAbility_label",
"value": "label"
"value": "本地语音识别"
},
{
"name": "mic_reason",
"value": "使用新一代Kaldi, 访问麦克风进行本地语音识别 (不需要联网)"
}
]
}