Add VAD+ASR demo for HarmonyOS (#1573)

This commit is contained in:
Fangjun Kuang
2024-11-28 22:59:56 +08:00
committed by GitHub
parent 315d8e2a47
commit f3f8961462
44 changed files with 1102 additions and 0 deletions

View File

@@ -0,0 +1,173 @@
import { LengthUnit } from '@kit.ArkUI';
import worker, { MessageEvents } from '@ohos.worker';
import { BusinessError } from '@kit.BasicServicesKit';
import { picker } from '@kit.CoreFileKit';
/**
 * Entry page of the VAD + ASR demo.
 *
 * The page shows three tabs ("From file", "From mic" and "Help"). Decoding is
 * delegated to a worker thread: this component only posts request messages to
 * the worker and renders whatever the worker reports back.
 */
@Entry
@Component
struct Index {
  // Index of the tab that is currently selected.
  @State currentIndex: number = 0;
  // Text shown in the result area of the "From file" tab.
  @State resultFromFile: string = '';
  // Decoding progress reported by the worker; compared against 100 below.
  @State progressForFile: number = 0;
  // Stays false until the worker reports that the recognizer is initialized.
  @State selectFileBtnEnabled: boolean = false;
  @State message: string = 'To be implemented';
  @State lang: string = 'English';
  private controller: TabsController = new TabsController();
  private workerInstance?: worker.ThreadWorker;
  private readonly scriptURL: string = 'entry/ets/workers/NonStreamingAsrWithVadWorker.ets';

  /**
   * Starts the worker thread, installs the message handler and asks the worker
   * to initialize the VAD and the non-streaming recognizer.
   */
  aboutToAppear(): void {
    const asrWorker = new worker.ThreadWorker(this.scriptURL, {
      name: 'NonStreaming ASR worker'
    });
    asrWorker.onmessage = (e: MessageEvents) => {
      this.handleWorkerMessage(e);
    };
    this.workerInstance = asrWorker;

    const context = getContext();
    asrWorker.postMessage({ msgType: 'init-vad', context });
    asrWorker.postMessage({ msgType: 'init-non-streaming-asr', context });
  }

  /**
   * Terminates the worker thread when the page goes away so that the thread
   * is not leaked.
   */
  aboutToDisappear(): void {
    if (this.workerInstance) {
      this.workerInstance.terminate();
      this.workerInstance = undefined;
    }
  }

  /**
   * Updates the UI state according to a message received from the worker.
   *
   * @param e Message event; `e.data['msgType']` selects the action and, depending
   *          on the type, `e.data['text']` or `e.data['progress']` carries the payload.
   */
  private handleWorkerMessage(e: MessageEvents): void {
    const msgType = e.data['msgType'] as string;
    console.log(`received data ${msgType}`);

    switch (msgType) {
      case 'init-non-streaming-asr-done':
        this.selectFileBtnEnabled = true;
        break;
      case 'non-streaming-asr-vad-decode-done':
        // The final message replaces whatever partial text was displayed.
        this.resultFromFile = (e.data['text'] as string) + '\n';
        break;
      case 'non-streaming-asr-vad-decode-partial': {
        // Partial results are appended, separated by a blank line.
        const text = e.data['text'] as string;
        if (this.resultFromFile == '') {
          this.resultFromFile = text;
        } else {
          this.resultFromFile += '\n\n' + text;
        }
        break;
      }
      case 'non-streaming-asr-vad-decode-error':
        this.resultFromFile = e.data['text'] as string;
        break;
      case 'non-streaming-asr-vad-decode-progress':
        this.progressForFile = e.data['progress'] as number;
        // Keep the button disabled until the reported progress reaches 100.
        this.selectFileBtnEnabled = this.progressForFile >= 100;
        break;
      default:
        break;
    }
  }

  /**
   * Builds one entry of the tab bar: an icon above a title. Clicking it
   * switches to the tab at `targetIndex`.
   */
  @Builder
  TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
    Column() {
      Image(this.currentIndex == targetIndex ? selectedImg : normalImg)
        .size({ width: 25, height: 25 })
      Text(title)
        .fontColor(this.currentIndex == targetIndex ? '#28bff1' : '#8a8a8a')
    }
    .width('100%')
    .height(50)
    .justifyContent(FlexAlign.Center)
    .onClick(() => {
      this.currentIndex = targetIndex;
      this.controller.changeIndex(this.currentIndex);
    })
  }

  build() {
    Column() {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        // Tab 0: decode a .wav file picked by the user.
        TabContent() {
          Column({ space: 10 }) {
            Text('Next-gen Kaldi: VAD + ASR')
              .fontColor('#182431')
              .fontSize(25)
              .lineHeight(41)
              .fontWeight(500)
            Button('Select .wav file ')
              .enabled(this.selectFileBtnEnabled)
              .fontSize(13)
              .width(296)
              .height(60)
              .onClick(() => {
                // Clear the output of any previous run.
                this.resultFromFile = '';
                this.progressForFile = 0;
                const documentSelectOptions = new picker.DocumentSelectOptions();
                documentSelectOptions.maxSelectNumber = 1;
                documentSelectOptions.fileSuffixFilters = ['.wav'];
                const documentViewPicker = new picker.DocumentViewPicker();
                documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                  console.log(`Result: ${result}`);
                  // Nothing was selected.
                  if (!result[0]) {
                    this.resultFromFile = 'Please select a file to decode';
                    this.selectFileBtnEnabled = true;
                    return;
                  }
                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'non-streaming-asr-vad-decode',
                      filename: result[0],
                    });
                  } else {
                    console.log(`this worker instance is undefined ${this.workerInstance}`);
                  }
                }).catch((err: BusinessError) => {
                  console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                })
              })
            Text(`Supported languages: ${this.lang}`)
            // The progress bar is shown only once some progress has been reported.
            if (this.progressForFile > 0) {
              Row() {
                Progress({ value: 0, total: 100, type: ProgressType.Capsule })
                  .width('80%')
                  .height(20)
                  .value(this.progressForFile);
                Text(`${this.progressForFile.toFixed(2)}%`).width('15%')
              }.width('100%').justifyContent(FlexAlign.Center)
            }
            TextArea({ text: this.resultFromFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
          }
          .alignItems(HorizontalAlign.Center)
          .justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc_default')))

        // Tab 1: placeholder; only displays `message`.
        TabContent() {
          Column() {
            Text(this.message)
              .fontSize(50)
              .fontWeight(FontWeight.Bold);
          }
        }
        .tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
          $r('app.media.ic_public_input_voice_default')))

        // Tab 2: static help text.
        TabContent() {
          Column() {
            Text("Everything is open-sourced");
            Divider();
            Text("It runs locally, without accessing the network");
            Divider();
            Text("See also https://github.com/k2-fsa/sherpa-onnx");
            Divider();
            Text("and https://k2-fsa.github.io/sherpa/social-groups.html");
          }.justifyContent(FlexAlign.Start)
        }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info_circle'),
          $r('app.media.info_circle_default')))
      }.scrollable(false)
    }
    .width('100%')
    .justifyContent(FlexAlign.Start)
  }
}

View File

@@ -0,0 +1,237 @@
// Please keep in sync with
// https://github.com/k2-fsa/sherpa-onnx/blob/master/sherpa-onnx/kotlin-api/OfflineRecognizer.kt#L184
import { OfflineModelConfig } from 'sherpa_onnx';
/**
 * Returns the model configuration for one of the predefined offline
 * (non-streaming) ASR models.
 *
 * Every path is built as `<modelDir>/<file>`; how these relative paths are
 * resolved is up to the caller.
 *
 * @param type Index of the predefined model. Values 0 to 22 are supported.
 * @returns A newly created config with the fields of the selected model
 *          filled in. For an unsupported `type` a message is logged and the
 *          config is returned exactly as constructed.
 */
export function getOfflineModelConfig(type: number): OfflineModelConfig {
  const c = new OfflineModelConfig();
  switch (type) {
    case 0: {
      const modelDir = 'sherpa-onnx-paraformer-zh-2023-09-14';
      c.paraformer.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "paraformer";
      break;
    }
    case 1: {
      const modelDir = 'icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04';
      c.transducer.encoder = `${modelDir}/encoder-epoch-30-avg-4.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-30-avg-4.onnx`;
      // The joiner must go into its own field; it must not overwrite the encoder.
      c.transducer.joiner = `${modelDir}/joiner-epoch-30-avg-4.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 2: {
      const modelDir = 'sherpa-onnx-whisper-tiny.en';
      c.whisper.encoder = `${modelDir}/tiny.en-encoder.int8.onnx`;
      c.whisper.decoder = `${modelDir}/tiny.en-decoder.int8.onnx`;
      c.tokens = `${modelDir}/tiny.en-tokens.txt`;
      c.modelType = "whisper";
      break;
    }
    case 3: {
      const modelDir = 'sherpa-onnx-whisper-base.en';
      c.whisper.encoder = `${modelDir}/base.en-encoder.int8.onnx`;
      c.whisper.decoder = `${modelDir}/base.en-decoder.int8.onnx`;
      c.tokens = `${modelDir}/base.en-tokens.txt`;
      c.modelType = "whisper";
      break;
    }
    case 4: {
      const modelDir = "icefall-asr-zipformer-wenetspeech-20230615";
      c.transducer.encoder = `${modelDir}/encoder-epoch-12-avg-4.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-12-avg-4.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-12-avg-4.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 5: {
      const modelDir = "sherpa-onnx-zipformer-multi-zh-hans-2023-9-2";
      c.transducer.encoder = `${modelDir}/encoder-epoch-20-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-20-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-20-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 6: {
      const modelDir = "sherpa-onnx-nemo-ctc-en-citrinet-512";
      c.nemoCtc.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 7: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-be-de-en-es-fr-hr-it-pl-ru-uk-20k";
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 8: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-24500";
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 9: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-en-de-es-fr-14288";
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 10: {
      const modelDir = "sherpa-onnx-nemo-fast-conformer-ctc-es-1424";
      c.nemoCtc.model = `${modelDir}/model.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 11: {
      const modelDir = "sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04";
      c.telespeechCtc = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "telespeech_ctc";
      break;
    }
    case 12: {
      const modelDir = "sherpa-onnx-zipformer-thai-2024-06-20";
      c.transducer.encoder = `${modelDir}/encoder-epoch-12-avg-5.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-12-avg-5.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-12-avg-5.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 13: {
      const modelDir = "sherpa-onnx-zipformer-korean-2024-06-24";
      c.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 14: {
      const modelDir = "sherpa-onnx-paraformer-zh-small-2024-03-09";
      c.paraformer.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "paraformer";
      break;
    }
    case 15: {
      const modelDir = "sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17";
      c.senseVoice.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 16: {
      const modelDir = "sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01";
      c.transducer.encoder = `${modelDir}/encoder-epoch-99-avg-1.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder-epoch-99-avg-1.onnx`;
      c.transducer.joiner = `${modelDir}/joiner-epoch-99-avg-1.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 17: {
      const modelDir = "sherpa-onnx-zipformer-ru-2024-09-18";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 18: {
      const modelDir = "sherpa-onnx-small-zipformer-ru-2024-09-18";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "transducer";
      break;
    }
    case 19: {
      const modelDir = "sherpa-onnx-nemo-ctc-giga-am-russian-2024-10-24";
      c.nemoCtc.model = `${modelDir}/model.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 20: {
      const modelDir = "sherpa-onnx-nemo-transducer-giga-am-russian-2024-10-24";
      c.transducer.encoder = `${modelDir}/encoder.int8.onnx`;
      c.transducer.decoder = `${modelDir}/decoder.onnx`;
      c.transducer.joiner = `${modelDir}/joiner.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      c.modelType = "nemo_transducer";
      break;
    }
    case 21: {
      const modelDir = "sherpa-onnx-moonshine-tiny-en-int8";
      c.moonshine.preprocessor = `${modelDir}/preprocess.onnx`;
      c.moonshine.encoder = `${modelDir}/encode.int8.onnx`;
      c.moonshine.uncachedDecoder = `${modelDir}/uncached_decode.int8.onnx`;
      c.moonshine.cachedDecoder = `${modelDir}/cached_decode.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    case 22: {
      const modelDir = "sherpa-onnx-moonshine-base-en-int8";
      c.moonshine.preprocessor = `${modelDir}/preprocess.onnx`;
      c.moonshine.encoder = `${modelDir}/encode.int8.onnx`;
      c.moonshine.uncachedDecoder = `${modelDir}/uncached_decode.int8.onnx`;
      c.moonshine.cachedDecoder = `${modelDir}/cached_decode.int8.onnx`;
      c.tokens = `${modelDir}/tokens.txt`;
      break;
    }
    default: {
      // Only complain when the type really is unsupported.
      console.log(`Please specify a supported type. Given type ${type}`);
      break;
    }
  }
  return c;
}