diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets index e32b4eb7..5aef4d9a 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Index.ets @@ -3,47 +3,127 @@ import worker, { MessageEvents } from '@ohos.worker'; import { BusinessError } from '@kit.BasicServicesKit'; import { picker } from '@kit.CoreFileKit'; +import { Permissions } from '@kit.AbilityKit'; +import { allAllowed, requestPermissions } from './Permission'; +import { audio } from '@kit.AudioKit'; + @Entry @Component struct Index { @State currentIndex: number = 0; - @State resultFromFile: string = ''; + @State resultForFile: string = ''; @State progressForFile: number = 0; @State selectFileBtnEnabled: boolean = false; - @State message: string = 'To be implemented'; @State lang: string = 'English'; + @State resultForMic: string = ''; + @State micStarted: boolean = false; + @State message: string = 'Start recording'; + @State micInitDone: boolean = false; private controller: TabsController = new TabsController(); private workerInstance?: worker.ThreadWorker private readonly scriptURL: string = 'entry/ets/workers/NonStreamingAsrWithVadWorker.ets' + private mic?: audio.AudioCapturer; + private sampleList: Float32Array[] = [] - aboutToAppear(): void { + flatten(samples: Float32Array[]): Float32Array { + let n = 0; + for (let i = 0; i < samples.length; ++i) { + n += samples[i].length; + } + + const ans: Float32Array = new Float32Array(n); + let offset: number = 0; + for (let i = 0; i < samples.length; ++i) { + ans.set(samples[i], offset); + offset += samples[i].length; + } + + return ans; + } + + async initMic() { + const permissions: Permissions[] = ["ohos.permission.MICROPHONE"]; + let allowed: boolean = await allAllowed(permissions); + if (!allowed) { + await requestPermissions(permissions); + console.log("request to access the microphone"); + + 
allowed = await allAllowed(permissions); + if (!allowed) { + console.error('failed to get microphone permission'); + this.resultForMic = "Failed to get microphone permission. Please retry"; + return; + } + } else { + console.log("allowed to access microphone"); + } + + const audioStreamInfo: audio.AudioStreamInfo = { + samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, + channels: audio.AudioChannel.CHANNEL_1, + sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE, + encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW, + }; + + const audioCapturerInfo: audio.AudioCapturerInfo = { + source: audio.SourceType.SOURCE_TYPE_MIC, + capturerFlags: 0 + }; + + const audioCapturerOptions: audio.AudioCapturerOptions = { + streamInfo: audioStreamInfo, + capturerInfo: audioCapturerInfo + + }; + audio.createAudioCapturer(audioCapturerOptions, (err, data) => { + if (err) { + console.error(`error code is ${err.code}, error message is ${err.message}`); + this.resultForMic = 'Failed to init microphone'; + } else { + console.info(`init mic successfully`); + this.mic = data; + this.mic.on('readData', this.micCallback); + + if (this.workerInstance) { + this.workerInstance.postMessage({ msgType: 'init-vad-mic', context: getContext() }); + } + } + }); + } + + async aboutToAppear() { this.workerInstance = new worker.ThreadWorker(this.scriptURL, { name: 'NonStreaming ASR worker' }); this.workerInstance.onmessage = (e: MessageEvents) => { const msgType = e.data['msgType'] as string; - console.log(`received data ${msgType}`); + console.log(`received msg from worker: ${msgType}`); + + if (msgType == 'init-vad-mic-done') { + this.micInitDone = true; + } if (msgType == 'init-non-streaming-asr-done') { this.selectFileBtnEnabled = true; + this.resultForFile = `Initializing done.\n\nPlease select a wave file of 16kHz in language ${this.lang}`; } if (msgType == 'non-streaming-asr-vad-decode-done') { - this.resultFromFile = e.data['text'] as string + '\n'; + this.resultForFile = 
e.data['text'] as string + '\n'; } if (msgType == 'non-streaming-asr-vad-decode-partial') { - if (this.resultFromFile == '') { - this.resultFromFile = e.data['text'] as string; + if (this.resultForFile == '') { + this.resultForFile = e.data['text'] as string; } else { - this.resultFromFile += '\n\n' + e.data['text'] as string; + this.resultForFile += '\n\n' + e.data['text'] as string; } } if (msgType == 'non-streaming-asr-vad-decode-error') { - this.resultFromFile = e.data['text'] as string; + this.resultForFile = e.data['text'] as string; } if (msgType == 'non-streaming-asr-vad-decode-progress') { @@ -51,11 +131,26 @@ struct Index { this.selectFileBtnEnabled = this.progressForFile >= 100; } + + if (msgType == 'non-streaming-asr-vad-mic-partial') { + if (this.resultForMic == '') { + this.resultForMic = e.data['text'] as string; + } else { + this.resultForMic += '\n\n' + e.data['text'] as string; + } + } + + if (msgType == 'non-streaming-asr-vad-mic-error') { + this.resultForMic = e.data['text'] as string; + } } const context = getContext(); + this.resultForFile = 'Initializing models'; this.workerInstance.postMessage({ msgType: 'init-vad', context }); this.workerInstance.postMessage({ msgType: 'init-non-streaming-asr', context }); + + await this.initMic(); } @Builder @@ -86,13 +181,13 @@ struct Index { .lineHeight(41) .fontWeight(500) - Button('Select .wav file ') + Button('Select .wav file (16kHz) ') .enabled(this.selectFileBtnEnabled) .fontSize(13) .width(296) .height(60) .onClick(() => { - this.resultFromFile = ''; + this.resultForFile = ''; this.progressForFile = 0; const documentSelectOptions = new picker.DocumentSelectOptions(); @@ -103,7 +198,7 @@ struct Index { console.log(`Result: ${result}`); if (!result[0]) { - this.resultFromFile = 'Please select a file to decode'; + this.resultForFile = 'Please select a file to decode'; this.selectFileBtnEnabled = true; return; } @@ -135,7 +230,7 @@ struct Index { }.width('100%').justifyContent(FlexAlign.Center) } - 
TextArea({ text: this.resultFromFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP }); + TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP }); } .alignItems(HorizontalAlign.Center) @@ -144,10 +239,50 @@ struct Index { TabContent() { Column() { - Text(this.message) - .fontSize(50) - .fontWeight(FontWeight.Bold); + Button(this.message) + .enabled(this.micInitDone) + .onClick(() => { + console.log('clicked mic button'); + this.resultForMic = ''; + if (this.mic) { + if (this.micStarted) { + this.mic.stop(); + this.message = "Start recording"; + this.micStarted = false; + console.log('mic stopped'); + + const samples = this.flatten(this.sampleList); + let s = 0; + for (let i = 0; i < samples.length; ++i) { + s += samples[i]; + } + console.log(`samples ${samples.length}, sum: ${s}`); + + if (this.workerInstance) { + console.log('decode mic'); + this.workerInstance.postMessage({ + msgType: 'non-streaming-asr-vad-mic', + samples, + }); + } else { + console.log(`this worker instance is undefined ${this.workerInstance}`); + } + } else { + this.sampleList = []; + this.mic.start(); + this.message = "Stop recording"; + this.micStarted = true; + console.log('mic started'); + } + } + }); + + Text(`Supported languages: ${this.lang}`) + + TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP }); } + .alignItems(HorizontalAlign.Center) + .justifyContent(FlexAlign.Start) } .tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'), $r('app.media.ic_public_input_voice_default'))) @@ -170,4 +305,14 @@ struct Index { .width('100%') .justifyContent(FlexAlign.Start) } + + private micCallback = (buffer: ArrayBuffer) => { + const view: Int16Array = new Int16Array(buffer); + + const samplesFloat: Float32Array = new Float32Array(view.length); + for (let i = 0; i < view.length; ++i) { + samplesFloat[i] = view[i] / 32768.0; + } + this.sampleList.push(samplesFloat); + } } 
\ No newline at end of file diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets index a6524248..4a1af646 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/NonStreamingAsrModels.ets @@ -229,9 +229,10 @@ export function getOfflineModelConfig(type: number): OfflineModelConfig { break; } + default: { + console.log(`Please specify a supported type. Given type ${type}`); + } } - console.log(`Please specify a supported type. Given type ${type}`); - return c; } diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Permission.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Permission.ets new file mode 100644 index 00000000..40ef391a --- /dev/null +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/pages/Permission.ets @@ -0,0 +1,26 @@ +// This file is modified from +// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets +import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit'; + +export function allAllowed(permissions: Permissions[]): boolean { + if (permissions.length == 0) { + return false; + } + + const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager(); + + const bundleInfo = bundleManager.getBundleInfoForSelfSync(bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION); + + let tokenID: number = bundleInfo.appInfo.accessTokenId; + + return permissions.every(permission => abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED == + mgr.checkAccessTokenSync(tokenID, permission)); +} + +export async function requestPermissions(permissions: Permissions[]): Promise<boolean> { + const mgr: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager(); + const context: Context = getContext() as common.UIAbilityContext; + + const result = await 
mgr.requestPermissionsFromUser(context, permissions); + return result.authResults.length > 0 && result.authResults.every(authResults => authResults == 0); +} \ No newline at end of file diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets index 346be595..3076183d 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/ets/workers/NonStreamingAsrWithVadWorker.ets @@ -13,11 +13,13 @@ import { import { Context } from '@kit.AbilityKit'; import { fileIo } from '@kit.CoreFileKit'; import { getOfflineModelConfig } from '../pages/NonStreamingAsrModels'; +import { BusinessError } from '@kit.BasicServicesKit'; const workerPort: ThreadWorkerGlobalScope = worker.workerPort; let recognizer: OfflineRecognizer; let vad: Vad; // vad for decoding files +let vadMic: Vad; // vad for mic function initVad(context: Context): Vad { let mgr = context.resourceManager; @@ -73,7 +75,7 @@ interface Wave { sampleRate: number; } -function decode(filename: string): string { +function decodeFile(filename: string): string { vad.reset(); const fp = fileIo.openSync(filename); @@ -83,6 +85,9 @@ const data: Uint8Array = new Uint8Array(arrayBuffer); const wave: Wave = readWaveFromBinary(data); + if (wave.sampleRate != 16000) { + return `the sample rate in ${filename} is not 16000Hz. 
Given: ${wave.sampleRate}Hz.\nPlease select a wav file of 16kHz.`; + } console.log(`sample rate ${wave.sampleRate}`); console.log(`samples length ${wave.samples.length}`); @@ -130,6 +135,47 @@ return resultList.join('\n\n'); } +function decodeMic(samples: Float32Array) { + const resultList: string[] = []; + + const windowSize: number = vadMic.config.sileroVad.windowSize; + for (let i = 0; i < samples.length; i += windowSize) { + const thisWindow: Float32Array = samples.subarray(i, i + windowSize) + vadMic.acceptWaveform(thisWindow); + if (i + windowSize >= samples.length) { + vadMic.flush(); + } + while (!vadMic.isEmpty()) { + const segment: SpeechSegment = vadMic.front(); + const _startTime: number = (segment.start / 16000); + const _endTime: number = _startTime + segment.samples.length / 16000; + + if (_endTime - _startTime < 0.2) { + vadMic.pop(); + continue; + } + + const startTime: string = _startTime.toFixed(2); + const endTime: string = _endTime.toFixed(2); + + const stream: OfflineStream = recognizer.createStream(); + stream.acceptWaveform({ samples: segment.samples, sampleRate: 16000 }); + recognizer.decode(stream); + const result: OnlineRecognizerResult = recognizer.getResult(stream); + + const text: string = `${startTime} -- ${endTime} ${result.text}` + resultList.push(text); + console.log(`partial result ${text}`); + + workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-mic-partial', text }); + + vadMic.pop(); + } + } + + return resultList.join('\n\n'); +} + /** * Defines the event handler to be called when the worker thread receives a message sent by the host thread. * The event handler is executed in the worker thread. 
@@ -146,6 +192,13 @@ workerPort.onmessage = (e: MessageEvents) => { workerPort.postMessage({ 'msgType': 'init-vad-done' }); } + if (msgType == 'init-vad-mic' && !vadMic) { + const context = e.data['context'] as Context; + vadMic = initVad(context); + console.log('init vad mic done'); + workerPort.postMessage({ 'msgType': 'init-vad-mic-done' }); + } + if (msgType == 'init-non-streaming-asr' && !recognizer) { const context = e.data['context'] as Context; recognizer = initNonStreamingAsr(context); @@ -157,7 +210,7 @@ const filename = e.data['filename'] as string; console.log(`decoding ${filename}`); try { - const text = decode(filename); + const text = decodeFile(filename); workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-done', text }); } catch (e) { workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-error', text: `Failed to decode ${filename}` }); @@ -165,6 +218,17 @@ workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress: 100 }); } + + if (msgType == 'non-streaming-asr-vad-mic') { + const samples: Float32Array = e.data['samples'] as Float32Array; + vadMic.reset(); + try { + const text = decodeMic(samples); + workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-done', text }); + } catch (e) { + workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-error', text: `Failed to decode` }); + } + } } /** diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 b/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 index a1cea8b6..e8c24aeb 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/module.json5 @@ -47,6 +47,18 @@ } ], } + ], + "requestPermissions": [ + { + "name": "ohos.permission.MICROPHONE", + "reason": "$string:mic_reason", + "usedScene": { + "abilities": [ + "FormAbility", + ], + "when": "always", + } + } ] } } \ No 
newline at end of file diff --git a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json index 3b3015be..09e201b5 100644 --- a/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json +++ b/harmony-os/SherpaOnnxVadAsr/entry/src/main/resources/base/element/string.json @@ -11,6 +11,10 @@ { "name": "EntryAbility_label", "value": "VAD_ASR" + }, + { + "name": "mic_reason", + "value": "access the microphone for speech recognition" } ] } \ No newline at end of file diff --git a/scripts/hap/build-hap-vad-asr.sh.in b/scripts/hap/build-hap-vad-asr.sh.in index d4fabf06..7020d927 100644 --- a/scripts/hap/build-hap-vad-asr.sh.in +++ b/scripts/hap/build-hap-vad-asr.sh.in @@ -90,7 +90,7 @@ hvigorw assembleHap --mode module -p product=default -p buildMode=release --no-d ls -lh ./entry/build/default/outputs/default/entry-default-unsigned.hap -in_file=./entry/build/default/outputs/default/entry-default-unsigned.hap +in_file=$PWD/entry/build/default/outputs/default/entry-default-unsigned.hap out_file=$PWD/entry/build/default/outputs/default/entry-default-signed.hap java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -mode "localSign" \ @@ -100,11 +100,12 @@ java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -m ls -l $in_file $out_file ls -lh $in_file $out_file -rm $in_file rm -rf ./entry/src/main/resources/rawfile/$model_name popd -mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap +# Use unsigned hap +mv $in_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap +# mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap ls -lh haps