Add microphone demo about VAD+ASR for HarmonyOS (#1581)
This commit is contained in:
@@ -3,47 +3,127 @@ import worker, { MessageEvents } from '@ohos.worker';
|
||||
import { BusinessError } from '@kit.BasicServicesKit';
|
||||
import { picker } from '@kit.CoreFileKit';
|
||||
|
||||
import { Permissions } from '@kit.AbilityKit';
|
||||
import { allAllowed, requestPermissions } from './Permission';
|
||||
import { audio } from '@kit.AudioKit';
|
||||
|
||||
|
||||
@Entry
|
||||
@Component
|
||||
struct Index {
|
||||
@State currentIndex: number = 0;
|
||||
@State resultFromFile: string = '';
|
||||
@State resultForFile: string = '';
|
||||
@State progressForFile: number = 0;
|
||||
@State selectFileBtnEnabled: boolean = false;
|
||||
@State message: string = 'To be implemented';
|
||||
@State lang: string = 'English';
|
||||
@State resultForMic: string = '';
|
||||
@State micStarted: boolean = false;
|
||||
@State message: string = 'Start recording';
|
||||
@State micInitDone: boolean = false;
|
||||
private controller: TabsController = new TabsController();
|
||||
private workerInstance?: worker.ThreadWorker
|
||||
private readonly scriptURL: string = 'entry/ets/workers/NonStreamingAsrWithVadWorker.ets'
|
||||
private mic?: audio.AudioCapturer;
|
||||
private sampleList: Float32Array[] = []
|
||||
|
||||
aboutToAppear(): void {
|
||||
/**
 * Concatenate a list of sample chunks into one contiguous buffer.
 *
 * @param samples Chunks, in the order they should appear in the output.
 * @returns A newly allocated Float32Array whose length is the sum of the
 *          lengths of all chunks. An empty list yields an empty array.
 */
flatten(samples: Float32Array[]): Float32Array {
  // First pass: find out how much room is needed.
  let total = 0;
  for (const chunk of samples) {
    total += chunk.length;
  }

  // Second pass: copy each chunk right after the previous one.
  const merged = new Float32Array(total);
  let pos = 0;
  for (const chunk of samples) {
    merged.set(chunk, pos);
    pos += chunk.length;
  }

  return merged;
}
|
||||
|
||||
/**
 * Obtain microphone permission and create the audio capturer.
 *
 * If "ohos.permission.MICROPHONE" has not been granted yet, the user is asked
 * for it and the method waits for the answer before checking again. When the
 * permission is still missing afterwards, an error text is written to
 * `resultForMic` and no capturer is created.
 *
 * On success the capturer (16 kHz, mono, signed 16-bit little-endian, raw) is
 * stored in `this.mic`, `micCallback` is registered for its 'readData' event,
 * and the worker (if any) is asked to set up the VAD for the microphone via an
 * 'init-vad-mic' message.
 */
async initMic() {
  const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
  let allowed: boolean = await allAllowed(permissions);
  if (!allowed) {
    console.log("request to access the microphone");
    try {
      // Wait until the user has answered the permission dialog. Without the
      // await, the re-check below runs before any decision has been made and
      // always reports a failure on first launch.
      await requestPermissions(permissions);
    } catch (e) {
      // Fall through to the re-check below, which reports the failure in the UI.
      console.error(`failed to request microphone permission: ${JSON.stringify(e)}`);
    }

    allowed = await allAllowed(permissions);
    if (!allowed) {
      console.error('failed to get microphone permission');
      this.resultForMic = "Failed to get microphone permission. Please retry";
      return;
    }
  } else {
    console.log("allowed to access microphone");
  }

  const audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000,
    channels: audio.AudioChannel.CHANNEL_1,
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW,
  };

  const audioCapturerInfo: audio.AudioCapturerInfo = {
    source: audio.SourceType.SOURCE_TYPE_MIC,
    capturerFlags: 0
  };

  const audioCapturerOptions: audio.AudioCapturerOptions = {
    streamInfo: audioStreamInfo,
    capturerInfo: audioCapturerInfo
  };

  audio.createAudioCapturer(audioCapturerOptions, (err, data) => {
    if (err) {
      console.error(`error code is ${err.code}, error message is ${err.message}`);
      this.resultForMic = 'Failed to init microphone';
    } else {
      console.info(`init mic successfully`);
      this.mic = data;
      this.mic.on('readData', this.micCallback);

      // The mic button is enabled only after the worker answers with
      // 'init-vad-mic-done'.
      if (this.workerInstance) {
        this.workerInstance.postMessage({ msgType: 'init-vad-mic', context: getContext() });
      }
    }
  });
}
|
||||
|
||||
async aboutToAppear() {
|
||||
this.workerInstance = new worker.ThreadWorker(this.scriptURL, {
|
||||
name: 'NonStreaming ASR worker'
|
||||
});
|
||||
|
||||
this.workerInstance.onmessage = (e: MessageEvents) => {
|
||||
const msgType = e.data['msgType'] as string;
|
||||
console.log(`received data ${msgType}`);
|
||||
console.log(`received msg from worker: ${msgType}`);
|
||||
|
||||
if (msgType == 'init-vad-mic-done') {
|
||||
this.micInitDone = true;
|
||||
}
|
||||
|
||||
if (msgType == 'init-non-streaming-asr-done') {
|
||||
this.selectFileBtnEnabled = true;
|
||||
this.resultForFile = `Initializing done.\n\nPlease select a wave file of 16kHz in language ${this.lang}`;
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-decode-done') {
|
||||
this.resultFromFile = e.data['text'] as string + '\n';
|
||||
this.resultForFile = e.data['text'] as string + '\n';
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-decode-partial') {
|
||||
if (this.resultFromFile == '') {
|
||||
this.resultFromFile = e.data['text'] as string;
|
||||
if (this.resultForFile == '') {
|
||||
this.resultForFile = e.data['text'] as string;
|
||||
} else {
|
||||
this.resultFromFile += '\n\n' + e.data['text'] as string;
|
||||
this.resultForFile += '\n\n' + e.data['text'] as string;
|
||||
}
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-decode-error') {
|
||||
this.resultFromFile = e.data['text'] as string;
|
||||
this.resultForFile = e.data['text'] as string;
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-decode-progress') {
|
||||
@@ -51,11 +131,26 @@ struct Index {
|
||||
|
||||
this.selectFileBtnEnabled = this.progressForFile >= 100;
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-mic-partial') {
|
||||
if (this.resultForMic == '') {
|
||||
this.resultForMic = e.data['text'] as string;
|
||||
} else {
|
||||
this.resultForMic += '\n\n' + e.data['text'] as string;
|
||||
}
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-mic-error') {
|
||||
this.resultForMic = e.data['text'] as string;
|
||||
}
|
||||
}
|
||||
|
||||
const context = getContext();
|
||||
this.resultForFile = 'Initializing models';
|
||||
this.workerInstance.postMessage({ msgType: 'init-vad', context });
|
||||
this.workerInstance.postMessage({ msgType: 'init-non-streaming-asr', context });
|
||||
|
||||
await this.initMic();
|
||||
}
|
||||
|
||||
@Builder
|
||||
@@ -86,13 +181,13 @@ struct Index {
|
||||
.lineHeight(41)
|
||||
.fontWeight(500)
|
||||
|
||||
Button('Select .wav file ')
|
||||
Button('Select .wav file (16kHz) ')
|
||||
.enabled(this.selectFileBtnEnabled)
|
||||
.fontSize(13)
|
||||
.width(296)
|
||||
.height(60)
|
||||
.onClick(() => {
|
||||
this.resultFromFile = '';
|
||||
this.resultForFile = '';
|
||||
this.progressForFile = 0;
|
||||
|
||||
const documentSelectOptions = new picker.DocumentSelectOptions();
|
||||
@@ -103,7 +198,7 @@ struct Index {
|
||||
console.log(`Result: ${result}`);
|
||||
|
||||
if (!result[0]) {
|
||||
this.resultFromFile = 'Please select a file to decode';
|
||||
this.resultForFile = 'Please select a file to decode';
|
||||
this.selectFileBtnEnabled = true;
|
||||
return;
|
||||
}
|
||||
@@ -135,7 +230,7 @@ struct Index {
|
||||
}.width('100%').justifyContent(FlexAlign.Center)
|
||||
}
|
||||
|
||||
TextArea({ text: this.resultFromFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
|
||||
TextArea({ text: this.resultForFile }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
|
||||
|
||||
}
|
||||
.alignItems(HorizontalAlign.Center)
|
||||
@@ -144,10 +239,50 @@ struct Index {
|
||||
|
||||
TabContent() {
|
||||
Column() {
|
||||
Text(this.message)
|
||||
.fontSize(50)
|
||||
.fontWeight(FontWeight.Bold);
|
||||
Button(this.message)
|
||||
.enabled(this.micInitDone)
|
||||
.onClick(() => {
|
||||
console.log('clicked mic button');
|
||||
this.resultForMic = '';
|
||||
if (this.mic) {
|
||||
if (this.micStarted) {
|
||||
this.mic.stop();
|
||||
this.message = "Start recording";
|
||||
this.micStarted = false;
|
||||
console.log('mic stopped');
|
||||
|
||||
const samples = this.flatten(this.sampleList);
|
||||
let s = 0;
|
||||
for (let i = 0; i < samples.length; ++i) {
|
||||
s += samples[i];
|
||||
}
|
||||
console.log(`samples ${samples.length}, sum: ${s}`);
|
||||
|
||||
if (this.workerInstance) {
|
||||
console.log('decode mic');
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'non-streaming-asr-vad-mic',
|
||||
samples,
|
||||
});
|
||||
} else {
|
||||
console.log(`this worker instance is undefined ${this.workerInstance}`);
|
||||
}
|
||||
} else {
|
||||
this.sampleList = [];
|
||||
this.mic.start();
|
||||
this.message = "Stop recording";
|
||||
this.micStarted = true;
|
||||
console.log('mic started');
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
Text(`Supported languages: ${this.lang}`)
|
||||
|
||||
TextArea({ text: this.resultForMic }).width('100%').lineSpacing({ value: 10, unit: LengthUnit.VP });
|
||||
}
|
||||
.alignItems(HorizontalAlign.Center)
|
||||
.justifyContent(FlexAlign.Start)
|
||||
}
|
||||
.tabBar(this.TabBuilder('From mic', 1, $r('app.media.ic_public_input_voice'),
|
||||
$r('app.media.ic_public_input_voice_default')))
|
||||
@@ -170,4 +305,14 @@ struct Index {
|
||||
.width('100%')
|
||||
.justifyContent(FlexAlign.Start)
|
||||
}
|
||||
|
||||
/**
 * Handler registered for the capturer's 'readData' event.
 *
 * Interprets the buffer as signed 16-bit samples, scales each one by
 * 1/32768 into a float, and appends the resulting chunk to `sampleList`.
 * Declared as an arrow-function property so that `this` stays bound to the
 * component when the handler is passed around as a value.
 */
private micCallback = (buffer: ArrayBuffer) => {
  const pcm: Int16Array = new Int16Array(buffer);
  const chunk: Float32Array = new Float32Array(pcm.length);

  pcm.forEach((value: number, idx: number) => {
    chunk[idx] = value / 32768.0;
  });

  this.sampleList.push(chunk);
}
|
||||
}
|
||||
@@ -229,9 +229,10 @@ export function getOfflineModelConfig(type: number): OfflineModelConfig {
|
||||
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
console.log(`Please specify a supported type. Given type ${type}`);
|
||||
}
|
||||
}
|
||||
|
||||
console.log(`Please specify a supported type. Given type ${type}`);
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
// This file is modified from
|
||||
// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets
|
||||
import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit';
|
||||
|
||||
/**
 * Check whether every permission in the list is currently granted to this app.
 *
 * @param permissions Permissions to check.
 * @returns true only if the list is non-empty and each entry is reported as
 *          PERMISSION_GRANTED for this application's access token; an empty
 *          list yields false.
 */
export function allAllowed(permissions: Permissions[]): boolean {
  if (permissions.length == 0) {
    return false;
  }

  const atManager: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();

  const flags = bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION;
  const tokenID: number = bundleManager.getBundleInfoForSelfSync(flags).appInfo.accessTokenId;

  // Stop at the first permission that is not granted.
  for (const permission of permissions) {
    const status = atManager.checkAccessTokenSync(tokenID, permission);
    if (abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED != status) {
      return false;
    }
  }

  return true;
}
|
||||
|
||||
/**
 * Ask the user to grant the given permissions.
 *
 * @param permissions Permissions to request.
 * @returns A promise resolving to true when at least one result came back and
 *          every result equals 0; false otherwise.
 */
export async function requestPermissions(permissions: Permissions[]): Promise<boolean> {
  const atManager: abilityAccessCtrl.AtManager = abilityAccessCtrl.createAtManager();
  const context: Context = getContext() as common.UIAbilityContext;

  const outcome = await atManager.requestPermissionsFromUser(context, permissions);
  const statuses = outcome.authResults;

  if (statuses.length <= 0) {
    return false;
  }

  // Any non-zero entry means that permission was not granted.
  for (const status of statuses) {
    if (status != 0) {
      return false;
    }
  }

  return true;
}
|
||||
@@ -13,11 +13,13 @@ import {
|
||||
import { Context } from '@kit.AbilityKit';
|
||||
import { fileIo } from '@kit.CoreFileKit';
|
||||
import { getOfflineModelConfig } from '../pages/NonStreamingAsrModels';
|
||||
import { BusinessError } from '@kit.BasicServicesKit';
|
||||
|
||||
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
|
||||
|
||||
let recognizer: OfflineRecognizer;
|
||||
let vad: Vad; // vad for decoding files
|
||||
let vadMic: Vad; // vad for mic
|
||||
|
||||
function initVad(context: Context): Vad {
|
||||
let mgr = context.resourceManager;
|
||||
@@ -73,7 +75,7 @@ interface Wave {
|
||||
sampleRate: number;
|
||||
}
|
||||
|
||||
function decode(filename: string): string {
|
||||
function decodeFile(filename: string): string {
|
||||
vad.reset();
|
||||
|
||||
const fp = fileIo.openSync(filename);
|
||||
@@ -83,6 +85,9 @@ function decode(filename: string): string {
|
||||
const data: Uint8Array = new Uint8Array(arrayBuffer);
|
||||
|
||||
const wave: Wave = readWaveFromBinary(data);
|
||||
if (wave.sampleRate != 16000) {
|
||||
return `the sample rate in ${filename} is not 16000Hz. Given: ${wave.sampleRate}Hz.\nPlease select a wav file of 16kHz.`;
|
||||
}
|
||||
|
||||
console.log(`sample rate ${wave.sampleRate}`);
|
||||
console.log(`samples length ${wave.samples.length}`);
|
||||
@@ -130,6 +135,47 @@ function decode(filename: string): string {
|
||||
return resultList.join('\n\n');
|
||||
}
|
||||
|
||||
/**
 * Run VAD + non-streaming ASR over samples recorded from the microphone.
 *
 * The samples are fed to the microphone VAD (`vadMic`) one window at a time;
 * the VAD is flushed once the last window has been submitted. Every detected
 * speech segment that is at least 0.2 s long is decoded with `recognizer`,
 * and its text is posted to the host thread as a
 * 'non-streaming-asr-vad-mic-partial' message. Shorter segments are dropped.
 *
 * This uses `vadMic` rather than `vad`: the message handler resets `vadMic`
 * right before calling this function, while `vad` belongs to file decoding
 * and may still hold state from a previous file.
 *
 * @param samples Recorded audio; timestamps are computed assuming 16 kHz.
 * @returns All segment results ("start -- end text"), joined by blank lines.
 *          Empty input yields an empty string.
 */
function decodeMic(samples: Float32Array) {
  const resultList: string[] = [];

  const windowSize: number = vadMic.config.sileroVad.windowSize;
  for (let i = 0; i < samples.length; i += windowSize) {
    const thisWindow: Float32Array = samples.subarray(i, i + windowSize)
    vadMic.acceptWaveform(thisWindow);

    // Last window: force the VAD to emit whatever speech it is still holding.
    if (i + windowSize >= samples.length) {
      vadMic.flush();
    }

    while (!vadMic.isEmpty()) {
      const segment: SpeechSegment = vadMic.front();
      const _startTime: number = (segment.start / 16000);
      const _endTime: number = _startTime + segment.samples.length / 16000;

      // Ignore segments shorter than 0.2 seconds.
      if (_endTime - _startTime < 0.2) {
        vadMic.pop();
        continue;
      }

      const startTime: string = _startTime.toFixed(2);
      const endTime: string = _endTime.toFixed(2);

      const stream: OfflineStream = recognizer.createStream();
      stream.acceptWaveform({ samples: segment.samples, sampleRate: 16000 });
      recognizer.decode(stream);
      // NOTE(review): the result of an offline recognizer is annotated with an
      // "Online" type name here; kept as in the original — confirm against the
      // file's imports.
      const result: OnlineRecognizerResult = recognizer.getResult(stream);

      const text: string = `${startTime} -- ${endTime} ${result.text}`
      resultList.push(text);
      console.log(`partial result ${text}`);

      workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-mic-partial', text });

      vadMic.pop();
    }
  }

  return resultList.join('\n\n');
}
|
||||
|
||||
/**
|
||||
* Defines the event handler to be called when the worker thread receives a message sent by the host thread.
|
||||
* The event handler is executed in the worker thread.
|
||||
@@ -146,6 +192,13 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
workerPort.postMessage({ 'msgType': 'init-vad-done' });
|
||||
}
|
||||
|
||||
if (msgType == 'init-vad-mic' && !vadMic) {
|
||||
const context = e.data['context'] as Context;
|
||||
vadMic = initVad(context);
|
||||
console.log('init vad mic done');
|
||||
workerPort.postMessage({ 'msgType': 'init-vad-mic-done' });
|
||||
}
|
||||
|
||||
if (msgType == 'init-non-streaming-asr' && !recognizer) {
|
||||
const context = e.data['context'] as Context;
|
||||
recognizer = initNonStreamingAsr(context);
|
||||
@@ -157,7 +210,7 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
const filename = e.data['filename'] as string;
|
||||
console.log(`decoding ${filename}`);
|
||||
try {
|
||||
const text = decode(filename);
|
||||
const text = decodeFile(filename);
|
||||
workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-done', text });
|
||||
} catch (e) {
|
||||
workerPort.postMessage({ msgType: 'non-streaming-asr-vad-decode-error', text: `Failed to decode ${filename}` });
|
||||
@@ -165,6 +218,17 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
|
||||
workerPort.postMessage({ 'msgType': 'non-streaming-asr-vad-decode-progress', progress: 100 });
|
||||
}
|
||||
|
||||
if (msgType == 'non-streaming-asr-vad-mic') {
|
||||
const samples: Float32Array = e.data['samples'] as Float32Array;
|
||||
vadMic.reset();
|
||||
try {
|
||||
const text = decodeMic(samples);
|
||||
workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-done', text });
|
||||
} catch (e) {
|
||||
workerPort.postMessage({ msgType: 'non-streaming-asr-vad-mic-error', text: `Failed to decode` });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -47,6 +47,18 @@
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
"requestPermissions": [
|
||||
{
|
||||
"name": "ohos.permission.MICROPHONE",
|
||||
"reason": "$string:mic_reason",
|
||||
"usedScene": {
|
||||
"abilities": [
|
||||
"FormAbility",
|
||||
],
|
||||
"when": "always",
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,10 @@
|
||||
{
|
||||
"name": "EntryAbility_label",
|
||||
"value": "VAD_ASR"
|
||||
},
|
||||
{
|
||||
"name": "mic_reason",
|
||||
"value": "access the microphone for speech recognition"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -90,7 +90,7 @@ hvigorw assembleHap --mode module -p product=default -p buildMode=release --no-d
|
||||
|
||||
ls -lh ./entry/build/default/outputs/default/entry-default-unsigned.hap
|
||||
|
||||
in_file=./entry/build/default/outputs/default/entry-default-unsigned.hap
|
||||
in_file=$PWD/entry/build/default/outputs/default/entry-default-unsigned.hap
|
||||
out_file=$PWD/entry/build/default/outputs/default/entry-default-signed.hap
|
||||
|
||||
java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -mode "localSign" \
|
||||
@@ -100,11 +100,12 @@ java -jar $jar sign-app -keyAlias "$HAP_KEY_ALIAS" -signAlg "SHA256withECDSA" -m
|
||||
|
||||
ls -l $in_file $out_file
|
||||
ls -lh $in_file $out_file
|
||||
rm $in_file
|
||||
rm -rf ./entry/src/main/resources/rawfile/$model_name
|
||||
popd
|
||||
|
||||
mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap
|
||||
# Use unsigned hap
|
||||
mv $in_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap
|
||||
# mv $out_file ./haps/sherpa-onnx-${SHERPA_ONNX_VERSION}-vad_asr-$lang-$short_name.hap
|
||||
|
||||
ls -lh haps
|
||||
|
||||
|
||||
Reference in New Issue
Block a user