Add on-device real-time ASR demo for HarmonyOS (#1606)
This commit is contained in:
@@ -0,0 +1,43 @@
|
||||
import AbilityConstant from '@ohos.app.ability.AbilityConstant';
|
||||
import hilog from '@ohos.hilog';
|
||||
import UIAbility from '@ohos.app.ability.UIAbility';
|
||||
import Want from '@ohos.app.ability.Want';
|
||||
import window from '@ohos.window';
|
||||
|
||||
/**
 * Entry ability of the demo application.
 *
 * Every lifecycle callback only writes a line to hilog; onWindowStageCreate
 * additionally loads the page 'pages/Index' into the window stage.
 */
export default class EntryAbility extends UIAbility {
  /** Writes `message` to hilog (domain 0x0000, tag 'testTag') at info level. */
  private logStage(message: string): void {
    hilog.info(0x0000, 'testTag', '%{public}s', message);
  }

  onCreate(want: Want, launchParam: AbilityConstant.LaunchParam): void {
    this.logStage('Ability onCreate');
  }

  onDestroy(): void {
    this.logStage('Ability onDestroy');
  }

  onWindowStageCreate(windowStage: window.WindowStage): void {
    // The main window has been created: attach the main page to it.
    this.logStage('Ability onWindowStageCreate');

    windowStage.loadContent('pages/Index', (err) => {
      if (!err.code) {
        hilog.info(0x0000, 'testTag', 'Succeeded in loading the content.');
        return;
      }

      hilog.error(0x0000, 'testTag', 'Failed to load the content. Cause: %{public}s', JSON.stringify(err) ?? '');
    });
  }

  onWindowStageDestroy(): void {
    // The main window has been destroyed; UI related resources would be released here.
    this.logStage('Ability onWindowStageDestroy');
  }

  onForeground(): void {
    // The ability has been brought to the foreground.
    this.logStage('Ability onForeground');
  }

  onBackground(): void {
    // The ability has gone back to the background.
    this.logStage('Ability onBackground');
  }
}
|
||||
@@ -0,0 +1,12 @@
|
||||
import hilog from '@ohos.hilog';
|
||||
import BackupExtensionAbility, { BundleVersion } from '@ohos.application.BackupExtensionAbility';
|
||||
|
||||
/**
 * Backup extension of the demo application.
 *
 * Both hooks only log that they were invoked; no data is backed up or restored.
 */
export default class EntryBackupAbility extends BackupExtensionAbility {
  /** Logs that a backup was requested. */
  async onBackup(): Promise<void> {
    hilog.info(0x0000, 'testTag', 'onBackup ok');
  }

  /**
   * Logs that a restore was requested.
   *
   * @param bundleVersion version information passed in by the caller; it is
   *   only serialized into the log line.
   */
  async onRestore(bundleVersion: BundleVersion): Promise<void> {
    const serializedVersion = JSON.stringify(bundleVersion);
    hilog.info(0x0000, 'testTag', 'onRestore ok %{public}s', serializedVersion);
  }
}
|
||||
@@ -0,0 +1,428 @@
|
||||
import { LengthUnit } from '@kit.ArkUI';
|
||||
import worker, { MessageEvents } from '@ohos.worker';
|
||||
import { BusinessError } from '@kit.BasicServicesKit';
|
||||
import { picker } from '@kit.CoreFileKit';
|
||||
import systemTime from '@ohos.systemTime';
|
||||
import { Permissions } from '@kit.AbilityKit';
|
||||
import { allAllowed, requestPermissions } from './Permission';
|
||||
import { audio } from '@kit.AudioKit';
|
||||
import fs from '@ohos.file.fs';
|
||||
|
||||
|
||||
/**
 * Writes mono 16-bit PCM samples to `filename` as a WAV file.
 *
 * The 44-byte header follows http://soundfile.sapp.org/doc/WaveFormat/ and
 * all multi-byte fields are little-endian.
 *
 * @param filename   path or URI accepted by `fs.openSync`
 * @param samples    16-bit samples of a single channel; only the elements
 *                   visible through this view are written, even when the view
 *                   covers just a part of its backing buffer
 * @param sampleRate sample rate in Hz, stored in the header
 */
function savePcmToWav(filename: string, samples: Int16Array, sampleRate: number) {
  const bytesPerSample = 2;
  const dataSize = samples.length * bytesPerSample;

  const header = new ArrayBuffer(44);
  const view = new DataView(header);

  view.setUint32(0, 0x46464952, true); // chunkID: "RIFF"
  view.setUint32(4, 36 + dataSize, true); // chunkSize
  view.setUint32(8, 0x45564157, true); // format: "WAVE"
  view.setUint32(12, 0x20746d66, true); // subchunk1ID: "fmt "
  view.setUint32(16, 16, true); // subchunk1Size, 16 for PCM
  view.setUint16(20, 1, true); // audioFormat is a 16-bit field, 1 for PCM
  view.setUint16(22, 1, true); // numChannels: 1 channel
  view.setUint32(24, sampleRate, true); // sampleRate
  view.setUint32(28, sampleRate * bytesPerSample, true); // byteRate
  view.setUint16(32, bytesPerSample, true); // blockAlign
  view.setUint16(34, 16, true); // bitsPerSample
  view.setUint32(36, 0x61746164, true); // subchunk2ID: "data"
  view.setUint32(40, dataSize, true); // subchunk2Size

  // A typed array may expose only a window of its buffer; copy that window so
  // that the bytes written always match the sizes recorded in the header.
  const coversWholeBuffer = samples.byteOffset === 0 && samples.byteLength === samples.buffer.byteLength;
  const payload = coversWholeBuffer
    ? samples.buffer
    : samples.buffer.slice(samples.byteOffset, samples.byteOffset + samples.byteLength);

  // TRUNC drops stale content when an existing, longer file is overwritten.
  const fp = fs.openSync(filename, fs.OpenMode.READ_WRITE | fs.OpenMode.CREATE | fs.OpenMode.TRUNC);
  try {
    fs.writeSync(fp.fd, header, { length: header.byteLength });
    fs.writeSync(fp.fd, payload, { length: payload.byteLength });
  } finally {
    // Close the descriptor even when one of the writes throws.
    fs.closeSync(fp.fd);
  }
}
|
||||
|
||||
/**
 * Converts floating point samples to 16-bit integer samples.
 *
 * Each value is multiplied by 32767 and limited to [-32768, 32767] before it
 * is stored in the Int16Array.
 *
 * @param samples floating point samples
 * @returns a new Int16Array with the same number of elements
 */
function toInt16Samples(samples: Float32Array): Int16Array {
  const converted = new Int16Array(samples.length);

  samples.forEach((value: number, index: number) => {
    const scaled = value * 32767;
    converted[index] = Math.min(32767, Math.max(-32768, scaled));
  });

  return converted;
}
|
||||
|
||||
|
||||
@Entry
|
||||
@Component
|
||||
struct Index {
|
||||
// Title shown at the top of every tab, and its font size.
@State title: string = 'Next-gen Kaldi: Real-time speech recognition';
@State titleFontSize: number = 15;

// Index of the currently selected tab; updated by TabBuilder.
@State currentIndex: number = 0;

// Language name inserted into the "Supported languages" and hint texts.
@State lang: string = 'English';

// Recognition results shown in the "From file" and "From mic" tabs.
@State resultForFile: string = '';
@State resultForMic: string = '';

// Enabled state of the "select file" button.
@State selectFileBtnEnabled: boolean = false;

// Caption, state flags and enabled state of the Start/Stop microphone button.
@State micBtnCaption: string = 'Start';
@State micStarted: boolean = false;
@State micAllowed: boolean = false;
@State micBtnEnabled: boolean = false;

// Caption and enabled state of the button that saves the recorded audio.
@State micSaveBtnCaption: string = 'Save recorded audio';
@State micSaveBtnEnabled: boolean = false;

// Status texts shown in the "From file" and "From mic" tabs.
@State info: string = '';
@State micInfo: string = '';

// NOTE(review): presumably marks that microphone setup has finished - confirm.
@State micInitDone: boolean = false;

// Finalized (endpointed) microphone results, one entry per segment.
private resultListForMic: string[] = [];

// Controller handed to the Tabs component.
private controller: TabsController = new TabsController();

// Worker that runs the recognizer, and the script it is created from.
private workerInstance?: worker.ThreadWorker;
private readonly scriptURL: string = 'entry/ets/workers/StreamingAsrWorker.ets';

// Timestamps used to measure how long decoding a file takes.
private startTime: number = 0;
private stopTime: number = 0;

// Sample rate requested from the microphone.
private sampleRate: number = 48000;

// Audio chunks received from the microphone since the last start.
private sampleList: Float32Array[] = [];

// Audio capturer created by initMic().
private mic?: audio.AudioCapturer;
|
||||
|
||||
/**
 * Concatenates a list of sample chunks into one contiguous array.
 *
 * @param samples chunks in the order in which they should appear
 * @returns a new Float32Array containing all chunks back to back
 */
flatten(samples: Float32Array[]): Float32Array {
  let totalLength = 0;
  for (const chunk of samples) {
    totalLength += chunk.length;
  }

  const merged: Float32Array = new Float32Array(totalLength);

  let writePos = 0;
  for (const chunk of samples) {
    merged.set(chunk, writePos);
    writePos += chunk.length;
  }

  return merged;
}
|
||||
|
||||
/**
 * Obtains the microphone permission and creates the audio capturer.
 *
 * On success `this.mic` holds a capturer whose 'readData' events are routed
 * to `micCallback`, and `micAllowed` / `micInitDone` are set. On failure a
 * message is stored in `resultForMic` and `this.mic` stays unset.
 *
 * The returned promise settles only after the capturer has been created (or
 * creation has failed), so callers that await it can rely on `this.mic`.
 */
async initMic() {
  const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];

  if (allAllowed(permissions)) {
    console.log("allowed to access microphone");
  } else {
    console.log("request to access the microphone");
    const granted: boolean = await requestPermissions(permissions);

    if (!granted) {
      console.error('access to microphone is denied')
      this.resultForMic = "Failed to get microphone permission. Please retry";
      return;
    }

    // Re-check the effective grant status after the request.
    if (!allAllowed(permissions)) {
      console.error('failed to get microphone permission');
      this.resultForMic = "Failed to get microphone permission. Please retry";
      return;
    }
  }

  this.micAllowed = true;

  const audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: this.sampleRate,
    channels: audio.AudioChannel.CHANNEL_1,
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW,
  };

  const audioCapturerInfo: audio.AudioCapturerInfo = {
    source: audio.SourceType.SOURCE_TYPE_MIC, capturerFlags: 0
  };

  const audioCapturerOptions: audio.AudioCapturerOptions = {
    streamInfo: audioStreamInfo, capturerInfo: audioCapturerInfo
  };

  try {
    // Promise form, so that this method really waits for the capturer.
    const capturer = await audio.createAudioCapturer(audioCapturerOptions);
    console.info(`init mic successfully`);
    capturer.on('readData', this.micCallback);
    this.mic = capturer;
    this.micInitDone = true;
  } catch (e) {
    const err = e as BusinessError;
    console.error(`error code is ${err.code}, error message is ${err.message}`);
    this.resultForMic = 'Failed to init microphone';
  }
}
|
||||
|
||||
/**
 * Starts the recognizer worker, installs the handler for its messages, asks
 * it to initialize the ASR model and finally sets up the microphone.
 *
 * Messages handled (field `msgType`):
 *  - 'init-streaming-asr-done': enables the buttons and shows usage hints.
 *  - 'streaming-asr-decode-file-done': shows the text and timing statistics.
 *  - 'streaming-asr-decode-mic-result': shows finalized segments plus the
 *    current partial result; a result flagged as endpoint is finalized.
 */
async aboutToAppear() {
  const asrWorker = new worker.ThreadWorker(this.scriptURL, {
    name: 'Streaming ASR worker'
  });
  this.workerInstance = asrWorker;

  asrWorker.onmessage = (e: MessageEvents) => {
    const msgType = e.data['msgType'] as string;
    console.log(`received msg from worker: ${msgType}`);

    switch (msgType) {
      case 'init-streaming-asr-done': {
        this.selectFileBtnEnabled = true;
        this.micBtnEnabled = true;
        this.info = `Initializing done.\n\nPlease select a wave file of 16kHz in language ${this.lang}`;
        this.micInfo = `Initializing done.\n\nPlease click Start and speak`;
        break;
      }

      case 'streaming-asr-decode-file-done': {
        const text = e.data['text'] as string;
        this.resultForFile = text;
        this.selectFileBtnEnabled = true;

        systemTime.getRealTime((err, data) => {
          if (err) {
            console.log('Failed to get stop time');
            return;
          }

          this.stopTime = data;

          const audioDuration = e.data['duration'] as number;
          const elapsedSeconds = (this.stopTime - this.startTime) / 1000;
          // Avoid showing Infinity/NaN when the reported duration is zero.
          const RTF = audioDuration > 0 ? elapsedSeconds / audioDuration : 0;
          this.info = `Audio duration: ${audioDuration.toFixed(2)} s
Elapsed: ${elapsedSeconds.toFixed(2)} s
RTF = ${elapsedSeconds.toFixed(2)}/${audioDuration.toFixed(2)} = ${RTF.toFixed(3)}
`;
        });
        break;
      }

      case 'streaming-asr-decode-mic-result': {
        const text = e.data['text'] as string;
        if (text.trim() == '') {
          return;
        }

        const isEndpoint = e.data['isEndpoint'] as boolean;

        // Finalized segments first, then the current result, one per line.
        const lines: string[] = this.resultListForMic.map((segment: string, idx: number) => `${idx}: ${segment}`);
        lines.push(`${this.resultListForMic.length}: ${text}`);
        this.resultForMic = lines.join('\n');

        if (isEndpoint) {
          this.resultListForMic.push(text);
        }
        break;
      }

      default:
        break;
    }
  };

  const context = getContext();
  asrWorker.postMessage({ msgType: 'init-streaming-asr', context });
  this.info = 'Initializing ASR model.\nPlease wait';
  this.micInfo = 'Initializing ASR model.\nPlease wait';

  await this.initMic();
}

/**
 * Releases what aboutToAppear() acquired: the audio capturer and the worker
 * thread. Without this both stay alive after the component is gone.
 */
aboutToDisappear() {
  const capturer = this.mic;
  if (capturer) {
    this.mic = undefined;
    capturer.release().catch((err: BusinessError) => {
      console.error(`Failed to release microphone, code is ${err.code}, message is ${err.message}`);
    });
  }

  const asrWorker = this.workerInstance;
  if (asrWorker) {
    this.workerInstance = undefined;
    asrWorker.terminate();
  }
}
|
||||
|
||||
/**
 * Builds one entry of the tab bar: an icon above a caption.
 *
 * The selected image and the highlight color are used when `targetIndex` is
 * the current tab; clicking the entry switches to that tab.
 *
 * @param title       caption of the tab
 * @param targetIndex index of the tab this entry switches to
 * @param selectedImg icon used while the tab is selected
 * @param normalImg   icon used while the tab is not selected
 */
@Builder
TabBuilder(title: string, targetIndex: number, selectedImg: Resource, normalImg: Resource) {
  Column() {
    Image(this.currentIndex != targetIndex ? normalImg : selectedImg)
      .size({ width: 25, height: 25 })
    Text(title)
      .fontColor(this.currentIndex != targetIndex ? '#8a8a8a' : '#28bff1')
  }
  .width('100%')
  .height(50)
  .justifyContent(FlexAlign.Center)
  .onClick(() => {
    this.currentIndex = targetIndex;
    this.controller.changeIndex(targetIndex);
  })
}
|
||||
|
||||
/**
 * Declares the UI: three tabs ("From file", "From mic", "Help") with the tab
 * bar at the end.
 *
 *  - From file: lets the user pick a .wav file and sends it to the worker.
 *  - From mic: starts/stops the microphone and can save the recorded audio.
 *  - Help: static information about the project.
 */
build() {
  Column() {
    Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
      TabContent() {
        Column({ space: 10 }) {
          Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold)
          Button('Select .wav file (16kHz) ')
            .enabled(this.selectFileBtnEnabled)
            .fontSize(13)
            .width(296)
            .height(60)
            .onClick(() => {
              this.resultForFile = '';
              this.info = '';
              // Disabled until decoding is done or selection fails.
              this.selectFileBtnEnabled = false;

              const documentSelectOptions = new picker.DocumentSelectOptions();
              documentSelectOptions.maxSelectNumber = 1;
              documentSelectOptions.fileSuffixFilters = ['.wav'];
              const documentViewPicker = new picker.DocumentViewPicker();

              documentViewPicker.select(documentSelectOptions).then((result: Array<string>) => {
                console.log(`select file result: ${result}`);

                if (!result[0]) {
                  this.resultForFile = 'Please select a file to decode';
                  this.selectFileBtnEnabled = true;
                  return;
                }

                if (this.workerInstance) {
                  systemTime.getRealTime((err, data) => {
                    if (err) {
                      console.log('Failed to get start time');
                    } else {
                      this.startTime = data;
                    }
                  });

                  this.workerInstance.postMessage({
                    msgType: 'streaming-asr-decode-file', filename: result[0],
                  });
                  this.info = `Decoding ${result[0]} ... ...`;
                } else {
                  console.log(`this worker instance is undefined ${this.workerInstance}`);
                  // No decode result will ever arrive, so re-enable the button here.
                  this.selectFileBtnEnabled = true;
                }
              }).catch((err: BusinessError) => {
                console.error(`Failed to select file, code is ${err.code}, message is ${err.message}`);
                this.selectFileBtnEnabled = true;
              })
            })

          Text(`Supported languages: ${this.lang}`)

          if (this.info != '') {
            TextArea({ text: this.info }).focusable(false)
          }

          TextArea({ text: this.resultForFile })
            .width('100%')
            .lineSpacing({ value: 10, unit: LengthUnit.VP })
            .height('100%')
        }
      }.tabBar(this.TabBuilder('From file', 0, $r('app.media.icon_doc'), $r('app.media.icon_doc')))

      TabContent() {
        Column({ space: 10 }) {
          Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold)
          Button(this.micBtnCaption)
            .enabled(this.micBtnEnabled)
            .fontSize(13)
            .width(296)
            .height(60)
            .onClick(() => {
              this.micInfo = '';
              if (this.mic) {
                if (this.micStarted) {
                  // Stop recording; the recorded audio may now be saved.
                  this.micStarted = false;
                  this.micBtnCaption = 'Start';
                  this.mic.stop();
                  this.micSaveBtnEnabled = true;

                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'streaming-asr-decode-mic-stop'
                    });
                  }
                } else {
                  // Start a fresh recording and discard previous results.
                  this.micStarted = true;
                  this.micSaveBtnEnabled = false;
                  this.micBtnCaption = 'Stop';
                  this.resultForMic = '';
                  this.resultListForMic = [];

                  if (this.workerInstance) {
                    this.workerInstance.postMessage({
                      msgType: 'streaming-asr-decode-mic-start'
                    });
                  }

                  this.sampleList = [];
                  this.mic.start();
                }
              }
            })

          Button(this.micSaveBtnCaption)
            .enabled(this.micSaveBtnEnabled)
            .fontSize(13)
            .width(296)
            .height(60)
            .onClick(() => {
              if (this.sampleList.length == 0) {
                this.micSaveBtnEnabled = false;
                return;
              }

              const samples = this.flatten(this.sampleList);

              if (samples.length == 0) {
                this.micSaveBtnEnabled = false;
                return;
              }

              const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
              const audioViewPicker = new picker.AudioViewPicker();

              audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
                const uri: string = audioSelectResult.length > 0 ? audioSelectResult[0] : '';
                if (!uri) {
                  // Nothing was chosen: there is no destination to write to.
                  console.log('No destination was selected for the recorded audio');
                  return;
                }

                try {
                  savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
                  console.log(`Saved to ${uri}`);
                  this.micInfo += `\nSaved to ${uri}`;
                } catch (e) {
                  const err = e as BusinessError;
                  console.error(`Failed to save to ${uri}, code is ${err.code}, message is ${err.message}`);
                  this.micInfo += `\nFailed to save to ${uri}`;
                }
              }).catch((err: BusinessError) => {
                console.error(`Failed to select where to save, code is ${err.code}, message is ${err.message}`);
              })
            })

          Text(`Supported languages: ${this.lang}`)

          if (this.micInfo != '') {
            TextArea({ text: this.micInfo })
              .focusable(false)
          }

          TextArea({ text: this.resultForMic })
            .width('100%')
            .lineSpacing({ value: 10, unit: LengthUnit.VP })
            .height('100%')
        }
      }.tabBar(this.TabBuilder('From mic', 1, $r('app.media.icon_mic'), $r('app.media.icon_mic')))

      TabContent() {
        Column({ space: 10 }) {
          Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold)
          TextArea({
            text: `
Everything is open-sourced.

It runs locally, without accessing the network

See also https://github.com/k2-fsa/sherpa-onnx

新一代 Kaldi QQ 和微信交流群: 请看

https://k2-fsa.github.io/sherpa/social-groups.html

微信公众号: 新一代 Kaldi
`
          }).width('100%').height('100%').focusable(false)
        }.justifyContent(FlexAlign.Start)
      }.tabBar(this.TabBuilder('Help', 2, $r('app.media.info'), $r('app.media.info')))
    }.scrollable(false)
  }.width('100%')
}
|
||||
|
||||
/**
 * Handler registered for the capturer's 'readData' event.
 *
 * Interprets `buffer` as 16-bit samples, scales them by 1/32768 into floats,
 * keeps the chunk in `sampleList` and, when a worker exists, forwards the
 * chunk together with the sample rate for decoding.
 */
private micCallback = (buffer: ArrayBuffer) => {
  const pcm: Int16Array = new Int16Array(buffer);
  const samplesFloat: Float32Array = new Float32Array(pcm.length);

  pcm.forEach((value: number, index: number) => {
    samplesFloat[index] = value / 32768.0;
  });

  this.sampleList.push(samplesFloat);

  if (!this.workerInstance) {
    return;
  }

  this.workerInstance.postMessage({
    msgType: 'streaming-asr-decode-mic-samples',
    samples: samplesFloat,
    sampleRate: this.sampleRate,
  })
}
|
||||
}
|
||||
@@ -0,0 +1,26 @@
|
||||
// This file is modified from
|
||||
// https://gitee.com/ukSir/hmchat2/blob/master/entry/src/main/ets/utils/permissionMananger.ets
|
||||
import { abilityAccessCtrl, bundleManager, common, Permissions } from '@kit.AbilityKit';
|
||||
|
||||
/**
 * Tells whether every permission in `permissions` is granted to this app.
 *
 * @param permissions permissions to check
 * @returns true only if the list is non-empty and each entry is reported as
 *   PERMISSION_GRANTED for the app's own access token; false for an empty list
 */
export function allAllowed(permissions: Permissions[]): boolean {
  if (permissions.length == 0) {
    return false;
  }

  const mgr = abilityAccessCtrl.createAtManager();

  // The access token of this application identifies it in the checks below.
  const flag = bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION;
  const tokenID: number = bundleManager.getBundleInfoForSelfSync(flag).appInfo.accessTokenId;

  for (const permission of permissions) {
    const status = mgr.checkAccessTokenSync(tokenID, permission);
    if (status != abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED) {
      return false;
    }
  }

  return true;
}
|
||||
|
||||
/**
 * Asks the user to grant `permissions`.
 *
 * @param permissions permissions to request
 * @returns true if the request reports at least one result and every
 *   reported result is 0; false otherwise
 */
export async function requestPermissions(permissions: Permissions[]): Promise<boolean> {
  const mgr = abilityAccessCtrl.createAtManager();
  const context: Context = getContext() as common.UIAbilityContext;

  const outcome = await mgr.requestPermissionsFromUser(context, permissions);
  const statuses = outcome.authResults;

  if (statuses.length == 0) {
    return false;
  }

  return statuses.every(status => status == 0);
}
|
||||
@@ -0,0 +1,294 @@
|
||||
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
|
||||
import {
|
||||
OnlineModelConfig,
|
||||
OnlineRecognizer,
|
||||
OnlineRecognizerConfig,
|
||||
OnlineStream,
|
||||
readWaveFromBinary,
|
||||
Samples
|
||||
} from 'sherpa_onnx';
|
||||
import { fileIo } from '@kit.CoreFileKit';
|
||||
|
||||
// Port used to exchange messages with the thread that created this worker.
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

// Recognizer shared by all requests; assigned by the 'init-streaming-asr' message.
let recognizer: OnlineRecognizer;

// Stream fed with microphone samples; assigned by 'streaming-asr-decode-mic-start'.
let micStream: OnlineStream;
|
||||
|
||||
/**
 * Fills the transducer related fields of `config`.
 *
 * Every file name is given relative to `modelDir` and stored as
 * `${modelDir}/${name}`.
 */
function setTransducerModel(config: OnlineModelConfig, modelDir: string, encoder: string, decoder: string,
  joiner: string, tokens: string, modelType: string): void {
  config.transducer.encoder = `${modelDir}/${encoder}`;
  config.transducer.decoder = `${modelDir}/${decoder}`;
  config.transducer.joiner = `${modelDir}/${joiner}`;
  config.tokens = `${modelDir}/${tokens}`;
  config.modelType = modelType;
}

/**
 * Returns the model configuration for one of the supported model types.
 *
 * @param type numeric model id; 0-10 and 14 are known
 * @returns a config with model file paths, tokens path and model type set;
 *   for an unknown `type` a message is logged and the freshly constructed
 *   config is returned as is
 */
function getModelConfig(type: number): OnlineModelConfig {
  const modelConfig = new OnlineModelConfig();

  switch (type) {
    case 0:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20',
        'encoder-epoch-99-avg-1.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.onnx',
        'tokens.txt', 'zipformer');
      break;

    case 1:
      setTransducerModel(modelConfig, 'sherpa-onnx-lstm-zh-2023-02-20',
        'encoder-epoch-11-avg-1.onnx', 'decoder-epoch-11-avg-1.onnx', 'joiner-epoch-11-avg-1.onnx',
        'tokens.txt', 'lstm');
      break;

    case 2:
      setTransducerModel(modelConfig, 'sherpa-onnx-lstm-en-2023-02-17',
        'encoder-epoch-99-avg-1.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.onnx',
        'tokens.txt', 'lstm');
      break;

    case 3:
      setTransducerModel(modelConfig, 'icefall-asr-zipformer-streaming-wenetspeech-20230615',
        'exp/encoder-epoch-12-avg-4-chunk-16-left-128.int8.onnx',
        'exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx',
        'exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx',
        'data/lang_char/tokens.txt', 'zipformer2');
      break;

    case 4:
      setTransducerModel(modelConfig, 'icefall-asr-zipformer-streaming-wenetspeech-20230615',
        'exp/encoder-epoch-12-avg-4-chunk-16-left-128.onnx',
        'exp/decoder-epoch-12-avg-4-chunk-16-left-128.onnx',
        'exp/joiner-epoch-12-avg-4-chunk-16-left-128.onnx',
        'data/lang_char/tokens.txt', 'zipformer2');
      break;

    case 5: {
      // The only paraformer model: it has no joiner.
      const modelDir = 'sherpa-onnx-streaming-paraformer-bilingual-zh-en';
      modelConfig.paraformer.encoder = `${modelDir}/encoder.int8.onnx`;
      modelConfig.paraformer.decoder = `${modelDir}/decoder.int8.onnx`;
      modelConfig.tokens = `${modelDir}/tokens.txt`;
      modelConfig.modelType = 'paraformer';
      break;
    }

    case 6:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-en-2023-06-26',
        'encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx',
        'decoder-epoch-99-avg-1-chunk-16-left-128.onnx',
        'joiner-epoch-99-avg-1-chunk-16-left-128.onnx',
        'tokens.txt', 'zipformer2');
      break;

    case 7:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-fr-2023-04-14',
        'encoder-epoch-29-avg-9-with-averaged-model.int8.onnx',
        'decoder-epoch-29-avg-9-with-averaged-model.onnx',
        'joiner-epoch-29-avg-9-with-averaged-model.onnx',
        'tokens.txt', 'zipformer');
      break;

    case 8:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20',
        'encoder-epoch-99-avg-1.int8.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.int8.onnx',
        'tokens.txt', 'zipformer');
      break;

    case 9:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-zh-14M-2023-02-23',
        'encoder-epoch-99-avg-1.int8.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.int8.onnx',
        'tokens.txt', 'zipformer');
      break;

    case 10:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-en-20M-2023-02-17',
        'encoder-epoch-99-avg-1.int8.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.int8.onnx',
        'tokens.txt', 'zipformer');
      break;

    case 14:
      setTransducerModel(modelConfig, 'sherpa-onnx-streaming-zipformer-korean-2024-06-16',
        'encoder-epoch-99-avg-1.int8.onnx', 'decoder-epoch-99-avg-1.onnx', 'joiner-epoch-99-avg-1.int8.onnx',
        'tokens.txt', 'zipformer');
      break;

    default:
      console.log(`Please specify a supported type. Given type ${type}`);
  }

  return modelConfig;
}
|
||||
|
||||
/**
 * Creates the streaming recognizer for the model type selected below.
 *
 * The model config is obtained from getModelConfig() and then adjusted:
 * debug output on, 2 threads, endpoint detection enabled.
 *
 * @param context context whose resourceManager is handed to the recognizer
 * @returns the newly constructed recognizer
 */
function initStreamingAsr(context: Context): OnlineRecognizer {
  /*

  If you use type = 8, then you should have the following directory structure in the rawfile directory

  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
  /Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxStreamingAsr/entry/src/main/resources/rawfile
  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls
  sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
  (py38) fangjuns-MacBook-Pro:rawfile fangjun$ tree .
  .
  └── sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
      ├── decoder-epoch-99-avg-1.onnx
      ├── encoder-epoch-99-avg-1.int8.onnx
      ├── joiner-epoch-99-avg-1.int8.onnx
      └── tokens.txt

  1 directory, 4 files

  You can download model files from
  https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

  Please delete the files that are not used. Otherwise, your app will be
  very large because it contains unused large files.

  */
  const type: number = 8;

  const config: OnlineRecognizerConfig = new OnlineRecognizerConfig();
  config.modelConfig = getModelConfig(type);

  const modelConfig = config.modelConfig;
  modelConfig.debug = true;
  modelConfig.numThreads = 2;

  config.enableEndpoint = true;

  return new OnlineRecognizer(config, context.resourceManager);
}
|
||||
|
||||
/** Result of decoding one wave file. */
interface DecodeFileResult {
  /** Recognized text. */
  text: string;

  /** Number of samples in the file divided by its sample rate. */
  duration: number;
}

/**
 * Reads the whole content of `filename` into memory.
 *
 * The file is closed before returning, also when reading fails.
 */
function readFileBytes(filename: string): Uint8Array {
  const fp = fileIo.openSync(filename);
  try {
    const stat = fileIo.statSync(fp.fd);
    const arrayBuffer = new ArrayBuffer(stat.size);
    fileIo.readSync(fp.fd, arrayBuffer);
    return new Uint8Array(arrayBuffer);
  } finally {
    fileIo.closeSync(fp);
  }
}

/**
 * Decodes a wave file with the module-level `recognizer`.
 *
 * The samples are followed by half a second worth of zero samples before
 * decoding runs until the recognizer reports that the stream is no longer
 * ready.
 *
 * @param filename path or URI of the wave file
 * @returns the recognized text and the duration of the audio
 */
function decodeFile(filename: string): DecodeFileResult {
  const data: Uint8Array = readFileBytes(filename);
  const wave: Samples = readWaveFromBinary(data) as Samples;
  console.log(`Sample rate: ${wave.sampleRate}`);

  const stream = recognizer.createStream();
  stream.acceptWaveform(wave);

  // A new Float32Array is zero-filled already; the length must be an integer.
  const tailPadding = new Float32Array(Math.floor(0.5 * wave.sampleRate));
  stream.acceptWaveform({ samples: tailPadding, sampleRate: wave.sampleRate });

  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const audioDuration = wave.samples.length / wave.sampleRate;

  return { text: recognizer.getResult(stream).text, duration: audioDuration };
}
|
||||
|
||||
/**
 * Handles messages sent by the host thread. The handler runs in the worker
 * thread and dispatches on the `msgType` field of the message:
 *
 *  - 'init-streaming-asr': creates the recognizer once and replies with
 *    'init-streaming-asr-done'.
 *  - 'streaming-asr-decode-file': decodes `filename` and replies with
 *    'streaming-asr-decode-file-done' (also on failure, so that the host is
 *    always notified).
 *  - 'streaming-asr-decode-mic-start': creates a new stream for the microphone.
 *  - 'streaming-asr-decode-mic-stop': nothing to do.
 *  - 'streaming-asr-decode-mic-samples': feeds `samples` to the microphone
 *    stream and replies with 'streaming-asr-decode-mic-result' for every
 *    non-empty result.
 *
 * @param e message data
 */
workerPort.onmessage = (e: MessageEvents) => {
  const msgType = e.data['msgType'] as string;

  // Sample messages arrive continuously while recording; do not log them.
  if (msgType != 'streaming-asr-decode-mic-samples') {
    console.log(`from the main thread, msg-type: ${msgType}`);
  }

  switch (msgType) {
    case 'init-streaming-asr': {
      if (recognizer) {
        break;
      }

      console.log('initializing streaming ASR...');
      const context = e.data['context'] as Context;
      recognizer = initStreamingAsr(context);
      console.log('streaming ASR is initialized. ');
      workerPort.postMessage({ 'msgType': 'init-streaming-asr-done' });
      break;
    }

    case 'streaming-asr-decode-file': {
      const filename = e.data['filename'] as string;
      console.log(`decoding ${filename}`);

      try {
        const result = decodeFile(filename);
        workerPort.postMessage({
          'msgType': 'streaming-asr-decode-file-done', text: result.text, duration: result.duration
        });
      } catch (err) {
        // Still answer, otherwise the host keeps waiting for a result forever.
        console.error(`Failed to decode ${filename}: ${(err as Error).message}`);
        workerPort.postMessage({
          'msgType': 'streaming-asr-decode-file-done', text: `Failed to decode ${filename}`, duration: 0
        });
      }
      break;
    }

    case 'streaming-asr-decode-mic-start': {
      if (!recognizer) {
        console.error('streaming ASR is not initialized yet');
        break;
      }

      micStream = recognizer.createStream();
      break;
    }

    case 'streaming-asr-decode-mic-stop': {
      // nothing to do
      break;
    }

    case 'streaming-asr-decode-mic-samples': {
      // Samples that arrive before the stream exists cannot be decoded.
      if (!recognizer || !micStream) {
        break;
      }

      const samples = e.data['samples'] as Float32Array;
      const sampleRate = e.data['sampleRate'] as number;

      micStream.acceptWaveform({ samples, sampleRate });

      while (recognizer.isReady(micStream)) {
        recognizer.decode(micStream);

        // Read the text before a possible reset, which starts a new segment.
        const text = recognizer.getResult(micStream).text;
        const isEndpoint = recognizer.isEndpoint(micStream);

        if (isEndpoint) {
          recognizer.reset(micStream);
        }

        if (text.trim() != '') {
          workerPort.postMessage({
            'msgType': 'streaming-asr-decode-mic-result', text: text, isEndpoint: isEndpoint,
          });
        }
      }
      break;
    }

    default:
      break;
  }
}
|
||||
|
||||
/**
 * Called when the worker receives a message that cannot be deserialized.
 * The handler runs in the worker thread and logs the incident so that a
 * dropped message does not go unnoticed.
 *
 * @param e message data
 */
workerPort.onmessageerror = (e: MessageEvents) => {
  console.error('streaming ASR worker received a message that cannot be deserialized');
}
|
||||
|
||||
/**
 * Called when an exception occurs during worker execution. The handler runs
 * in the worker thread and logs the error instead of dropping it silently.
 *
 * @param e error message
 */
workerPort.onerror = (e: ErrorEvent) => {
  console.error(`streaming ASR worker error: ${e.message} (${e.filename}:${e.lineno})`);
}
|
||||
Reference in New Issue
Block a user