Add missing changes about speaker identification demo for HarmonyOS (#1612)
This commit is contained in:
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"meta": {
|
||||
"stableOrder": true
|
||||
},
|
||||
"lockfileVersion": 3,
|
||||
"ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
|
||||
"specifiers": {
|
||||
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
|
||||
"sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har"
|
||||
},
|
||||
"packages": {
|
||||
"libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
|
||||
"name": "libsherpa_onnx.so",
|
||||
"version": "1.0.0",
|
||||
"resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
|
||||
"registryType": "local"
|
||||
},
|
||||
"sherpa_onnx@sherpa_onnx_2.har": {
|
||||
"name": "sherpa_onnx",
|
||||
"version": "1.10.33",
|
||||
"resolved": "sherpa_onnx_2.har",
|
||||
"registryType": "local",
|
||||
"dependencies": {
|
||||
"libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -72,7 +72,7 @@ struct Index {
|
||||
|
||||
@State currentIndex: number = 0;
|
||||
|
||||
@State message: string = 'Hello World';
|
||||
private threshold: string = '0.5';
|
||||
|
||||
private workerInstance?: worker.ThreadWorker
|
||||
private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets'
|
||||
@@ -83,15 +83,21 @@ struct Index {
|
||||
@State btnSaveAudioEnabled: boolean = false;
|
||||
@State btnAddEnabled: boolean = false;
|
||||
|
||||
private sampleRate: number = 16000;
|
||||
private sampleList: Float32Array[] = []
|
||||
private sampleRate: number = 48000;
|
||||
private sampleListForAdding: Float32Array[] = []
|
||||
private sampleListForTesting: Float32Array[] = []
|
||||
private mic?: audio.AudioCapturer;
|
||||
|
||||
@State infoHome: string = '';
|
||||
@State infoAdd: string = '';
|
||||
|
||||
@State micBtnCaption: string = 'Start recording';
|
||||
@State micStarted: boolean = false;
|
||||
@State micBtnCaptionForAdding: string = 'Start recording';
|
||||
@State micStartedForAdding: boolean = false;
|
||||
@State micBtnEnabledForAdding: boolean = true;
|
||||
|
||||
@State micBtnCaptionForTesting: string = 'Start recording';
|
||||
@State micStartedForTesting: boolean = false;
|
||||
@State micBtnEnabledForTesting: boolean = true;
|
||||
|
||||
async initMic() {
|
||||
const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
|
||||
@@ -158,6 +164,23 @@ struct Index {
|
||||
if (msgType == 'manager-all-speaker-names') {
|
||||
this.allSpeakerNames = e.data['allSpeakers'] as string[];
|
||||
}
|
||||
|
||||
if (msgType == 'manager-add-speaker-done') {
|
||||
const ok: boolean = e.data['ok'] as boolean;
|
||||
const status: string = e.data['status'] as string;
|
||||
this.infoAdd += '\n' + status;
|
||||
|
||||
if (ok) {
|
||||
this.sampleListForAdding = [];
|
||||
this.btnSaveAudioEnabled = false;
|
||||
this.btnAddEnabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (msgType == 'manager-search-speaker-done') {
|
||||
const name = e.data['name'] as string;
|
||||
this.infoHome = name;
|
||||
}
|
||||
};
|
||||
|
||||
this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()});
|
||||
@@ -181,7 +204,97 @@ struct Index {
|
||||
Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
|
||||
TabContent() {
|
||||
Column({ space: 10 }) {
|
||||
Button('Home')
|
||||
Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
|
||||
Row() {
|
||||
Text('Similary threshold').width('60%');
|
||||
|
||||
TextInput({ text: this.threshold }).onChange((text) => {
|
||||
this.threshold = text.trim();
|
||||
}).width('20%')
|
||||
}
|
||||
Row() {
|
||||
Button(this.micBtnCaptionForTesting)
|
||||
.enabled(this.micBtnEnabledForTesting)
|
||||
.onClick(()=>{
|
||||
if (this.allSpeakerNames.length == 0) {
|
||||
this.infoHome = 'There are no speakers registered. Please add them first';
|
||||
return;
|
||||
}
|
||||
|
||||
let threshold = parseFloat(this.threshold);
|
||||
if (isNaN(threshold)) {
|
||||
this.infoHome = 'Please enter a valid threshold';
|
||||
return;
|
||||
}
|
||||
|
||||
if (threshold <= 0) {
|
||||
this.infoHome = 'Please enter a positive threshold';
|
||||
return;
|
||||
}
|
||||
console.log(`threshold: ${threshold}`);
|
||||
|
||||
if (this.micStartedForTesting) {
|
||||
this.micStartedForTesting = false;
|
||||
this.micBtnCaptionForTesting = 'Start';
|
||||
this.micBtnEnabledForAdding = true;
|
||||
this.mic?.stop();
|
||||
|
||||
const samples = flatten(this.sampleListForTesting);
|
||||
const duration = samples.length / this.sampleRate;
|
||||
if (duration < 0.5) {
|
||||
this.infoHome = `Please speak for a longer time! Current duration: ${duration}`;
|
||||
return;
|
||||
}
|
||||
if (this.workerInstance) {
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'manager-search-speaker',
|
||||
samples: samples,
|
||||
sampleRate: this.sampleRate,
|
||||
threshold,
|
||||
});
|
||||
}
|
||||
} else {
|
||||
this.sampleListForTesting = [];
|
||||
this.micStartedForTesting = true;
|
||||
this.micBtnCaptionForTesting = 'Stop';
|
||||
this.micBtnEnabledForAdding = false;
|
||||
this.mic?.start();
|
||||
this.infoHome = `Use threshold: ${threshold}`;
|
||||
this.infoHome += '\nPlease speak and then click Stop';
|
||||
}
|
||||
})
|
||||
|
||||
Button('Save audio')
|
||||
.enabled(!this.micStartedForTesting)
|
||||
.onClick(()=>{
|
||||
if (this.sampleListForTesting.length == 0) {
|
||||
this.infoHome = 'No audio samples recorded';
|
||||
return;
|
||||
}
|
||||
const samples = flatten(this.sampleListForTesting);
|
||||
|
||||
if (samples.length == 0) {
|
||||
this.infoHome = 'Empty samples';
|
||||
return;
|
||||
}
|
||||
|
||||
let uri: string = '';
|
||||
|
||||
const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
|
||||
|
||||
const audioViewPicker = new picker.AudioViewPicker();
|
||||
|
||||
audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
|
||||
uri = audioSelectResult[0];
|
||||
savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
|
||||
console.log(`Saved to ${uri}`);
|
||||
this.infoHome+= `\nSaved to ${uri}`;
|
||||
});
|
||||
})
|
||||
}
|
||||
TextArea({text: this.infoHome})
|
||||
.height('100%')
|
||||
.focusable(false)
|
||||
}
|
||||
}.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home')))
|
||||
|
||||
@@ -244,22 +357,25 @@ struct Index {
|
||||
}.width('100%')
|
||||
|
||||
Row({space: 10}) {
|
||||
Button(this.micBtnCaption)
|
||||
Button(this.micBtnCaptionForAdding)
|
||||
.enabled(this.micBtnEnabledForAdding)
|
||||
.onClick(()=> {
|
||||
if (this.mic) {
|
||||
if (this.micStarted) {
|
||||
this.micStarted = false;
|
||||
this.micBtnCaption = 'Start recording';
|
||||
if (this.micStartedForAdding) {
|
||||
this.micStartedForAdding = false;
|
||||
this.micBtnEnabledForTesting = true;
|
||||
this.micBtnCaptionForAdding = 'Start recording';
|
||||
this.mic.stop();
|
||||
this.infoAdd = '';
|
||||
if (this.sampleList.length > 0) {
|
||||
if (this.sampleListForAdding.length > 0) {
|
||||
this.btnAddEnabled = true;
|
||||
this.btnSaveAudioEnabled = true;
|
||||
}
|
||||
} else {
|
||||
this.micStarted = true;
|
||||
this.micBtnCaption = 'Stop recording';
|
||||
this.sampleList = [];
|
||||
this.micStartedForAdding = true;
|
||||
this.micBtnEnabledForTesting = false;
|
||||
this.micBtnCaptionForAdding = 'Stop recording';
|
||||
this.sampleListForAdding = [];
|
||||
this.mic.start();
|
||||
this.infoAdd = '';
|
||||
|
||||
@@ -267,30 +383,41 @@ struct Index {
|
||||
this.btnSaveAudioEnabled = false;
|
||||
}
|
||||
}
|
||||
|
||||
})
|
||||
|
||||
Button('Add')
|
||||
.enabled(this.btnAddEnabled)
|
||||
.onClick(()=>{
|
||||
if (this.inputSpeakerName.trim() == '') {
|
||||
this.infoAdd += 'Please input a speaker name first';
|
||||
this.infoAdd += '\nPlease input a speaker name first';
|
||||
return;
|
||||
}
|
||||
|
||||
const samples = flatten(this.sampleList);
|
||||
console.log(`number of samples: ${samples.length}, ${samples.length / this.sampleRate}`);
|
||||
const samples = flatten(this.sampleListForAdding);
|
||||
const duration = samples.length / this.sampleRate;
|
||||
if (duration < 0.5) {
|
||||
this.infoAdd = `Please speak for a longer time. Current duration: ${duration}`;
|
||||
return;
|
||||
}
|
||||
if (this.workerInstance) {
|
||||
this.workerInstance.postMessage({
|
||||
msgType: 'manager-add-speaker',
|
||||
name: this.inputSpeakerName,
|
||||
samples: samples,
|
||||
sampleRate: this.sampleRate,
|
||||
})
|
||||
}
|
||||
})
|
||||
|
||||
Button('Save audio')
|
||||
.enabled(this.btnSaveAudioEnabled)
|
||||
.onClick(()=>{
|
||||
if (this.sampleList.length == 0) {
|
||||
if (this.sampleListForAdding.length == 0) {
|
||||
this.btnSaveAudioEnabled = false;
|
||||
return;
|
||||
}
|
||||
|
||||
const samples = flatten(this.sampleList);
|
||||
const samples = flatten(this.sampleListForAdding);
|
||||
|
||||
if (samples.length == 0) {
|
||||
this.btnSaveAudioEnabled = false;
|
||||
@@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html
|
||||
samplesFloat[i] = view[i] / 32768.0;
|
||||
}
|
||||
|
||||
this.sampleList.push(samplesFloat);
|
||||
if (this.micStartedForAdding) {
|
||||
this.sampleListForAdding.push(samplesFloat);
|
||||
}
|
||||
|
||||
if (this.micStartedForTesting) {
|
||||
this.sampleListForTesting.push(samplesFloat);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
|
||||
import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
|
||||
import {
|
||||
OnlineStream,
|
||||
readWaveFromBinary,
|
||||
Samples,
|
||||
SpeakerEmbeddingExtractor,
|
||||
SpeakerEmbeddingExtractorConfig,
|
||||
SpeakerEmbeddingManager
|
||||
} from 'sherpa_onnx';
|
||||
import { fileIo } from '@kit.CoreFileKit';
|
||||
|
||||
const workerPort: ThreadWorkerGlobalScope = worker.workerPort;
|
||||
|
||||
@@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples {
|
||||
}
|
||||
|
||||
function initExtractor(context: Context): SpeakerEmbeddingExtractor {
|
||||
const config = new SpeakerEmbeddingExtractorConfig();
|
||||
const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
|
||||
|
||||
// Please put the model file inside the directory
|
||||
// harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
|
||||
/*
|
||||
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
|
||||
/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
|
||||
(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
|
||||
total 77336
|
||||
-rw-r--r-- 1 fangjun staff 38M Dec 9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
|
||||
*/
|
||||
// You can find more models at
|
||||
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
|
||||
config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
|
||||
config.numThreads = 2;
|
||||
config.debug = true;
|
||||
@@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor {
|
||||
}
|
||||
|
||||
function extractEmbedding(samples: Samples): Float32Array {
|
||||
const stream = extractor.createStream();
|
||||
const stream: OnlineStream = extractor.createStream();
|
||||
stream.acceptWaveform(samples);
|
||||
return extractor.compute(stream);
|
||||
}
|
||||
@@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
extractor = initExtractor(context);
|
||||
manager = new SpeakerEmbeddingManager(extractor.dim);
|
||||
|
||||
const filename1 = 'sr-data/enroll/fangjun-sr-1.wav';
|
||||
const samples1 = readWaveFromRawfile(filename1, context);
|
||||
console.log(`sample rate: ${samples1.sampleRate}`);
|
||||
let ok = manager.add({ name: 'fangjun0', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun1', v: extractEmbedding(samples1) });
|
||||
/*
|
||||
ok = manager.add({ name: 'fangjun2', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun3', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun4', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun5', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun6', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun7', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun8', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun9', v: extractEmbedding(samples1) });
|
||||
ok = manager.add({ name: 'fangjun10', v: extractEmbedding(samples1) });
|
||||
*/
|
||||
|
||||
if (ok) {
|
||||
console.log(`Added fangjun`);
|
||||
let n = manager.getNumSpeakers();
|
||||
console.log(`number of speakers: ${n}`);
|
||||
console.log(`speaker names: ${manager.getAllSpeakerNames().join('\n')}`);
|
||||
}
|
||||
|
||||
workerPort.postMessage({
|
||||
msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
|
||||
});
|
||||
@@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
|
||||
if (msgType == 'manager-delete-speaker') {
|
||||
const name = e.data['name'] as string;
|
||||
const ok = manager.remove(name);
|
||||
const ok: boolean = manager.remove(name);
|
||||
if (ok) {
|
||||
console.log(`Removed ${name}.`);
|
||||
|
||||
@@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => {
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (msgType == 'manager-add-speaker') {
|
||||
const name = e.data['name'] as string;
|
||||
const samples = e.data['samples'] as Float32Array;
|
||||
const sampleRate = e.data['sampleRate'] as number;
|
||||
|
||||
const v = extractEmbedding({ samples, sampleRate });
|
||||
const ok: boolean = manager.add({ name, v });
|
||||
if (ok) {
|
||||
workerPort.postMessage({
|
||||
msgType: 'manager-add-speaker-done',
|
||||
status: `Added ${name}`,
|
||||
ok,
|
||||
});
|
||||
workerPort.postMessage({
|
||||
msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
|
||||
}
|
||||
);
|
||||
} else {
|
||||
workerPort.postMessage({
|
||||
msgType: 'manager-add-speaker-done',
|
||||
status: `Failed to add ${name}. Possibly due to exsiting speaker name. Please recheck`,
|
||||
ok,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (msgType == 'manager-search-speaker') {
|
||||
const threshold = e.data['threshold'] as number;
|
||||
const samples = e.data['samples'] as Float32Array;
|
||||
const sampleRate = e.data['sampleRate'] as number;
|
||||
|
||||
const v = extractEmbedding({ samples, sampleRate });
|
||||
let name: string = manager.search({ threshold, v });
|
||||
if (name == '' || name == undefined) {
|
||||
name = "===<Unknown>===";
|
||||
}
|
||||
workerPort.postMessage({
|
||||
msgType: 'manager-search-speaker-done',
|
||||
name
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => {
|
||||
* @param e error message
|
||||
*/
|
||||
workerPort.onerror = (e: ErrorEvent) => {
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user