Add missing changes about speaker identification demo for HarmonyOS (#1612)

2024-12-11 12:01:13 +08:00
parent e011e849d4
commit 9d4659fd29
3 changed files with 243 additions and 52 deletions
--- a/harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5
+++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/oh-package-lock.json5
@@ -0,0 +1,28 @@
+{
+  "meta": {
+    "stableOrder": true
+  },
+  "lockfileVersion": 3,
+  "ATTENTION": "THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.",
+  "specifiers": {
+    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
+    "sherpa_onnx@sherpa_onnx_2.har": "sherpa_onnx@sherpa_onnx_2.har"
+  },
+  "packages": {
+    "libsherpa_onnx.so@../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx": {
+      "name": "libsherpa_onnx.so",
+      "version": "1.0.0",
+      "resolved": "../oh_modules/.ohpm/sherpa_onnx@1y+qvabrznvcerrtte4uydjhwfdt7hfnlsk0jsnicmy=/oh_modules/sherpa_onnx/src/main/cpp/types/libsherpa_onnx",
+      "registryType": "local"
+    },
+    "sherpa_onnx@sherpa_onnx_2.har": {
+      "name": "sherpa_onnx",
+      "version": "1.10.33",
+      "resolved": "sherpa_onnx_2.har",
+      "registryType": "local",
+      "dependencies": {
+        "libsherpa_onnx.so": "file:./src/main/cpp/types/libsherpa_onnx"
+      }
+    }
+  }
+}
--- a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets
+++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/pages/Index.ets
@@ -72,7 +72,7 @@ struct Index {

  @State currentIndex: number = 0;

-  @State message: string = 'Hello World';
+  private threshold: string = '0.5';

  private workerInstance?: worker.ThreadWorker
  private readonly scriptURL: string = 'entry/ets/workers/SpeakerIdentificationWorker.ets'
@@ -83,15 +83,21 @@ struct Index {
  @State btnSaveAudioEnabled: boolean = false;
  @State btnAddEnabled: boolean = false;

-  private sampleRate: number = 16000;
-  private sampleList: Float32Array[] = []
+  private sampleRate: number = 48000;
+  private sampleListForAdding: Float32Array[] = []
+  private sampleListForTesting: Float32Array[] = []
  private mic?: audio.AudioCapturer;

  @State infoHome: string = '';
  @State infoAdd: string = '';

-  @State micBtnCaption: string = 'Start recording';
-  @State micStarted: boolean = false;
+  @State micBtnCaptionForAdding: string = 'Start recording';
+  @State micStartedForAdding: boolean = false;
+  @State micBtnEnabledForAdding: boolean = true;
+
+  @State micBtnCaptionForTesting: string = 'Start recording';
+  @State micStartedForTesting: boolean = false;
+  @State micBtnEnabledForTesting: boolean = true;

  async initMic() {
    const permissions: Permissions[] = ["ohos.permission.MICROPHONE"];
@@ -158,6 +164,23 @@ struct Index {
      if (msgType == 'manager-all-speaker-names') {
        this.allSpeakerNames = e.data['allSpeakers'] as string[];
      }
+
+      if (msgType == 'manager-add-speaker-done') {
+        const ok: boolean = e.data['ok'] as boolean;
+        const status: string = e.data['status'] as string;
+        this.infoAdd += '\n' + status;
+
+        if (ok) {
+          this.sampleListForAdding = [];
+          this.btnSaveAudioEnabled = false;
+          this.btnAddEnabled = false;
+        }
+      }
+
+      if (msgType == 'manager-search-speaker-done') {
+        const name = e.data['name'] as string;
+        this.infoHome = name;
+      }
    };

    this.workerInstance.postMessage({ msgType: 'init-extractor', context: getContext()});
@@ -181,7 +204,97 @@ struct Index {
      Tabs({ barPosition: BarPosition.End, controller: this.controller }) {
        TabContent() {
          Column({ space: 10 }) {
-            Button('Home')
+            Text(this.title).fontSize(this.titleFontSize).fontWeight(FontWeight.Bold);
+            Row() {
+              Text('Similary threshold').width('60%');
+
+              TextInput({ text: this.threshold }).onChange((text) => {
+                this.threshold = text.trim();
+              }).width('20%')
+            }
+            Row() {
+              Button(this.micBtnCaptionForTesting)
+                .enabled(this.micBtnEnabledForTesting)
+                .onClick(()=>{
+                  if (this.allSpeakerNames.length == 0) {
+                    this.infoHome = 'There are no speakers registered. Please add them first';
+                    return;
+                  }
+
+                  let threshold = parseFloat(this.threshold);
+                  if (isNaN(threshold)) {
+                    this.infoHome = 'Please enter a valid threshold';
+                    return;
+                  }
+
+                  if (threshold <= 0) {
+                    this.infoHome = 'Please enter a positive threshold';
+                    return;
+                  }
+                  console.log(`threshold: ${threshold}`);
+
+                  if (this.micStartedForTesting) {
+                    this.micStartedForTesting = false;
+                    this.micBtnCaptionForTesting = 'Start';
+                    this.micBtnEnabledForAdding = true;
+                    this.mic?.stop();
+
+                    const samples = flatten(this.sampleListForTesting);
+                    const duration = samples.length / this.sampleRate;
+                    if (duration < 0.5) {
+                      this.infoHome = `Please speak for a longer time! Current duration: ${duration}`;
+                      return;
+                    }
+                    if (this.workerInstance) {
+                      this.workerInstance.postMessage({
+                        msgType: 'manager-search-speaker',
+                        samples: samples,
+                        sampleRate: this.sampleRate,
+                        threshold,
+                      });
+                    }
+                  } else {
+                    this.sampleListForTesting = [];
+                    this.micStartedForTesting = true;
+                    this.micBtnCaptionForTesting = 'Stop';
+                    this.micBtnEnabledForAdding = false;
+                    this.mic?.start();
+                    this.infoHome = `Use threshold: ${threshold}`;
+                    this.infoHome += '\nPlease speak and then click Stop';
+                  }
+                })
+
+              Button('Save audio')
+                .enabled(!this.micStartedForTesting)
+                .onClick(()=>{
+                  if (this.sampleListForTesting.length == 0) {
+                    this.infoHome = 'No audio samples recorded';
+                    return;
+                  }
+                  const samples = flatten(this.sampleListForTesting);
+
+                  if (samples.length == 0) {
+                    this.infoHome = 'Empty samples';
+                    return;
+                  }
+
+                  let uri: string = '';
+
+                  const audioOptions = new picker.AudioSaveOptions(); // audioOptions.newFileNames = ['o.wav'];
+
+                  const audioViewPicker = new picker.AudioViewPicker();
+
+                  audioViewPicker.save(audioOptions).then((audioSelectResult: Array<string>) => {
+                    uri = audioSelectResult[0];
+                    savePcmToWav(uri, toInt16Samples(samples), this.sampleRate);
+                    console.log(`Saved to ${uri}`);
+                    this.infoHome+= `\nSaved to ${uri}`;
+                  });
+                })
+            }
+            TextArea({text: this.infoHome})
+              .height('100%')
+              .focusable(false)
          }
        }.tabBar(this.TabBuilder('Home', 0, $r('app.media.icon_home'), $r('app.media.icon_home')))

@@ -244,22 +357,25 @@ struct Index {
            }.width('100%')

            Row({space: 10}) {
-              Button(this.micBtnCaption)
+              Button(this.micBtnCaptionForAdding)
+                .enabled(this.micBtnEnabledForAdding)
                .onClick(()=> {
                  if (this.mic) {
-                    if (this.micStarted) {
-                      this.micStarted = false;
-                      this.micBtnCaption = 'Start recording';
+                    if (this.micStartedForAdding) {
+                      this.micStartedForAdding = false;
+                      this.micBtnEnabledForTesting = true;
+                      this.micBtnCaptionForAdding = 'Start recording';
                      this.mic.stop();
                      this.infoAdd = '';
-                      if (this.sampleList.length > 0) {
+                      if (this.sampleListForAdding.length > 0) {
                        this.btnAddEnabled = true;
                        this.btnSaveAudioEnabled = true;
                      }
                    } else {
-                      this.micStarted = true;
-                      this.micBtnCaption = 'Stop recording';
-                      this.sampleList = [];
+                      this.micStartedForAdding = true;
+                      this.micBtnEnabledForTesting = false;
+                      this.micBtnCaptionForAdding = 'Stop recording';
+                      this.sampleListForAdding = [];
                      this.mic.start();
                      this.infoAdd = '';

@@ -267,30 +383,41 @@ struct Index {
                      this.btnSaveAudioEnabled = false;
                    }
                  }
-
                })

              Button('Add')
                .enabled(this.btnAddEnabled)
                .onClick(()=>{
                  if (this.inputSpeakerName.trim() == '') {
-                    this.infoAdd += 'Please input a speaker name first';
+                    this.infoAdd += '\nPlease input a speaker name first';
                    return;
                  }

-                  const samples = flatten(this.sampleList);
-                  console.log(`number of samples: ${samples.length}, ${samples.length / this.sampleRate}`);
+                  const samples = flatten(this.sampleListForAdding);
+                  const duration = samples.length / this.sampleRate;
+                  if (duration < 0.5) {
+                    this.infoAdd = `Please speak for a longer time. Current duration: ${duration}`;
+                    return;
+                  }
+                  if (this.workerInstance) {
+                    this.workerInstance.postMessage({
+                      msgType: 'manager-add-speaker',
+                      name: this.inputSpeakerName,
+                      samples: samples,
+                      sampleRate: this.sampleRate,
+                    })
+                  }
                })

              Button('Save audio')
                .enabled(this.btnSaveAudioEnabled)
                .onClick(()=>{
-                  if (this.sampleList.length == 0) {
+                  if (this.sampleListForAdding.length == 0) {
                    this.btnSaveAudioEnabled = false;
                    return;
                  }

-                  const samples = flatten(this.sampleList);
+                  const samples = flatten(this.sampleListForAdding);

                  if (samples.length == 0) {
                    this.btnSaveAudioEnabled = false;
@@ -352,6 +479,12 @@ https://k2-fsa.github.io/sherpa/social-groups.html
      samplesFloat[i] = view[i] / 32768.0;
    }

-    this.sampleList.push(samplesFloat);
+    if (this.micStartedForAdding) {
+      this.sampleListForAdding.push(samplesFloat);
+    }
+
+    if (this.micStartedForTesting) {
+      this.sampleListForTesting.push(samplesFloat);
+    }
  }
-}
+}
--- a/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets
+++ b/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/ets/workers/SpeakerIdentificationWorker.ets
@@ -1,12 +1,12 @@
-import worker, { ThreadWorkerGlobalScope, MessageEvents, ErrorEvent } from '@ohos.worker';
+import worker, { ErrorEvent, MessageEvents, ThreadWorkerGlobalScope } from '@ohos.worker';
 import {
+  OnlineStream,
  readWaveFromBinary,
  Samples,
  SpeakerEmbeddingExtractor,
  SpeakerEmbeddingExtractorConfig,
  SpeakerEmbeddingManager
 } from 'sherpa_onnx';
-import { fileIo } from '@kit.CoreFileKit';

 const workerPort: ThreadWorkerGlobalScope = worker.workerPort;

@@ -19,7 +19,19 @@ function readWaveFromRawfile(filename: string, context: Context): Samples {
 }

 function initExtractor(context: Context): SpeakerEmbeddingExtractor {
-  const config = new SpeakerEmbeddingExtractorConfig();
+  const config: SpeakerEmbeddingExtractorConfig = new SpeakerEmbeddingExtractorConfig();
+
+  // Please put the model file inside the directory
+  // harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
+/*
+(py38) fangjuns-MacBook-Pro:rawfile fangjun$ pwd
+/Users/fangjun/open-source/sherpa-onnx/harmony-os/SherpaOnnxSpeakerIdentification/entry/src/main/resources/rawfile
+(py38) fangjuns-MacBook-Pro:rawfile fangjun$ ls -lh
+total 77336
+-rw-r--r--  1 fangjun  staff    38M Dec  9 19:34 3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx
+ */
+  // You can find more models at
+  // https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models
  config.model = '3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx';
  config.numThreads = 2;
  config.debug = true;
@@ -28,7 +40,7 @@ function initExtractor(context: Context): SpeakerEmbeddingExtractor {
 }

 function extractEmbedding(samples: Samples): Float32Array {
-  const stream = extractor.createStream();
+  const stream: OnlineStream = extractor.createStream();
  stream.acceptWaveform(samples);
  return extractor.compute(stream);
 }
@@ -49,30 +61,6 @@ workerPort.onmessage = (e: MessageEvents) => {
    extractor = initExtractor(context);
    manager = new SpeakerEmbeddingManager(extractor.dim);

-    const filename1 = 'sr-data/enroll/fangjun-sr-1.wav';
-    const samples1 = readWaveFromRawfile(filename1, context);
-    console.log(`sample rate: ${samples1.sampleRate}`);
-    let ok = manager.add({ name: 'fangjun0', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun1', v: extractEmbedding(samples1) });
-    /*
-    ok = manager.add({ name: 'fangjun2', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun3', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun4', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun5', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun6', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun7', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun8', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun9', v: extractEmbedding(samples1) });
-    ok = manager.add({ name: 'fangjun10', v: extractEmbedding(samples1) });
-    */
-
-    if (ok) {
-      console.log(`Added fangjun`);
-      let n = manager.getNumSpeakers();
-      console.log(`number of speakers: ${n}`);
-      console.log(`speaker names: ${manager.getAllSpeakerNames().join('\n')}`);
-    }
-
    workerPort.postMessage({
      msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
    });
@@ -80,7 +68,7 @@ workerPort.onmessage = (e: MessageEvents) => {

  if (msgType == 'manager-delete-speaker') {
    const name = e.data['name'] as string;
-    const ok = manager.remove(name);
+    const ok: boolean = manager.remove(name);
    if (ok) {
      console.log(`Removed ${name}.`);

@@ -92,6 +80,48 @@ workerPort.onmessage = (e: MessageEvents) => {
      });
    }
  }
+
+  if (msgType == 'manager-add-speaker') {
+    const name = e.data['name'] as string;
+    const samples = e.data['samples'] as Float32Array;
+    const sampleRate = e.data['sampleRate'] as number;
+
+    const v = extractEmbedding({ samples, sampleRate });
+    const ok: boolean = manager.add({ name, v });
+    if (ok) {
+      workerPort.postMessage({
+        msgType: 'manager-add-speaker-done',
+        status: `Added ${name}`,
+        ok,
+      });
+      workerPort.postMessage({
+        msgType: 'manager-all-speaker-names', allSpeakers: manager.getAllSpeakerNames(),
+      }
+      );
+    } else {
+      workerPort.postMessage({
+        msgType: 'manager-add-speaker-done',
+        status: `Failed to add ${name}. Possibly due to exsiting speaker name. Please recheck`,
+        ok,
+      });
+    }
+  }
+
+  if (msgType == 'manager-search-speaker') {
+    const threshold = e.data['threshold'] as number;
+    const samples = e.data['samples'] as Float32Array;
+    const sampleRate = e.data['sampleRate'] as number;
+
+    const v = extractEmbedding({ samples, sampleRate });
+    let name: string = manager.search({ threshold, v });
+    if (name == '' || name == undefined) {
+      name = "===<Unknown>===";
+    }
+    workerPort.postMessage({
+      msgType: 'manager-search-speaker-done',
+      name
+    });
+  }
 }

 /**
@@ -110,4 +140,4 @@ workerPort.onmessageerror = (e: MessageEvents) => {
 * @param e error message
 */
 workerPort.onerror = (e: ErrorEvent) => {
-}
+}