Add speaker identification APIs for node-addon-api (#874)

This commit is contained in:
Fangjun Kuang
2024-05-14 13:28:50 +08:00
committed by GitHub
parent 0895b64850
commit 388e6a98fc
16 changed files with 1034 additions and 3 deletions

View File

@@ -4,6 +4,7 @@ const non_streaming_asr = require('./non-streaming-asr.js');
const non_streaming_tts = require('./non-streaming-tts.js');
const vad = require('./vad.js');
const slid = require('./spoken-language-identification.js');
const sid = require('./speaker-identification.js');
module.exports = {
OnlineRecognizer: streaming_asr.OnlineRecognizer,
@@ -15,4 +16,6 @@ module.exports = {
Vad: vad.Vad,
CircularBuffer: vad.CircularBuffer,
SpokenLanguageIdentification: slid.SpokenLanguageIdentification,
SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager,
}

View File

@@ -0,0 +1,102 @@
const addon = require('./addon.js');
const streaming_asr = require('./streaming-asr.js');
/**
 * JavaScript-side wrapper around the native speaker embedding extractor.
 *
 * Every method forwards to the corresponding function of the native `addon`,
 * passing the native handle created in the constructor.
 */
class SpeakerEmbeddingExtractor {
  /**
   * @param {object} config - Passed unchanged to
   *     `addon.createSpeakerEmbeddingExtractor`; also kept on `this.config`.
   */
  constructor(config) {
    const nativeHandle = addon.createSpeakerEmbeddingExtractor(config);

    this.handle = nativeHandle;
    this.config = config;
    // Embedding dimension as reported by the native extractor.
    this.dim = addon.speakerEmbeddingExtractorDim(nativeHandle);
  }

  /**
   * Ask the native extractor for a new stream and wrap it.
   *
   * @returns {OnlineStream} A `streaming_asr.OnlineStream` built from the
   *     native stream handle.
   */
  createStream() {
    const { OnlineStream } = streaming_asr;
    const rawStream = addon.speakerEmbeddingExtractorCreateStream(this.handle);
    return new OnlineStream(rawStream);
  }

  /**
   * @param {OnlineStream} stream - A stream whose `handle` is forwarded to
   *     the native `speakerEmbeddingExtractorIsReady` call.
   * @returns {*} Whatever the native call returns (presumably a boolean).
   */
  isReady(stream) {
    const { handle: streamHandle } = stream;
    return addon.speakerEmbeddingExtractorIsReady(this.handle, streamHandle);
  }

  /**
   * Compute the embedding for a stream.
   *
   * @param {OnlineStream} stream - A stream whose `handle` is forwarded to
   *     the native `speakerEmbeddingExtractorComputeEmbedding` call.
   * @returns {*} The native result; documented by the original author as a
   *     float32 array.
   */
  compute(stream) {
    const { handle: streamHandle } = stream;
    return addon.speakerEmbeddingExtractorComputeEmbedding(
        this.handle, streamHandle);
  }
}
/**
 * Concatenate a list of numeric arrays into one contiguous Float32Array.
 *
 * @param {Array<ArrayLike<number>>} arrayList - Arrays (e.g. Float32Array
 *     instances) to join, in order.
 * @returns {Float32Array} A new array whose length is the sum of the input
 *     lengths, holding the inputs back to back.
 */
function flatten(arrayList) {
  const count = arrayList.length;

  // First pass: work out how much room the result needs.
  let total = 0;
  let idx = 0;
  while (idx < count) {
    total += arrayList[idx].length;
    idx += 1;
  }

  // Second pass: copy each chunk in at its running offset.
  const merged = new Float32Array(total);
  let cursor = 0;
  for (idx = 0; idx < count; idx += 1) {
    const chunk = arrayList[idx];
    merged.set(chunk, cursor);
    cursor += chunk.length;
  }
  return merged;
}
/**
 * JavaScript-side wrapper around the native speaker embedding manager.
 *
 * Each method forwards its argument, together with the native handle created
 * in the constructor, to the matching `addon.speakerEmbeddingManager*`
 * function and returns that function's result unchanged.
 */
class SpeakerEmbeddingManager {
  /**
   * @param {number} dim - Passed to `addon.createSpeakerEmbeddingManager`
   *     and kept on `this.dim`.
   */
  constructor(dim) {
    const nativeHandle = addon.createSpeakerEmbeddingManager(dim);

    this.handle = nativeHandle;
    this.dim = dim;
  }

  /**
   * Register one embedding for a speaker.
   *
   * @param {{name: string, v: Float32Array}} obj - Expected shape:
   *     `{name: "xxx", v: a-float32-array}`.
   */
  add(obj) {
    const { handle } = this;
    return addon.speakerEmbeddingManagerAdd(handle, obj);
  }

  /**
   * Register several embeddings for one speaker.
   *
   * The embeddings are flattened into a single Float32Array before being
   * handed to the native side, along with how many embeddings there were.
   *
   * @param {{name: string, v: Float32Array[]}} obj - Expected shape:
   *     `{name: "xxx", v: [float32_array1, float32_array2, ..., float32_arrayn]}`.
   */
  addMulti(obj) {
    const { name, v: embeddings } = obj;
    const payload = {
      name,
      vv: flatten(embeddings),
      n: embeddings.length,
    };
    return addon.speakerEmbeddingManagerAddListFlattened(this.handle, payload);
  }

  /**
   * @param {string} name - Speaker name forwarded to the native remove call.
   */
  remove(name) {
    const { handle } = this;
    return addon.speakerEmbeddingManagerRemove(handle, name);
  }

  /**
   * @param {{v: Float32Array, threshold: number}} obj - Expected shape:
   *     `{v: a-float32-array, threshold: a-float}`.
   */
  search(obj) {
    const { handle } = this;
    return addon.speakerEmbeddingManagerSearch(handle, obj);
  }

  /**
   * @param {{name: string, v: Float32Array, threshold: number}} obj -
   *     Expected shape: `{name: 'xxx', v: a-float32-array, threshold: a-float}`.
   */
  verify(obj) {
    const { handle } = this;
    return addon.speakerEmbeddingManagerVerify(handle, obj);
  }

  /**
   * @param {string} name - Speaker name forwarded to the native contains call.
   */
  contains(name) {
    const { handle } = this;
    return addon.speakerEmbeddingManagerContains(handle, name);
  }

  /** Forwards to the native `speakerEmbeddingManagerNumSpeakers` call. */
  getNumSpeakers() {
    const { handle } = this;
    return addon.speakerEmbeddingManagerNumSpeakers(handle);
  }

  /** Forwards to the native `speakerEmbeddingManagerGetAllSpeakers` call. */
  getAllSpeakerNames() {
    const { handle } = this;
    return addon.speakerEmbeddingManagerGetAllSpeakers(handle);
  }
}
// Public API of this module: the speaker embedding extractor and manager
// classes defined above.
module.exports = {
SpeakerEmbeddingExtractor,
SpeakerEmbeddingManager,
}

View File

@@ -64,5 +64,6 @@ class OnlineRecognizer {
// Names exported from this module.
module.exports = {
OnlineRecognizer,
OnlineStream,
Display
}