diff --git a/.github/scripts/node-addon/run.sh b/.github/scripts/node-addon/run.sh index 0c9e32fe..6b1c8615 100755 --- a/.github/scripts/node-addon/run.sh +++ b/.github/scripts/node-addon/run.sh @@ -18,7 +18,7 @@ fi SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" -# SHERPA_ONNX_VERSION=1.0.20 +# SHERPA_ONNX_VERSION=1.0.21 if [ -z $owner ]; then owner=k2-fsa diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index 65c8b0e0..e13af102 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -6,6 +6,16 @@ d=nodejs-addon-examples echo "dir: $d" cd $d +echo "----------speaker identification----------" +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +git clone https://github.com/csukuangfj/sr-data + +node ./test_speaker_identification.js + +rm *.onnx +rm -rf sr-data + echo "----------spoken language identification----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-whisper-tiny.tar.bz2 diff --git a/.github/workflows/npm-addon-linux-aarch64.yaml b/.github/workflows/npm-addon-linux-aarch64.yaml index c8460094..d72e760c 100644 --- a/.github/workflows/npm-addon-linux-aarch64.yaml +++ b/.github/workflows/npm-addon-linux-aarch64.yaml @@ -1,6 +1,9 @@ name: npm-addon-linux-aarch64 on: + push: + branches: + - node-addon workflow_dispatch: concurrency: diff --git a/.github/workflows/npm-addon-linux-x64.yaml b/.github/workflows/npm-addon-linux-x64.yaml index 766c7203..b60853b0 100644 --- a/.github/workflows/npm-addon-linux-x64.yaml +++ b/.github/workflows/npm-addon-linux-x64.yaml @@ -1,6 +1,9 @@ name: npm-addon-linux-x64 on: + push: + branches: + - node-addon workflow_dispatch: concurrency: diff --git 
a/.github/workflows/npm-addon-macos.yaml b/.github/workflows/npm-addon-macos.yaml index 4ba6ad63..520196e0 100644 --- a/.github/workflows/npm-addon-macos.yaml +++ b/.github/workflows/npm-addon-macos.yaml @@ -1,6 +1,9 @@ name: npm-addon-macos on: + push: + branches: + - node-addon workflow_dispatch: concurrency: diff --git a/.github/workflows/npm-addon-win-x64.yaml b/.github/workflows/npm-addon-win-x64.yaml index 896e6659..53c0cfd7 100644 --- a/.github/workflows/npm-addon-win-x64.yaml +++ b/.github/workflows/npm-addon-win-x64.yaml @@ -1,6 +1,9 @@ name: npm-addon-win-x64 on: + push: + branches: + - node-addon workflow_dispatch: concurrency: diff --git a/.github/workflows/npm-addon.yaml b/.github/workflows/npm-addon.yaml index 712dace3..6a79ae48 100644 --- a/.github/workflows/npm-addon.yaml +++ b/.github/workflows/npm-addon.yaml @@ -1,6 +1,9 @@ name: npm-addon on: + push: + branches: + - node-addon workflow_dispatch: concurrency: @@ -52,7 +55,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - # SHERPA_ONNX_VERSION=1.0.20 + # SHERPA_ONNX_VERSION=1.0.21 src_dir=.github/scripts/node-addon sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index 1fdb44c6..0d9c73a3 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -201,3 +201,16 @@ node ./test_spoken_language_identification.js npm install naudiodon2 node ./test_vad_spoken_language_identification_microphone.js ``` + +## Speaker identification + +You can find more models at +https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models + +```bash +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx + +git clone https://github.com/csukuangfj/sr-data + +node ./test_speaker_identification.js +``` diff --git
a/nodejs-addon-examples/test_asr_non_streaming_nemo_ctc.js b/nodejs-addon-examples/test_asr_non_streaming_nemo_ctc.js index 89991ee5..7f51a024 100644 --- a/nodejs-addon-examples/test_asr_non_streaming_nemo_ctc.js +++ b/nodejs-addon-examples/test_asr_non_streaming_nemo_ctc.js @@ -2,7 +2,6 @@ const sherpa_onnx = require('sherpa-onnx-node'); const performance = require('perf_hooks').performance; - // Please download test files from // https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models const config = { diff --git a/nodejs-addon-examples/test_speaker_identification.js b/nodejs-addon-examples/test_speaker_identification.js new file mode 100644 index 00000000..d39aeb5f --- /dev/null +++ b/nodejs-addon-examples/test_speaker_identification.js @@ -0,0 +1,104 @@ +// Copyright (c) 2024 Xiaomi Corporation +const sherpa_onnx = require('sherpa-onnx-node'); +const assert = require('node:assert'); + +// Please download model files from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models +function createSpeakerEmbeddingExtractor() { + const config = { + model: './3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx', + numThreads: 1, + debug: true, + }; + return new sherpa_onnx.SpeakerEmbeddingExtractor(config); +} + +// Feed one wave file through the extractor and return the computed embedding. +function computeEmbedding(extractor, filename) { + const stream = extractor.createStream(); + const wave = sherpa_onnx.readWave(filename); + stream.acceptWaveform({sampleRate: wave.sampleRate, samples: wave.samples}); + return extractor.compute(stream); +} + +const extractor = createSpeakerEmbeddingExtractor(); +const manager = new sherpa_onnx.SpeakerEmbeddingManager(extractor.dim); + +// Please download test files from +// https://github.com/csukuangfj/sr-data +const spk1Files = [ + './sr-data/enroll/fangjun-sr-1.wav', + './sr-data/enroll/fangjun-sr-2.wav', + './sr-data/enroll/fangjun-sr-3.wav', +]; + +const spk1Vec = []; +for (const f of spk1Files) { + spk1Vec.push(computeEmbedding(extractor, f)); +} + +const spk2Files = [
+ './sr-data/enroll/leijun-sr-1.wav', + './sr-data/enroll/leijun-sr-2.wav', +]; + +const spk2Vec = []; +for (const f of spk2Files) { + spk2Vec.push(computeEmbedding(extractor, f)); +} + +let ok = manager.addMulti({name: 'fangjun', v: spk1Vec}); +assert.equal(ok, true); + +ok = manager.addMulti({name: 'leijun', v: spk2Vec}); +assert.equal(ok, true); + +assert.equal(manager.getNumSpeakers(), 2); + +assert.equal(manager.contains('fangjun'), true); +assert.equal(manager.contains('leijun'), true); + +console.log('---All speakers---'); + +console.log(manager.getAllSpeakerNames()); +console.log('------------'); + +const testFiles = [ + './sr-data/test/fangjun-test-sr-1.wav', + './sr-data/test/leijun-test-sr-1.wav', + './sr-data/test/liudehua-test-sr-1.wav', +]; + +const threshold = 0.6; + +for (const f of testFiles) { + const embedding = computeEmbedding(extractor, f); + + let name = manager.search({v: embedding, threshold: threshold}); + if (name === '') { + // No enrolled speaker cleared the threshold: print a visible placeholder. + name = '<Unknown>'; + } + console.log(`${f}: ${name}`); +} + + +ok = manager.verify({ + name: 'fangjun', + v: computeEmbedding(extractor, testFiles[0]), + threshold: threshold +}); + +assert.equal(ok, true); + +ok = manager.remove('fangjun'); +assert.equal(ok, true); + +ok = manager.verify({ + name: 'fangjun', + v: computeEmbedding(extractor, testFiles[0]), + threshold: threshold +}); +assert.equal(ok, false); + +assert.equal(manager.getNumSpeakers(), 1); diff --git a/scripts/node-addon-api/CMakeLists.txt b/scripts/node-addon-api/CMakeLists.txt index 4ac4969e..ffd888db 100644 --- a/scripts/node-addon-api/CMakeLists.txt +++ b/scripts/node-addon-api/CMakeLists.txt @@ -21,6 +21,7 @@ set(srcs src/non-streaming-asr.cc src/non-streaming-tts.cc src/sherpa-onnx-node-addon-api.cc + src/speaker-identification.cc src/spoken-language-identification.cc src/streaming-asr.cc src/vad.cc diff --git a/scripts/node-addon-api/lib/sherpa-onnx.js b/scripts/node-addon-api/lib/sherpa-onnx.js index 9580e014..31cfd936 100644 ---
a/scripts/node-addon-api/lib/sherpa-onnx.js +++ b/scripts/node-addon-api/lib/sherpa-onnx.js @@ -4,6 +4,7 @@ const non_streaming_asr = require('./non-streaming-asr.js'); const non_streaming_tts = require('./non-streaming-tts.js'); const vad = require('./vad.js'); const slid = require('./spoken-language-identification.js'); +const sid = require('./speaker-identification.js'); module.exports = { OnlineRecognizer: streaming_asr.OnlineRecognizer, @@ -15,4 +16,6 @@ module.exports = { Vad: vad.Vad, CircularBuffer: vad.CircularBuffer, SpokenLanguageIdentification: slid.SpokenLanguageIdentification, + SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor, + SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager, } diff --git a/scripts/node-addon-api/lib/speaker-identification.js b/scripts/node-addon-api/lib/speaker-identification.js new file mode 100644 index 00000000..4539cf53 --- /dev/null +++ b/scripts/node-addon-api/lib/speaker-identification.js @@ -0,0 +1,115 @@ +const addon = require('./addon.js'); +const streaming_asr = require('./streaming-asr.js'); + +// Wraps the native speaker embedding extractor created from `config`. +class SpeakerEmbeddingExtractor { + constructor(config) { + this.handle = addon.createSpeakerEmbeddingExtractor(config); + this.config = config; + this.dim = addon.speakerEmbeddingExtractorDim(this.handle); + } + + createStream() { + return new streaming_asr.OnlineStream( + addon.speakerEmbeddingExtractorCreateStream(this.handle)); + } + + isReady(stream) { + return addon.speakerEmbeddingExtractorIsReady(this.handle, stream.handle); + } + + // return a float32 array + compute(stream) { + return addon.speakerEmbeddingExtractorComputeEmbedding( + this.handle, stream.handle); + } +} + +// Concatenate a list of float arrays into one Float32Array, in order. +function flatten(arrayList) { + let n = 0; + for (let i = 0; i < arrayList.length; ++i) { + n += arrayList[i].length; + } + const ans = new Float32Array(n); + + let offset = 0; + for (let i = 0; i < arrayList.length; ++i) { + ans.set(arrayList[i], offset); + offset += arrayList[i].length; + } + return ans; +} + +// Wraps the native speaker embedding manager for embeddings of size `dim`. +class
SpeakerEmbeddingManager { + constructor(dim) { + this.handle = addon.createSpeakerEmbeddingManager(dim); + this.dim = dim; + } + + /* + obj = {name: "xxx", v: a-float32-array} + */ + add(obj) { + return addon.speakerEmbeddingManagerAdd(this.handle, obj); + } + + /* + * obj = + * {name: "xxx", v: [float32_array1, float32_array2, ..., float32_arrayn] + */ + addMulti(obj) { + // Only the raw buffer and n are forwarded to the C API, so a ragged list + // cannot be detected natively; reject it here. + for (const e of obj.v) { + if (e.length !== this.dim) { + throw new Error( + `Expect embeddings of dim ${this.dim}. Given: ${e.length}`); + } + } + const c = { + name: obj.name, + vv: flatten(obj.v), + n: obj.v.length, + }; + return addon.speakerEmbeddingManagerAddListFlattened(this.handle, c); + } + + remove(name) { + return addon.speakerEmbeddingManagerRemove(this.handle, name); + } + + /* + * obj = {v: a-float32-array, threshold: a-float } + */ + search(obj) { + return addon.speakerEmbeddingManagerSearch(this.handle, obj); + } + + /* + * obj = {name: 'xxx', v: a-float32-array, threshold: a-float } + */ + verify(obj) { + return addon.speakerEmbeddingManagerVerify(this.handle, obj); + } + + contains(name) { + return addon.speakerEmbeddingManagerContains(this.handle, name); + } + + getNumSpeakers() { + return addon.speakerEmbeddingManagerNumSpeakers(this.handle); + } + + getAllSpeakerNames() { + // The addon hands back an empty value rather than an array when no speaker + // is registered; normalize it to [] so callers can always iterate. + return addon.speakerEmbeddingManagerGetAllSpeakers(this.handle) || []; + } +} + +module.exports = { + SpeakerEmbeddingExtractor, + SpeakerEmbeddingManager, +} diff --git a/scripts/node-addon-api/lib/streaming-asr.js b/scripts/node-addon-api/lib/streaming-asr.js index ad598cff..01471af9 100644 --- a/scripts/node-addon-api/lib/streaming-asr.js +++ b/scripts/node-addon-api/lib/streaming-asr.js @@ -64,5 +64,6 @@ class OnlineRecognizer { module.exports = { OnlineRecognizer, + OnlineStream, Display } diff --git a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc index 4ddd82f9..450c63b6 100644 --- a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc +++ b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc @@ -17,6 +17,8 @@ void InitWaveWriter(Napi::Env env, Napi::Object exports); void
InitSpokenLanguageID(Napi::Env env, Napi::Object exports); +void InitSpeakerID(Napi::Env env, Napi::Object exports); + Napi::Object Init(Napi::Env env, Napi::Object exports) { InitStreamingAsr(env, exports); InitNonStreamingAsr(env, exports); @@ -25,6 +27,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { InitWaveReader(env, exports); InitWaveWriter(env, exports); InitSpokenLanguageID(env, exports); + InitSpeakerID(env, exports); return exports; } diff --git a/scripts/node-addon-api/src/speaker-identification.cc b/scripts/node-addon-api/src/speaker-identification.cc new file mode 100644 index 00000000..5d5bb7cf --- /dev/null +++ b/scripts/node-addon-api/src/speaker-identification.cc @@ -0,0 +1,782 @@ +// scripts/node-addon-api/src/speaker-identification.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include "macros.h" // NOLINT +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static Napi::External +CreateSpeakerEmbeddingExtractorWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsObject()) { + Napi::TypeError::New(env, "You should pass an object as the only argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object o = info[0].As(); + + SherpaOnnxSpeakerEmbeddingExtractorConfig c; + memset(&c, 0, sizeof(c)); + + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); + + if (o.Has("debug") && + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { + if (o.Get("debug").IsBoolean()) { + c.debug = o.Get("debug").As().Value(); + } else { + c.debug = o.Get("debug").As().Int32Value(); + } + } + + SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); + + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = + SherpaOnnxCreateSpeakerEmbeddingExtractor(&c); + + if (c.model) { + delete[] c.model; + } + + if (c.provider) { + delete[] c.provider; + } + + if (!extractor) { + Napi::TypeError::New(env, "Please check your config!") + .ThrowAsJavaScriptException(); + + return {}; + } + + return Napi::External::New( + env, const_cast(extractor), + [](Napi::Env env, SherpaOnnxSpeakerEmbeddingExtractor *extractor) { + SherpaOnnxDestroySpeakerEmbeddingExtractor(extractor); + }); +} + +static Napi::Number SpeakerEmbeddingExtractorDimWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be a speaker embedding extractor pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingExtractor *extractor = + info[0].As>().Data(); + + int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor); + + return Napi::Number::New(env, dim); +} + +static Napi::External +SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding extractor " + "pointer as the only argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingExtractor *extractor = + info[0].As>().Data(); + + const SherpaOnnxOnlineStream *stream = + SherpaOnnxSpeakerEmbeddingExtractorCreateStream(extractor); + + return Napi::External::New( + env, const_cast(stream), + [](Napi::Env env, SherpaOnnxOnlineStream *stream) { + DestroyOnlineStream(stream); + }); +} + +static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be a speaker embedding extractor pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsExternal()) { + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingExtractor *extractor = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = + info[1].As>().Data(); + + int32_t is_ready = + SherpaOnnxSpeakerEmbeddingExtractorIsReady(extractor, stream); + + return Napi::Boolean::New(env, is_ready); +} + +static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be a speaker embedding extractor pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsExternal()) { + Napi::TypeError::New(env, "Argument 1 should be an online stream pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingExtractor *extractor = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = + info[1].As>().Data(); + + const float *v = + SherpaOnnxSpeakerEmbeddingExtractorComputeEmbedding(extractor, stream); + + int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor); + + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast(v), sizeof(float) * dim, + [](Napi::Env /*env*/, void *data) { + SherpaOnnxSpeakerEmbeddingExtractorDestroyEmbedding( + reinterpret_cast(data)); + }); + + return Napi::Float32Array::New(env, dim, 
arrayBuffer, 0); +} + +static Napi::External +CreateSpeakerEmbeddingManagerWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsNumber()) { + Napi::TypeError::New(env, + "You should pass an integer as the only argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + int32_t dim = info[0].As().Int32Value(); + + const SherpaOnnxSpeakerEmbeddingManager *manager = + SherpaOnnxCreateSpeakerEmbeddingManager(dim); + + if (!manager) { + Napi::TypeError::New(env, "Please check your input dim!") + .ThrowAsJavaScriptException(); + + return {}; + } + + return Napi::External::New( + env, const_cast(manager), + [](Napi::Env env, SherpaOnnxSpeakerEmbeddingManager *manager) { + SherpaOnnxDestroySpeakerEmbeddingManager(manager); + }); +} + +static Napi::Boolean SpeakerEmbeddingManagerAddWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::Object obj = info[1].As(); + + if (!obj.Has("v")) { + Napi::TypeError::New(env, "The argument object should have a field v") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("v").IsTypedArray()) { + Napi::TypeError::New(env, "The object['v'] should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("name")) { + Napi::TypeError::New(env, "The argument object should have a field name") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("name").IsString()) { + Napi::TypeError::New(env, "The object['name'] should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Float32Array v = obj.Get("v").As(); + Napi::String js_name = obj.Get("name").As(); + std::string name = js_name.Utf8Value(); + + int32_t ok = + SherpaOnnxSpeakerEmbeddingManagerAdd(manager, name.c_str(), v.Data()); + return Napi::Boolean::New(env, ok); +} + +static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::Object obj = info[1].As(); + + if (!obj.Has("vv")) { + Napi::TypeError::New(env, "The argument object should have a field vv") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("vv").IsTypedArray()) { + Napi::TypeError::New(env, "The object['vv'] should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("name")) { + Napi::TypeError::New(env, "The argument object should have a field name") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("name").IsString()) { + Napi::TypeError::New(env, "The object['name'] should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("n")) { + Napi::TypeError::New(env, "The argument object should have a field n") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("n").IsNumber()) { + Napi::TypeError::New(env, "The object['n'] should be an integer") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Float32Array v = obj.Get("vv").As(); + Napi::String js_name = obj.Get("name").As(); + int32_t n = obj.Get("n").As().Int32Value(); + + std::string name = js_name.Utf8Value(); + + int32_t ok = SherpaOnnxSpeakerEmbeddingManagerAddListFlattened( + manager, name.c_str(), v.Data(), n); + + return Napi::Boolean::New(env, ok); +} + +static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + 
std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsString()) { + Napi::TypeError::New(env, "Argument 1 should be string") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::String js_name = info[1].As(); + std::string name = js_name.Utf8Value(); + + int32_t ok = SherpaOnnxSpeakerEmbeddingManagerRemove(manager, name.c_str()); + + return Napi::Boolean::New(env, ok); +} + +static Napi::String SpeakerEmbeddingManagerSearchWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::Object obj = info[1].As(); + + if (!obj.Has("v")) { + Napi::TypeError::New(env, "The argument object should have a field v") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("v").IsTypedArray()) { + Napi::TypeError::New(env, "The object['v'] should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("threshold")) { + Napi::TypeError::New(env, + "The argument object should have a field threshold") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("threshold").IsNumber()) { + Napi::TypeError::New(env, "The object['threshold'] should be a float") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Float32Array v = obj.Get("v").As(); + float threshold = obj.Get("threshold").As().FloatValue(); + + const char *name = + SherpaOnnxSpeakerEmbeddingManagerSearch(manager, v.Data(), threshold); + const char *p = name; + if (!p) { + p = ""; + } + + Napi::String js_name = Napi::String::New(env, p); + SherpaOnnxSpeakerEmbeddingManagerFreeSearch(name); + + return js_name; +} + +static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::Object obj = info[1].As(); + + if (!obj.Has("v")) { + Napi::TypeError::New(env, "The argument object should have a field v") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("v").IsTypedArray()) { + Napi::TypeError::New(env, "The object['v'] should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("threshold")) { + Napi::TypeError::New(env, + "The argument object should have a field threshold") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("threshold").IsNumber()) { + Napi::TypeError::New(env, "The object['threshold'] should be a float") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("name")) { + Napi::TypeError::New(env, "The argument object should have a field name") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("name").IsString()) { + Napi::TypeError::New(env, "The object['name'] should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Float32Array v = obj.Get("v").As(); + float threshold = obj.Get("threshold").As().FloatValue(); + + Napi::String js_name = obj.Get("name").As(); + std::string name = js_name.Utf8Value(); + + int32_t found = SherpaOnnxSpeakerEmbeddingManagerVerify(manager, name.c_str(), + v.Data(), threshold); + + return Napi::Boolean::New(env, found); +} + +static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper( + const Napi::CallbackInfo &info) { + Napi::Env 
env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsString()) { + Napi::TypeError::New(env, "Argument 1 should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + Napi::String js_name = info[1].As(); + std::string name = js_name.Utf8Value(); + + int32_t exists = + SherpaOnnxSpeakerEmbeddingManagerContains(manager, name.c_str()); + + return Napi::Boolean::New(env, exists); +} + +static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); + + return Napi::Number::New(env, num_speakers); +} + +static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "You should pass a speaker embedding manager pointer " + "as the first argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxSpeakerEmbeddingManager *manager = + info[0].As>().Data(); + + int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); + if (num_speakers == 0) { + return {}; + } + + const char *const *all_speaker_names = + SherpaOnnxSpeakerEmbeddingManagerGetAllSpeakers(manager); + + Napi::Array ans = Napi::Array::New(env, num_speakers); + for (int32_t i = 0; i != num_speakers; ++i) { + ans[i] = Napi::String::New(env, all_speaker_names[i]); + } + SherpaOnnxSpeakerEmbeddingManagerFreeAllSpeakers(all_speaker_names); + return ans; +} + +void InitSpeakerID(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "createSpeakerEmbeddingExtractor"), + Napi::Function::New(env, CreateSpeakerEmbeddingExtractorWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingExtractorDim"), + Napi::Function::New(env, SpeakerEmbeddingExtractorDimWrapper)); + + exports.Set( + Napi::String::New(env, "speakerEmbeddingExtractorCreateStream"), + Napi::Function::New(env, SpeakerEmbeddingExtractorCreateStreamWrapper)); + + exports.Set( + Napi::String::New(env, "speakerEmbeddingExtractorIsReady"), + Napi::Function::New(env, SpeakerEmbeddingExtractorIsReadyWrapper)); + + exports.Set( + Napi::String::New(env, "speakerEmbeddingExtractorComputeEmbedding"), + Napi::Function::New(env, + SpeakerEmbeddingExtractorComputeEmbeddingWrapper)); + + exports.Set(Napi::String::New(env, "createSpeakerEmbeddingManager"), + Napi::Function::New(env, CreateSpeakerEmbeddingManagerWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerAdd"), + Napi::Function::New(env, SpeakerEmbeddingManagerAddWrapper)); + + exports.Set( + 
Napi::String::New(env, "speakerEmbeddingManagerAddListFlattened"), + Napi::Function::New(env, SpeakerEmbeddingManagerAddListFlattenedWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerRemove"), + Napi::Function::New(env, SpeakerEmbeddingManagerRemoveWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerSearch"), + Napi::Function::New(env, SpeakerEmbeddingManagerSearchWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerVerify"), + Napi::Function::New(env, SpeakerEmbeddingManagerVerifyWrapper)); + + exports.Set(Napi::String::New(env, "speakerEmbeddingManagerContains"), + Napi::Function::New(env, SpeakerEmbeddingManagerContainsWrapper)); + + exports.Set( + Napi::String::New(env, "speakerEmbeddingManagerNumSpeakers"), + Napi::Function::New(env, SpeakerEmbeddingManagerNumSpeakersWrapper)); + + exports.Set( + Napi::String::New(env, "speakerEmbeddingManagerGetAllSpeakers"), + Napi::Function::New(env, SpeakerEmbeddingManagerGetAllSpeakersWrapper)); +}