Add audio tagging APIs for node-addon-api (#875)

This commit is contained in:
Fangjun Kuang
2024-05-14 17:32:30 +08:00
committed by GitHub
parent 388e6a98fc
commit d19f50b799
12 changed files with 520 additions and 16 deletions

View File

@@ -0,0 +1,227 @@
// scripts/node-addon-api/src/audio-tagging.cc
//
// Copyright (c) 2024 Xiaomi Corporation
#include <sstream>
#include "macros.h" // NOLINT
#include "napi.h" // NOLINT
#include "sherpa-onnx/c-api/c-api.h"
static SherpaOnnxOfflineZipformerAudioTaggingModelConfig
GetAudioTaggingZipformerModelConfig(Napi::Object obj) {
SherpaOnnxOfflineZipformerAudioTaggingModelConfig c;
memset(&c, 0, sizeof(c));
if (!obj.Has("zipformer") || !obj.Get("zipformer").IsObject()) {
return c;
}
Napi::Object o = obj.Get("zipformer").As<Napi::Object>();
SHERPA_ONNX_ASSIGN_ATTR_STR(model, model);
return c;
}
static SherpaOnnxAudioTaggingModelConfig GetAudioTaggingModelConfig(
Napi::Object obj) {
SherpaOnnxAudioTaggingModelConfig c;
memset(&c, 0, sizeof(c));
if (!obj.Has("model") || !obj.Get("model").IsObject()) {
return c;
}
Napi::Object o = obj.Get("model").As<Napi::Object>();
c.zipformer = GetAudioTaggingZipformerModelConfig(o);
SHERPA_ONNX_ASSIGN_ATTR_STR(ced, ced);
SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads);
if (o.Has("debug") &&
(o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) {
if (o.Get("debug").IsBoolean()) {
c.debug = o.Get("debug").As<Napi::Boolean>().Value();
} else {
c.debug = o.Get("debug").As<Napi::Number>().Int32Value();
}
}
SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider);
return c;
}
static Napi::External<SherpaOnnxAudioTagging> CreateAudioTaggingWrapper(
const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
if (info.Length() != 1) {
std::ostringstream os;
os << "Expect only 1 argument. Given: " << info.Length();
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
return {};
}
if (!info[0].IsObject()) {
Napi::TypeError::New(env, "You should pass an object as the only argument.")
.ThrowAsJavaScriptException();
return {};
}
Napi::Object o = info[0].As<Napi::Object>();
SherpaOnnxAudioTaggingConfig c;
memset(&c, 0, sizeof(c));
c.model = GetAudioTaggingModelConfig(o);
SHERPA_ONNX_ASSIGN_ATTR_STR(labels, labels);
SHERPA_ONNX_ASSIGN_ATTR_INT32(top_k, topK);
const SherpaOnnxAudioTagging *at = SherpaOnnxCreateAudioTagging(&c);
if (c.model.zipformer.model) {
delete[] c.model.zipformer.model;
}
if (c.model.ced) {
delete[] c.model.ced;
}
if (c.model.provider) {
delete[] c.model.provider;
}
if (c.labels) {
delete[] c.labels;
}
if (!at) {
Napi::TypeError::New(env, "Please check your config!")
.ThrowAsJavaScriptException();
return {};
}
return Napi::External<SherpaOnnxAudioTagging>::New(
env, const_cast<SherpaOnnxAudioTagging *>(at),
[](Napi::Env env, SherpaOnnxAudioTagging *at) {
SherpaOnnxDestroyAudioTagging(at);
});
}
static Napi::External<SherpaOnnxOfflineStream>
AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
if (info.Length() != 1) {
std::ostringstream os;
os << "Expect only 1 argument. Given: " << info.Length();
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
return {};
}
if (!info[0].IsExternal()) {
Napi::TypeError::New(
env, "You should pass an audio tagging pointer as the only argument")
.ThrowAsJavaScriptException();
return {};
}
SherpaOnnxAudioTagging *at =
info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
const SherpaOnnxOfflineStream *stream =
SherpaOnnxAudioTaggingCreateOfflineStream(at);
return Napi::External<SherpaOnnxOfflineStream>::New(
env, const_cast<SherpaOnnxOfflineStream *>(stream),
[](Napi::Env env, SherpaOnnxOfflineStream *stream) {
DestroyOfflineStream(stream);
});
}
static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
if (info.Length() != 3) {
std::ostringstream os;
os << "Expect only 3 arguments. Given: " << info.Length();
Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException();
return {};
}
if (!info[0].IsExternal()) {
Napi::TypeError::New(
env, "You should pass an audio tagging pointer as the first argument")
.ThrowAsJavaScriptException();
return {};
}
if (!info[1].IsExternal()) {
Napi::TypeError::New(
env, "You should pass a offline stream pointer as the second argument")
.ThrowAsJavaScriptException();
return {};
}
if (!info[2].IsNumber()) {
Napi::TypeError::New(env,
"You should pass an integer as the third argument")
.ThrowAsJavaScriptException();
return {};
}
SherpaOnnxAudioTagging *at =
info[0].As<Napi::External<SherpaOnnxAudioTagging>>().Data();
SherpaOnnxOfflineStream *stream =
info[1].As<Napi::External<SherpaOnnxOfflineStream>>().Data();
int32_t top_k = info[2].As<Napi::Number>().Int32Value();
const SherpaOnnxAudioEvent *const *events =
SherpaOnnxAudioTaggingCompute(at, stream, top_k);
auto p = events;
int32_t k = 0;
while (p && *p) {
++k;
++p;
}
Napi::Array ans = Napi::Array::New(env, k);
for (int32_t i = 0; i != k; ++i) {
Napi::Object obj = Napi::Object::New(env);
obj.Set(Napi::String::New(env, "name"),
Napi::String::New(env, events[i]->name));
obj.Set(Napi::String::New(env, "index"),
Napi::Number::New(env, events[i]->index));
obj.Set(Napi::String::New(env, "prob"),
Napi::Number::New(env, events[i]->prob));
ans[i] = obj;
}
SherpaOnnxAudioTaggingFreeResults(events);
return ans;
}
void InitAudioTagging(Napi::Env env, Napi::Object exports) {
exports.Set(Napi::String::New(env, "createAudioTagging"),
Napi::Function::New(env, CreateAudioTaggingWrapper));
exports.Set(Napi::String::New(env, "audioTaggingCreateOfflineStream"),
Napi::Function::New(env, AudioTaggingCreateOfflineStreamWrapper));
exports.Set(Napi::String::New(env, "audioTaggingCompute"),
Napi::Function::New(env, AudioTaggingComputeWrapper));
}

View File

@@ -19,6 +19,8 @@ void InitSpokenLanguageID(Napi::Env env, Napi::Object exports);
void InitSpeakerID(Napi::Env env, Napi::Object exports);
void InitAudioTagging(Napi::Env env, Napi::Object exports);
Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitStreamingAsr(env, exports);
InitNonStreamingAsr(env, exports);
@@ -28,6 +30,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) {
InitWaveWriter(env, exports);
InitSpokenLanguageID(env, exports);
InitSpeakerID(env, exports);
InitAudioTagging(env, exports);
return exports;
}