From 2f9553d838ddbf50292d8b0f1d28b1aae49b8aee Mon Sep 17 00:00:00 2001 From: Fangjun Kuang Date: Fri, 3 May 2024 14:47:40 +0800 Subject: [PATCH] Begin to add node-addon-api for sherpa-onnx (#826) --- .github/workflows/test-nodejs-addon-api.yaml | 106 ++++ .gitignore | 2 + scripts/node-addon-api/README.md | 3 + scripts/node-addon-api/binding.gyp | 35 ++ scripts/node-addon-api/lib/sherpa-onnx.js | 7 + scripts/node-addon-api/lib/streaming-asr.js | 41 ++ scripts/node-addon-api/package.json | 53 ++ .../src/sherpa-onnx-node-addon-api.cc | 16 + scripts/node-addon-api/src/streaming-asr.cc | 461 ++++++++++++++++++ scripts/node-addon-api/src/wave-reader.cc | 57 +++ .../test/test_asr_streaming_transducer.js | 53 ++ scripts/node-addon-api/test/test_binding.js | 4 + 12 files changed, 838 insertions(+) create mode 100644 .github/workflows/test-nodejs-addon-api.yaml create mode 100644 scripts/node-addon-api/README.md create mode 100644 scripts/node-addon-api/binding.gyp create mode 100644 scripts/node-addon-api/lib/sherpa-onnx.js create mode 100644 scripts/node-addon-api/lib/streaming-asr.js create mode 100644 scripts/node-addon-api/package.json create mode 100644 scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc create mode 100644 scripts/node-addon-api/src/streaming-asr.cc create mode 100644 scripts/node-addon-api/src/wave-reader.cc create mode 100644 scripts/node-addon-api/test/test_asr_streaming_transducer.js create mode 100644 scripts/node-addon-api/test/test_binding.js diff --git a/.github/workflows/test-nodejs-addon-api.yaml b/.github/workflows/test-nodejs-addon-api.yaml new file mode 100644 index 00000000..4ceba0f9 --- /dev/null +++ b/.github/workflows/test-nodejs-addon-api.yaml @@ -0,0 +1,106 @@ +name: test-node-addon-api + +on: + push: + branches: + - master + paths: + - '.github/workflows/test-nodejs-addon-api.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'scripts/node-addon-api/**' + + pull_request: + branches: 
- master + paths: + - '.github/workflows/test-nodejs-addon-api.yaml' + - 'CMakeLists.txt' + - 'cmake/**' + - 'sherpa-onnx/csrc/*' + - 'sherpa-onnx/c-api/*' + - 'scripts/node-addon-api/**' + + workflow_dispatch: + +concurrency: + group: test-node-addon-api-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test-node-addon-api: + name: ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [macos-latest, macos-14] + python-version: ["3.8"] + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - uses: actions/setup-node@v4 + with: + registry-url: 'https://registry.npmjs.org' + + - name: Display node version + shell: bash + run: | + node --version + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ${{ matrix.os }}-release-shared + + - name: Build sherpa-onnx + shell: bash + run: | + mkdir build + cd build + cmake -DCMAKE_INSTALL_PREFIX=/tmp/sherpa-onnx -DBUILD_SHARED_LIBS=ON .. 
+ make -j + make install + + - name: Build node-addon-api package + shell: bash + run: | + cd scripts/node-addon-api + + export PKG_CONFIG_PATH=/tmp/sherpa-onnx:$PKG_CONFIG_PATH + + ls -lh /tmp/sherpa-onnx + + pkg-config --cflags sherpa-onnx + pkg-config --libs sherpa-onnx + + a=$(pkg-config --cflags sherpa-onnx);a=${a:2};echo $a + + npm i + + ./node_modules/.bin/node-gyp configure build --verbose + + - name: Test streaming transducer + shell: bash + run: | + cd scripts/node-addon-api + curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + tar xvf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + rm sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20.tar.bz2 + + node test/test_asr_streaming_transducer.js + + rm -rf sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20 diff --git a/.gitignore b/.gitignore index 0d0d04e8..0f023f0d 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,5 @@ sherpa-onnx-kws-zipformer-wenetspeech-3.3M-2024-01-01 *.tar.bz2 *.zip sherpa-onnx-ced-* +node_modules +package-lock.json diff --git a/scripts/node-addon-api/README.md b/scripts/node-addon-api/README.md new file mode 100644 index 00000000..6ed366ea --- /dev/null +++ b/scripts/node-addon-api/README.md @@ -0,0 +1,3 @@ +# Introduction + +This folder contains `node-addon-api` wrapper for `sherpa-onnx`. 
diff --git a/scripts/node-addon-api/binding.gyp b/scripts/node-addon-api/binding.gyp new file mode 100644 index 00000000..1294188b --- /dev/null +++ b/scripts/node-addon-api/binding.gyp @@ -0,0 +1,35 @@ +{ + 'targets': [ + { + 'target_name': 'sherpa-onnx-node-addon-api-native', + 'sources': [ + 'src/sherpa-onnx-node-addon-api.cc', + 'src/streaming-asr.cc', + 'src/wave-reader.cc' + ], + 'include_dirs': [ + " + +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" +/* +{ + 'featConfig': { + 'sampleRate': 16000, + 'featureDim': 80, + } +}; + */ +static SherpaOnnxFeatureConfig GetFeatureConfig(Napi::Object obj) { + SherpaOnnxFeatureConfig config; + memset(&config, 0, sizeof(config)); + + if (!obj.Has("featConfig") || !obj.Get("featConfig").IsObject()) { + return config; + } + + Napi::Object featConfig = obj.Get("featConfig").As(); + + if (featConfig.Has("sampleRate") && featConfig.Get("sampleRate").IsNumber()) { + config.sample_rate = + featConfig.Get("sampleRate").As().Int32Value(); + } + + if (featConfig.Has("featureDim") && featConfig.Get("featureDim").IsNumber()) { + config.feature_dim = + featConfig.Get("featureDim").As().Int32Value(); + } + + return config; +} +/* +{ + 'transducer': { + 'encoder': './encoder.onnx', + 'decoder': './decoder.onnx', + 'joiner': './joiner.onnx', + } +} + */ + +static SherpaOnnxOnlineTransducerModelConfig GetOnlineTransducerModelConfig( + Napi::Object obj) { + SherpaOnnxOnlineTransducerModelConfig config; + memset(&config, 0, sizeof(config)); + + if (!obj.Has("transducer") || !obj.Get("transducer").IsObject()) { + return config; + } + + Napi::Object o = obj.Get("transducer").As(); + + if (o.Has("encoder") && o.Get("encoder").IsString()) { + Napi::String encoder = o.Get("encoder").As(); + std::string s = encoder.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.encoder = p; + } + + if (o.Has("decoder") && o.Get("decoder").IsString()) { + Napi::String decoder = 
o.Get("decoder").As(); + std::string s = decoder.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.decoder = p; + } + + if (o.Has("joiner") && o.Get("joiner").IsString()) { + Napi::String joiner = o.Get("joiner").As(); + std::string s = joiner.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.joiner = p; + } + + return config; +} + +static SherpaOnnxOnlineModelConfig GetOnlineModelConfig(Napi::Object obj) { + SherpaOnnxOnlineModelConfig config; + memset(&config, 0, sizeof(config)); + + if (!obj.Has("modelConfig") || !obj.Get("modelConfig").IsObject()) { + return config; + } + + Napi::Object o = obj.Get("modelConfig").As(); + + config.transducer = GetOnlineTransducerModelConfig(o); + + if (o.Has("tokens") && o.Get("tokens").IsString()) { + Napi::String tokens = o.Get("tokens").As(); + std::string s = tokens.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.tokens = p; + } + + if (o.Has("numThreads") && o.Get("numThreads").IsNumber()) { + config.num_threads = o.Get("numThreads").As().Int32Value(); + } + + if (o.Has("provider") && o.Get("provider").IsString()) { + Napi::String provider = o.Get("provider").As(); + std::string s = provider.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.provider = p; + } + + if (o.Has("debug") && o.Get("debug").IsNumber()) { + config.debug = o.Get("debug").As().Int32Value(); + } + + if (o.Has("modelType") && o.Get("modelType").IsString()) { + Napi::String model_type = o.Get("modelType").As(); + std::string s = model_type.Utf8Value(); + char *p = new char[s.size() + 1]; + std::copy(s.begin(), s.end(), p); + p[s.size()] = 0; + + config.model_type = p; + } + + return config; +} + +static Napi::External CreateOnlineRecognizerWrapper( + const Napi::CallbackInfo &info) { + Napi::Env 
env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsObject()) { + Napi::TypeError::New(env, "Expect an object as the argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object config = info[0].As(); + SherpaOnnxOnlineRecognizerConfig c; + memset(&c, 0, sizeof(c)); + c.feat_config = GetFeatureConfig(config); + c.model_config = GetOnlineModelConfig(config); +#if 0 + printf("encoder: %s\n", c.model_config.transducer.encoder + ? c.model_config.transducer.encoder + : "no"); + printf("decoder: %s\n", c.model_config.transducer.decoder + ? c.model_config.transducer.decoder + : "no"); + printf("joiner: %s\n", c.model_config.transducer.joiner + ? c.model_config.transducer.joiner + : "no"); + + printf("tokens: %s\n", c.model_config.tokens ? c.model_config.tokens : "no"); + printf("num_threads: %d\n", c.model_config.num_threads); + printf("provider: %s\n", + c.model_config.provider ? c.model_config.provider : "no"); + printf("debug: %d\n", c.model_config.debug); + printf("model_type: %s\n", + c.model_config.model_type ? 
c.model_config.model_type : "no"); +#endif + + SherpaOnnxOnlineRecognizer *recognizer = CreateOnlineRecognizer(&c); + + if (c.model_config.transducer.encoder) { + delete[] c.model_config.transducer.encoder; + } + + if (c.model_config.transducer.decoder) { + delete[] c.model_config.transducer.decoder; + } + + if (c.model_config.transducer.joiner) { + delete[] c.model_config.transducer.joiner; + } + + if (c.model_config.tokens) { + delete[] c.model_config.tokens; + } + + if (c.model_config.provider) { + delete[] c.model_config.provider; + } + + if (c.model_config.model_type) { + delete[] c.model_config.model_type; + } + + if (!recognizer) { + Napi::TypeError::New(env, "Please check your config!") + .ThrowAsJavaScriptException(); + + return {}; + } + + return Napi::External::New( + env, recognizer, + [](Napi::Env env, SherpaOnnxOnlineRecognizer *recognizer) { + DestroyOnlineRecognizer(recognizer); + }); +} + +static Napi::External CreateOnlineStreamWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "You should pass a recognizer pointer as the only argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxOnlineRecognizer *recognizer = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = CreateOnlineStream(recognizer); + + return Napi::External::New( + env, stream, [](Napi::Env env, SherpaOnnxOnlineStream *stream) { + DestroyOnlineStream(stream); + }); +} + +static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, "Argument 0 should be a online stream pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxOnlineStream *stream = + info[0].As>().Data(); + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return; + } + + Napi::Object obj = info[1].As(); + + if (!obj.Has("samples")) { + Napi::TypeError::New(env, "The argument object should have a field samples") + .ThrowAsJavaScriptException(); + + return; + } + + if (!obj.Get("samples").IsTypedArray()) { + Napi::TypeError::New(env, "The object['samples'] should be a typed array") + .ThrowAsJavaScriptException(); + + return; + } + + if (!obj.Has("sampleRate")) { + Napi::TypeError::New(env, + "The argument object should have a field sampleRate") + .ThrowAsJavaScriptException(); + + return; + } + + if (!obj.Get("sampleRate").IsNumber()) { + Napi::TypeError::New(env, "The object['sampleRate'] should be a number") + .ThrowAsJavaScriptException(); + + return; + } + + Napi::Float32Array samples = obj.Get("samples").As(); + int32_t sample_rate = obj.Get("sampleRate").As().Int32Value(); + + AcceptWaveform(stream, sample_rate, samples.Data(), samples.ElementLength()); +} + +static Napi::Boolean IsOnlineStreamReadyWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "Argument 0 should be a online recognizer pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsExternal()) { + Napi::TypeError::New(env, + "Argument 1 should be a online stream pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxOnlineRecognizer *recognizer = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = + info[1].As>().Data(); + + int32_t is_ready = IsOnlineStreamReady(recognizer, stream); + + return Napi::Boolean::New(env, is_ready); +} + +static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "Argument 0 should be a online recognizer pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + if (!info[1].IsExternal()) { + Napi::TypeError::New(env, + "Argument 1 should be a online stream pointer.") + .ThrowAsJavaScriptException(); + + return; + } + + SherpaOnnxOnlineRecognizer *recognizer = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = + info[1].As>().Data(); + + DecodeOnlineStream(recognizer, stream); +} + +static Napi::String GetOnlineStreamResultAsJsonWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New(env, + "Argument 0 should be a online recognizer pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsExternal()) { + Napi::TypeError::New(env, + "Argument 1 should be a online stream pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxOnlineRecognizer *recognizer = + info[0].As>().Data(); + + SherpaOnnxOnlineStream *stream = + info[1].As>().Data(); + + const char *json = GetOnlineStreamResultAsJson(recognizer, stream); + Napi::String s = Napi::String::New(env, json); + + DestroyOnlineStreamResultJson(json); + + return s; +} + +void InitStreamingAsr(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "createOnlineRecognizer"), + Napi::Function::New(env, CreateOnlineRecognizerWrapper)); + + exports.Set(Napi::String::New(env, "createOnlineStream"), + Napi::Function::New(env, CreateOnlineStreamWrapper)); + + exports.Set(Napi::String::New(env, "acceptWaveformOnline"), + Napi::Function::New(env, AcceptWaveformWrapper)); + + exports.Set(Napi::String::New(env, "isOnlineStreamReady"), + Napi::Function::New(env, IsOnlineStreamReadyWrapper)); + + exports.Set(Napi::String::New(env, "decodeOnlineStream"), + Napi::Function::New(env, DecodeOnlineStreamWrapper)); + + exports.Set(Napi::String::New(env, "getOnlineStreamResultAsJson"), + Napi::Function::New(env, GetOnlineStreamResultAsJsonWrapper)); +} diff --git a/scripts/node-addon-api/src/wave-reader.cc b/scripts/node-addon-api/src/wave-reader.cc new file mode 100644 index 00000000..b2c8c57b --- /dev/null +++ b/scripts/node-addon-api/src/wave-reader.cc @@ -0,0 +1,57 @@ +// scripts/node-addon-api/src/wave-reader.cc +// +// Copyright (c) 2024 Xiaomi Corporation + +#include + +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static Napi::Object 
ReadWaveWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + if (!info[0].IsString()) { + Napi::TypeError::New(env, "Argument should be a string") + .ThrowAsJavaScriptException(); + + return {}; + } + + std::string filename = info[0].As().Utf8Value(); + + const SherpaOnnxWave *wave = SherpaOnnxReadWave(filename.c_str()); + if (!wave) { + std::ostringstream os; + os << "Failed to read '" << filename << "'"; + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast(wave->samples), + sizeof(float) * wave->num_samples, + [](Napi::Env /*env*/, void * /*data*/, const SherpaOnnxWave *hint) { + SherpaOnnxFreeWave(hint); + }, + wave); + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, wave->num_samples, arrayBuffer, 0); + + Napi::Object obj = Napi::Object::New(env); + obj.Set(Napi::String::New(env, "samples"), float32Array); + obj.Set(Napi::String::New(env, "sampleRate"), wave->sample_rate); + return obj; +} + +void InitWaveReader(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "readWave"), + Napi::Function::New(env, ReadWaveWrapper)); +} diff --git a/scripts/node-addon-api/test/test_asr_streaming_transducer.js b/scripts/node-addon-api/test/test_asr_streaming_transducer.js new file mode 100644 index 00000000..e61ba94f --- /dev/null +++ b/scripts/node-addon-api/test/test_asr_streaming_transducer.js @@ -0,0 +1,53 @@ +// Copyright (c) 2024 Xiaomi Corporation +const sherpa_onnx = require('../lib/sherpa-onnx.js'); + +const config = { + 'featConfig': { + 'sampleRate': 16000, + 'featureDim': 80, + }, + 'modelConfig': { + 'transducer': { + 'encoder': + 
'./sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/encoder-epoch-99-avg-1.onnx', + 'decoder': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/decoder-epoch-99-avg-1.onnx', + 'joiner': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/joiner-epoch-99-avg-1.onnx', + }, + 'tokens': + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/tokens.txt', + 'numThreads': 2, + 'provider': 'cpu', + 'debug': 1, + 'modelType': 'zipformer', + } +}; + +const waveFilename = + './sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20/test_wavs/0.wav'; + +const recognizer = new sherpa_onnx.OnlineRecognizer(config); +console.log('Started') +const start = performance.now(); +const stream = recognizer.createStream(); +const wave = sherpa_onnx.readWave(waveFilename); +stream.acceptWaveform(wave.samples, wave.sampleRate); + +const tailPadding = new Float32Array(wave.sampleRate * 0.4); +stream.acceptWaveform(tailPadding, wave.sampleRate); + +while (recognizer.isReady(stream)) { + recognizer.decode(stream); +} +const result = recognizer.getResult(stream) +const stop = performance.now(); +console.log('Done') + +const elapsed_seconds = (stop - start) / 1000; +const duration = wave.samples.length / wave.sampleRate; +const real_time_factor = elapsed_seconds / duration; +console.log('Wave duration', duration.toFixed(3), 'seconds') +console.log('Elapsed', elapsed_seconds.toFixed(3), 'seconds') +console.log('RTF', real_time_factor.toFixed(3)) +console.log('result', result.text) diff --git a/scripts/node-addon-api/test/test_binding.js b/scripts/node-addon-api/test/test_binding.js new file mode 100644 index 00000000..c37e9987 --- /dev/null +++ b/scripts/node-addon-api/test/test_binding.js @@ -0,0 +1,4 @@ +const sherpa_onnx = require('../lib/sherpa-onnx.js'); +console.log(sherpa_onnx) + +console.log('Tests passed- everything looks OK!');