diff --git a/.github/scripts/node-addon/run.sh b/.github/scripts/node-addon/run.sh index 67a28776..46002623 100755 --- a/.github/scripts/node-addon/run.sh +++ b/.github/scripts/node-addon/run.sh @@ -18,7 +18,7 @@ fi SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" -# SHERPA_ONNX_VERSION=1.0.22 +# SHERPA_ONNX_VERSION=1.0.23 if [ -z $owner ]; then owner=k2-fsa diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index 9adbaeb4..0e63e00f 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -6,6 +6,15 @@ d=nodejs-addon-examples echo "dir: $d" cd $d +echo "----------add punctuations----------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 + +node ./test_punctuation.js +rm -rf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12 + echo "----------audio tagging----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/audio-tagging-models/sherpa-onnx-zipformer-small-audio-tagging-2024-04-15.tar.bz2 diff --git a/.github/workflows/npm-addon.yaml b/.github/workflows/npm-addon.yaml index ddcb4a85..10bb9024 100644 --- a/.github/workflows/npm-addon.yaml +++ b/.github/workflows/npm-addon.yaml @@ -55,7 +55,7 @@ jobs: SHERPA_ONNX_VERSION=$(grep "SHERPA_ONNX_VERSION" ./CMakeLists.txt | cut -d " " -f 2 | cut -d '"' -f 2) echo "SHERPA_ONNX_VERSION $SHERPA_ONNX_VERSION" - # SHERPA_ONNX_VERSION=1.0.22 + # SHERPA_ONNX_VERSION=1.0.23 src_dir=.github/scripts/node-addon sed -i.bak s/SHERPA_ONNX_VERSION/$SHERPA_ONNX_VERSION/g $src_dir/package.json diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index 62d0e4d6..2a4421d7 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -31,6 +31,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/sherpa-onnx-linux-arm64:$LD_LIBRARY_PAT The following tables list the examples in this folder. +## Add punctuations to text + +|File| Description| +|---|---| +|[./test_punctuation.js](./test_punctuation.js)| Add punctuations to input text using [CT transformer](https://modelscope.cn/models/iic/punc_ct-transformer_zh-cn-common-vocab272727-pytorch/summary). It supports both Chinese and English.| + ## Voice activity detection (VAD) |File| Description| @@ -309,3 +315,13 @@ git clone https://github.com/csukuangfj/sr-data node ./test_speaker_identification.js ``` + +### Add punctuations + +```bash +wget https://github.com/k2-fsa/sherpa-onnx/releases/download/punctuation-models/sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +tar xvf sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 +rm sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12.tar.bz2 + +node ./test_punctuation.js +``` diff --git a/nodejs-addon-examples/test_punctuation.js b/nodejs-addon-examples/test_punctuation.js new file mode 100644 index 00000000..a896ecad --- /dev/null +++ b/nodejs-addon-examples/test_punctuation.js @@ -0,0 +1,32 @@ +// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang) + +const sherpa_onnx = require('sherpa-onnx-node'); + +// Please download test files from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/punctuation-models +function createPunctuation() { + const config = { + model: { + ctTransformer: + './sherpa-onnx-punct-ct-transformer-zh-en-vocab272727-2024-04-12/model.onnx', + debug: true, + numThreads: 1, + provider: 'cpu', + }, + }; + return new sherpa_onnx.Punctuation(config); +} + +const punct = createPunctuation(); +const sentences = [ + '这是一个测试你好吗How are you我很好thank you are you ok谢谢你', + '我们都是木头人不会说话不会动', + 'The African blogosphere is rapidly expanding bringing more voices online in the form of commentaries opinions analyses rants and poetry', +]; +console.log('---'); +for (let sentence of sentences) { + const punct_text = punct.addPunct(sentence); + console.log(`Input: ${sentence}`); + console.log(`Output: ${punct_text}`); + console.log('---'); +} diff --git a/nodejs-addon-examples/test_spoken_language_identification.js b/nodejs-addon-examples/test_spoken_language_identification.js index f5ea546d..35356940 100644 --- a/nodejs-addon-examples/test_spoken_language_identification.js +++ b/nodejs-addon-examples/test_spoken_language_identification.js @@ -2,6 +2,8 @@ const sherpa_onnx = require('sherpa-onnx-node'); +// Please download whisper multi-lingual models from +// https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models function createSpokenLanguageID() { const config = { whisper: { diff --git a/scripts/node-addon-api/CMakeLists.txt b/scripts/node-addon-api/CMakeLists.txt index 9d03a1ee..17355d28 100644 --- a/scripts/node-addon-api/CMakeLists.txt +++ b/scripts/node-addon-api/CMakeLists.txt @@ -21,6 +21,7 @@ set(srcs src/audio-tagging.cc src/non-streaming-asr.cc src/non-streaming-tts.cc + src/punctuation.cc src/sherpa-onnx-node-addon-api.cc src/speaker-identification.cc src/spoken-language-identification.cc diff --git a/scripts/node-addon-api/lib/punctuation.js b/scripts/node-addon-api/lib/punctuation.js new file mode 100644 index 00000000..07d948a5 --- /dev/null +++ b/scripts/node-addon-api/lib/punctuation.js @@ -0,0 +1,15 @@ +const addon = require('./addon.js'); + +class Punctuation { + constructor(config) { + this.handle = addon.createOfflinePunctuation(config); + this.config = config; + } + addPunct(text) { + return addon.offlinePunctuationAddPunct(this.handle, text); + } +} + +module.exports = { + Punctuation, +} diff --git a/scripts/node-addon-api/lib/sherpa-onnx.js b/scripts/node-addon-api/lib/sherpa-onnx.js index 33e398cf..7dcd2de5 100644 --- a/scripts/node-addon-api/lib/sherpa-onnx.js +++ b/scripts/node-addon-api/lib/sherpa-onnx.js @@ -6,6 +6,7 @@ const vad = require('./vad.js'); const slid = require('./spoken-language-identification.js'); const sid = require('./speaker-identification.js'); const at = require('./audio-tagg.js'); +const punct = require('./punctuation.js'); module.exports = { OnlineRecognizer: streaming_asr.OnlineRecognizer, @@ -20,4 +21,5 @@ module.exports = { SpeakerEmbeddingExtractor: sid.SpeakerEmbeddingExtractor, SpeakerEmbeddingManager: sid.SpeakerEmbeddingManager, AudioTagging: at.AudioTagging, + Punctuation: punct.Punctuation, } diff --git a/scripts/node-addon-api/src/audio-tagging.cc b/scripts/node-addon-api/src/audio-tagging.cc index 1ec6251b..519fe162 100644 --- a/scripts/node-addon-api/src/audio-tagging.cc +++ b/scripts/node-addon-api/src/audio-tagging.cc @@ -166,7 +166,7 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) { if (!info[1].IsExternal()) { Napi::TypeError::New( - env, "You should pass a offline stream pointer as the second argument") + env, "You should pass an offline stream pointer as the second argument") .ThrowAsJavaScriptException(); return {}; diff --git a/scripts/node-addon-api/src/punctuation.cc b/scripts/node-addon-api/src/punctuation.cc new file mode 100644 index 00000000..df079b96 --- /dev/null +++ b/scripts/node-addon-api/src/punctuation.cc @@ -0,0 +1,135 @@ +// scripts/node-addon-api/src/punctuation.cc +// +// Copyright (c) 2024 Xiaomi Corporation +#include + +#include "macros.h" // NOLINT +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static SherpaOnnxOfflinePunctuationModelConfig GetOfflinePunctuationModelConfig( + Napi::Object obj) { + SherpaOnnxOfflinePunctuationModelConfig c; + memset(&c, 0, sizeof(c)); + + if (!obj.Has("model") || !obj.Get("model").IsObject()) { + return c; + } + + Napi::Object o = obj.Get("model").As(); + + SHERPA_ONNX_ASSIGN_ATTR_STR(ct_transformer, ctTransformer); + + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); + + if (o.Has("debug") && + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { + if (o.Get("debug").IsBoolean()) { + c.debug = o.Get("debug").As().Value(); + } else { + c.debug = o.Get("debug").As().Int32Value(); + } + } + SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); + + return c; +} + +static Napi::External +CreateOfflinePunctuationWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsObject()) { + Napi::TypeError::New(env, "You should pass an object as the only argument.") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object o = info[0].As(); + + SherpaOnnxOfflinePunctuationConfig c; + memset(&c, 0, sizeof(c)); + c.model = GetOfflinePunctuationModelConfig(o); + + const SherpaOnnxOfflinePunctuation *punct = + SherpaOnnxCreateOfflinePunctuation(&c); + + if (c.model.ct_transformer) { + delete[] c.model.ct_transformer; + } + + if (c.model.provider) { + delete[] c.model.provider; + } + + if (!punct) { + Napi::TypeError::New(env, "Please check your config!") + .ThrowAsJavaScriptException(); + + return {}; + } + + return Napi::External::New( + env, const_cast(punct), + [](Napi::Env env, SherpaOnnxOfflinePunctuation *punct) { + SherpaOnnxDestroyOfflinePunctuation(punct); + }); +} + +static Napi::String OfflinePunctuationAddPunctWraper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, + "You should pass an offline punctuation pointer as the first argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[1].IsString()) { + Napi::TypeError::New(env, "You should pass a string as the second argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + SherpaOnnxOfflinePunctuation *punct = + info[0].As>().Data(); + Napi::String js_text = info[1].As(); + std::string text = js_text.Utf8Value(); + + const char *punct_text = + SherpaOfflinePunctuationAddPunct(punct, text.c_str()); + + Napi::String ans = Napi::String::New(env, punct_text); + SherpaOfflinePunctuationFreeText(punct_text); + return ans; +} + +void InitPunctuation(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "createOfflinePunctuation"), + Napi::Function::New(env, CreateOfflinePunctuationWrapper)); + + exports.Set(Napi::String::New(env, "offlinePunctuationAddPunct"), + Napi::Function::New(env, OfflinePunctuationAddPunctWraper)); +} diff --git a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc index d353eed2..99fc5d8c 100644 --- a/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc +++ b/scripts/node-addon-api/src/sherpa-onnx-node-addon-api.cc @@ -21,6 +21,8 @@ void InitSpeakerID(Napi::Env env, Napi::Object exports); void InitAudioTagging(Napi::Env env, Napi::Object exports); +void InitPunctuation(Napi::Env env, Napi::Object exports); + Napi::Object Init(Napi::Env env, Napi::Object exports) { InitStreamingAsr(env, exports); InitNonStreamingAsr(env, exports); @@ -31,6 +33,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { InitSpokenLanguageID(env, exports); InitSpeakerID(env, exports); InitAudioTagging(env, exports); + InitPunctuation(env, exports); return exports; }