diff --git a/.github/scripts/test-nodejs-addon-npm.sh b/.github/scripts/test-nodejs-addon-npm.sh index 37f05508..bcb74ee8 100755 --- a/.github/scripts/test-nodejs-addon-npm.sh +++ b/.github/scripts/test-nodejs-addon-npm.sh @@ -10,6 +10,15 @@ arch=$(node -p "require('os').arch()") platform=$(node -p "require('os').platform()") node_version=$(node -p "process.versions.node.split('.')[0]") +echo "----------non-streaming speech denoiser----------" + +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx +curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav + +node ./test_offline_speech_enhancement_gtcrn.js +rm gtcrn_simple.onnx +ls -lh *.wav + echo "----------non-streaming asr FireRedAsr----------" curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 tar xvf sherpa-onnx-fire-red-asr-large-zh_en-2025-02-16.tar.bz2 diff --git a/flutter/sherpa_onnx/example/example.md b/flutter/sherpa_onnx/example/example.md index 265cbd2d..67e5e576 100644 --- a/flutter/sherpa_onnx/example/example.md +++ b/flutter/sherpa_onnx/example/example.md @@ -22,5 +22,5 @@ Hint: All of the following functions can be used in Flutter, even if some of the |Speaker identification and verification| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speaker-identification)| macOS, Windows, Linux| |Audio tagging| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/audio-tagging)| macOS, Windows, Linux| |Keyword spotter| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/keyword-spotter)| macOS, Windows, Linux| -|Add punctuions| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux| +|Add punctuations| 
[Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/add-punctuations)| macOS, Windows, Linux| |Speech enhancement/denoising| [Address](https://github.com/k2-fsa/sherpa-onnx/tree/master/dart-api-examples/speech-enhancement-gtcrn)| macOS, Windows, Linux| diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt index 26dda178..298f9d38 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/CMakeLists.txt @@ -44,6 +44,7 @@ add_library(sherpa_onnx SHARED keyword-spotting.cc non-streaming-asr.cc non-streaming-speaker-diarization.cc + non-streaming-speech-denoiser.cc non-streaming-tts.cc punctuation.cc sherpa-onnx-node-addon-api.cc diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc index f4d6ac53..249ea9d8 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/audio-tagging.cc @@ -121,7 +121,7 @@ AudioTaggingCreateOfflineStreamWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxAudioTagging *at = + const SherpaOnnxAudioTagging *at = info[0].As>().Data(); const SherpaOnnxOfflineStream *stream = @@ -169,10 +169,10 @@ static Napi::Object AudioTaggingComputeWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxAudioTagging *at = + const SherpaOnnxAudioTagging *at = info[0].As>().Data(); - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = info[1].As>().Data(); int32_t top_k = info[2].As().Int32Value(); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc index 6ede4893..af14bdc1 100644 --- 
a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-asr.cc @@ -341,7 +341,7 @@ static Napi::External CreateOfflineStreamWrapper( return {}; } - SherpaOnnxOfflineRecognizer *recognizer = + const SherpaOnnxOfflineRecognizer *recognizer = info[0].As>().Data(); const SherpaOnnxOfflineStream *stream = @@ -373,7 +373,7 @@ static void AcceptWaveformOfflineWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = info[0].As>().Data(); if (!info[1].IsObject()) { @@ -454,10 +454,10 @@ static void DecodeOfflineStreamWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxOfflineRecognizer *recognizer = + const SherpaOnnxOfflineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = info[1].As>().Data(); SherpaOnnxDecodeOfflineStream(recognizer, stream); @@ -482,7 +482,7 @@ static Napi::String GetOfflineStreamResultAsJsonWrapper( return {}; } - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = info[0].As>().Data(); const char *json = SherpaOnnxGetOfflineStreamResultAsJson(stream); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc new file mode 100644 index 00000000..5a847fec --- /dev/null +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc @@ -0,0 +1,278 @@ +// scripts/node-addon-api/src/non-streaming-speech-denoiser.cc +// +// Copyright (c) 2025 Xiaomi Corporation +#include + +#include "macros.h" // NOLINT +#include "napi.h" // NOLINT +#include "sherpa-onnx/c-api/c-api.h" + +static SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig +GetOfflineSpeechDenoiserGtcrnModelConfig(Napi::Object obj) { + SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig c; + 
memset(&c, 0, sizeof(c)); + + if (!obj.Has("gtcrn") || !obj.Get("gtcrn").IsObject()) { + return c; + } + + Napi::Object o = obj.Get("gtcrn").As(); + + SHERPA_ONNX_ASSIGN_ATTR_STR(model, model); + + return c; +} + +static SherpaOnnxOfflineSpeechDenoiserModelConfig +GetOfflineSpeechDenoiserModelConfig(Napi::Object obj) { + SherpaOnnxOfflineSpeechDenoiserModelConfig c; + memset(&c, 0, sizeof(c)); + + if (!obj.Has("model") || !obj.Get("model").IsObject()) { + return c; + } + + Napi::Object o = obj.Get("model").As(); + + c.gtcrn = GetOfflineSpeechDenoiserGtcrnModelConfig(o); + + SHERPA_ONNX_ASSIGN_ATTR_INT32(num_threads, numThreads); + + if (o.Has("debug") && + (o.Get("debug").IsNumber() || o.Get("debug").IsBoolean())) { + if (o.Get("debug").IsBoolean()) { + c.debug = o.Get("debug").As().Value(); + } else { + c.debug = o.Get("debug").As().Int32Value(); + } + } + + SHERPA_ONNX_ASSIGN_ATTR_STR(provider, provider); + + return c; +} + +static Napi::External +CreateOfflineSpeechDenoiserWrapper(const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); +#if __OHOS__ + // the last argument is the NativeResourceManager + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } +#else + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } +#endif + + if (!info[0].IsObject()) { + Napi::TypeError::New(env, "Expect an object as the argument") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object o = info[0].As(); + + SherpaOnnxOfflineSpeechDenoiserConfig c; + memset(&c, 0, sizeof(c)); + c.model = GetOfflineSpeechDenoiserModelConfig(o); + +#if __OHOS__ + std::unique_ptr + mgr(OH_ResourceManager_InitNativeResourceManager(env, info[1]), + &OH_ResourceManager_ReleaseNativeResourceManager); + + const SherpaOnnxOfflineSpeechDenoiser *sd = + SherpaOnnxCreateOfflineSpeechDenoiserOHOS(&c, mgr.get()); +#else + const SherpaOnnxOfflineSpeechDenoiser *sd = + SherpaOnnxCreateOfflineSpeechDenoiser(&c); +#endif + + SHERPA_ONNX_DELETE_C_STR(c.model.gtcrn.model); + SHERPA_ONNX_DELETE_C_STR(c.model.provider); + + if (!sd) { + Napi::TypeError::New(env, "Please check your config!") + .ThrowAsJavaScriptException(); + + return {}; + } + + return Napi::External::New( + env, const_cast(sd), + [](Napi::Env env, SherpaOnnxOfflineSpeechDenoiser *sd) { + SherpaOnnxDestroyOfflineSpeechDenoiser(sd); + }); +} + +static Napi::Object OfflineSpeechDenoiserRunWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 2) { + std::ostringstream os; + os << "Expect only 2 arguments. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be an offline speech denoiser pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + const SherpaOnnxOfflineSpeechDenoiser *sd = + info[0].As>().Data(); + + if (!info[1].IsObject()) { + Napi::TypeError::New(env, "Argument 1 should be an object") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Object obj = info[1].As(); + + if (!obj.Has("samples")) { + Napi::TypeError::New(env, "The argument object should have a field samples") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("samples").IsTypedArray()) { + Napi::TypeError::New(env, "The object['samples'] should be a typed array") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Has("sampleRate")) { + Napi::TypeError::New(env, + "The argument object should have a field sampleRate") + .ThrowAsJavaScriptException(); + + return {}; + } + + if (!obj.Get("sampleRate").IsNumber()) { + Napi::TypeError::New(env, "The object['sampleRate'] should be a number") + .ThrowAsJavaScriptException(); + + return {}; + } + + Napi::Float32Array samples = obj.Get("samples").As(); + int32_t sample_rate = obj.Get("sampleRate").As().Int32Value(); + + const SherpaOnnxDenoisedAudio *audio; + +#if __OHOS__ + // Note(fangjun): For unknown reasons on HarmonyOS, we need to divide it by + // sizeof(float) here + audio = SherpaOnnxOfflineSpeechDenoiserRun( + sd, samples.Data(), samples.ElementLength() / sizeof(float), sample_rate); +#else + audio = SherpaOnnxOfflineSpeechDenoiserRun( + sd, samples.Data(), samples.ElementLength(), sample_rate); +#endif + + bool enable_external_buffer = true; + if (obj.Has("enableExternalBuffer") && + obj.Get("enableExternalBuffer").IsBoolean()) { + enable_external_buffer = + obj.Get("enableExternalBuffer").As().Value(); + } + + if (enable_external_buffer) { + 
Napi::ArrayBuffer arrayBuffer = Napi::ArrayBuffer::New( + env, const_cast(audio->samples), sizeof(float) * audio->n, + [](Napi::Env /*env*/, void * /*data*/, + const SherpaOnnxDenoisedAudio *hint) { + SherpaOnnxDestroyDenoisedAudio(hint); + }, + audio); + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0); + + Napi::Object ans = Napi::Object::New(env); + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate); + return ans; + } else { + // don't use external buffer + Napi::ArrayBuffer arrayBuffer = + Napi::ArrayBuffer::New(env, sizeof(float) * audio->n); + + Napi::Float32Array float32Array = + Napi::Float32Array::New(env, audio->n, arrayBuffer, 0); + + std::copy(audio->samples, audio->samples + audio->n, float32Array.Data()); + + Napi::Object ans = Napi::Object::New(env); + ans.Set(Napi::String::New(env, "samples"), float32Array); + ans.Set(Napi::String::New(env, "sampleRate"), audio->sample_rate); + SherpaOnnxDestroyDenoisedAudio(audio); + return ans; + } +} + +static Napi::Number OfflineSpeechDenoiserGetSampleRateWrapper( + const Napi::CallbackInfo &info) { + Napi::Env env = info.Env(); + + if (info.Length() != 1) { + std::ostringstream os; + os << "Expect only 1 argument. 
Given: " << info.Length(); + + Napi::TypeError::New(env, os.str()).ThrowAsJavaScriptException(); + + return {}; + } + + if (!info[0].IsExternal()) { + Napi::TypeError::New( + env, "Argument 0 should be an offline speech denoiser pointer.") + .ThrowAsJavaScriptException(); + + return {}; + } + + const SherpaOnnxOfflineSpeechDenoiser *sd = + info[0].As>().Data(); + + int32_t sample_rate = SherpaOnnxOfflineSpeechDenoiserGetSampleRate(sd); + + return Napi::Number::New(env, sample_rate); +} + +void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports) { + exports.Set(Napi::String::New(env, "createOfflineSpeechDenoiser"), + Napi::Function::New(env, CreateOfflineSpeechDenoiserWrapper)); + + exports.Set(Napi::String::New(env, "offlineSpeechDenoiserRunWrapper"), + Napi::Function::New(env, OfflineSpeechDenoiserRunWrapper)); + + exports.Set( + Napi::String::New(env, "offlineSpeechDenoiserGetSampleRateWrapper"), + Napi::Function::New(env, OfflineSpeechDenoiserGetSampleRateWrapper)); +} diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc index 145e7b0d..62b0422d 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-tts.cc @@ -217,7 +217,7 @@ static Napi::Number OfflineTtsSampleRateWrapper( return {}; } - SherpaOnnxOfflineTts *tts = + const SherpaOnnxOfflineTts *tts = info[0].As>().Data(); int32_t sample_rate = SherpaOnnxOfflineTtsSampleRate(tts); @@ -245,7 +245,7 @@ static Napi::Number OfflineTtsNumSpeakersWrapper( return {}; } - SherpaOnnxOfflineTts *tts = + const SherpaOnnxOfflineTts *tts = info[0].As>().Data(); int32_t num_speakers = SherpaOnnxOfflineTtsNumSpeakers(tts); @@ -273,7 +273,7 @@ static Napi::Object OfflineTtsGenerateWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxOfflineTts *tts = + const SherpaOnnxOfflineTts *tts = 
info[0].As>().Data(); if (!info[1].IsObject()) { @@ -418,9 +418,9 @@ using TSFN = Napi::TypedThreadSafeFunction, class TtsGenerateWorker : public Napi::AsyncWorker { public: - TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, SherpaOnnxOfflineTts *tts, - const std::string &text, float speed, int32_t sid, - bool use_external_buffer) + TtsGenerateWorker(const Napi::Env &env, TSFN tsfn, + const SherpaOnnxOfflineTts *tts, const std::string &text, + float speed, int32_t sid, bool use_external_buffer) : tsfn_(tsfn), Napi::AsyncWorker{env, "TtsGenerateWorker"}, deferred_(env), @@ -506,7 +506,7 @@ class TtsGenerateWorker : public Napi::AsyncWorker { private: TSFN tsfn_; Napi::Promise::Deferred deferred_; - SherpaOnnxOfflineTts *tts_; + const SherpaOnnxOfflineTts *tts_; std::string text_; float speed_; int32_t sid_; @@ -537,7 +537,7 @@ static Napi::Object OfflineTtsGenerateAsyncWrapper( return {}; } - SherpaOnnxOfflineTts *tts = + const SherpaOnnxOfflineTts *tts = info[0].As>().Data(); if (!info[1].IsObject()) { diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/punctuation.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/punctuation.cc index df4c920f..eadf8adb 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/punctuation.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/punctuation.cc @@ -108,7 +108,7 @@ static Napi::String OfflinePunctuationAddPunctWraper( return {}; } - SherpaOnnxOfflinePunctuation *punct = + const SherpaOnnxOfflinePunctuation *punct = info[0].As>().Data(); Napi::String js_text = info[1].As(); std::string text = js_text.Utf8Value(); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc index 54f0350f..d9f5dae5 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/sherpa-onnx-node-addon-api.cc @@ -27,6 
+27,8 @@ void InitKeywordSpotting(Napi::Env env, Napi::Object exports); void InitNonStreamingSpeakerDiarization(Napi::Env env, Napi::Object exports); +void InitNonStreamingSpeechDenoiser(Napi::Env env, Napi::Object exports); + #if __OHOS__ void InitUtils(Napi::Env env, Napi::Object exports); #endif @@ -44,6 +46,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports) { InitPunctuation(env, exports); InitKeywordSpotting(env, exports); InitNonStreamingSpeakerDiarization(env, exports); + InitNonStreamingSpeechDenoiser(env, exports); #if __OHOS__ InitUtils(env, exports); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc index 5a1ee558..9aaf0c6c 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/speaker-identification.cc @@ -108,7 +108,7 @@ static Napi::Number SpeakerEmbeddingExtractorDimWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingExtractor *extractor = + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = info[0].As>().Data(); int32_t dim = SherpaOnnxSpeakerEmbeddingExtractorDim(extractor); @@ -137,7 +137,7 @@ SpeakerEmbeddingExtractorCreateStreamWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxSpeakerEmbeddingExtractor *extractor = + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = info[0].As>().Data(); const SherpaOnnxOnlineStream *stream = @@ -177,10 +177,10 @@ static Napi::Boolean SpeakerEmbeddingExtractorIsReadyWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingExtractor *extractor = + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); int32_t is_ready = @@ -226,10 +226,10 @@ static Napi::Float32Array SpeakerEmbeddingExtractorComputeEmbeddingWrapper( } } - SherpaOnnxSpeakerEmbeddingExtractor 
*extractor = + const SherpaOnnxSpeakerEmbeddingExtractor *extractor = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); const float *v = @@ -329,7 +329,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::Object obj = info[1].As(); @@ -399,7 +399,7 @@ static Napi::Boolean SpeakerEmbeddingManagerAddListFlattenedWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::Object obj = info[1].As(); @@ -486,7 +486,7 @@ static Napi::Boolean SpeakerEmbeddingManagerRemoveWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::String js_name = info[1].As(); @@ -525,7 +525,7 @@ static Napi::String SpeakerEmbeddingManagerSearchWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::Object obj = info[1].As(); @@ -603,7 +603,7 @@ static Napi::Boolean SpeakerEmbeddingManagerVerifyWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::Object obj = info[1].As(); @@ -691,7 +691,7 @@ static Napi::Boolean SpeakerEmbeddingManagerContainsWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); Napi::String js_name = info[1].As(); @@ -724,7 +724,7 @@ static Napi::Number SpeakerEmbeddingManagerNumSpeakersWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); int32_t num_speakers = 
SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); @@ -753,7 +753,7 @@ static Napi::Array SpeakerEmbeddingManagerGetAllSpeakersWrapper( return {}; } - SherpaOnnxSpeakerEmbeddingManager *manager = + const SherpaOnnxSpeakerEmbeddingManager *manager = info[0].As>().Data(); int32_t num_speakers = SherpaOnnxSpeakerEmbeddingManagerNumSpeakers(manager); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/spoken-language-identification.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/spoken-language-identification.cc index c40e838b..0d77139f 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/spoken-language-identification.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/spoken-language-identification.cc @@ -106,16 +106,17 @@ SpokenLanguageIdentificationCreateOfflineStreamWrapper( return {}; } - SherpaOnnxSpokenLanguageIdentification *slid = + const SherpaOnnxSpokenLanguageIdentification *slid = info[0] .As>() .Data(); - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = SherpaOnnxSpokenLanguageIdentificationCreateOfflineStream(slid); return Napi::External::New( - env, stream, [](Napi::Env env, SherpaOnnxOfflineStream *stream) { + env, const_cast(stream), + [](Napi::Env env, SherpaOnnxOfflineStream *stream) { SherpaOnnxDestroyOfflineStream(stream); }); } @@ -147,12 +148,12 @@ static Napi::String SpokenLanguageIdentificationComputeWrapper( return {}; } - SherpaOnnxSpokenLanguageIdentification *slid = + const SherpaOnnxSpokenLanguageIdentification *slid = info[0] .As>() .Data(); - SherpaOnnxOfflineStream *stream = + const SherpaOnnxOfflineStream *stream = info[1].As>().Data(); const SherpaOnnxSpokenLanguageIdentificationResult *r = diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc index fce28358..5bb0a408 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc +++ 
b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/streaming-asr.cc @@ -278,7 +278,7 @@ static Napi::External CreateOnlineStreamWrapper( return {}; } - SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); const SherpaOnnxOnlineStream *stream = @@ -310,7 +310,7 @@ static void AcceptWaveformWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[0].As>().Data(); if (!info[1].IsObject()) { @@ -390,10 +390,10 @@ static Napi::Boolean IsOnlineStreamReadyWrapper( return {}; } - SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); int32_t is_ready = SherpaOnnxIsOnlineStreamReady(recognizer, stream); @@ -427,10 +427,10 @@ static void DecodeOnlineStreamWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); SherpaOnnxDecodeOnlineStream(recognizer, stream); @@ -463,10 +463,10 @@ static Napi::String GetOnlineStreamResultAsJsonWrapper( return {}; } - SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); const char *json = SherpaOnnxGetOnlineStreamResultAsJson(recognizer, stream); @@ -496,7 +496,7 @@ static void InputFinishedWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[0].As>().Data(); SherpaOnnxOnlineStreamInputFinished(stream); @@ -528,10 +528,10 @@ static void ResetOnlineStreamWrapper(const Napi::CallbackInfo &info) { return; } - 
SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); SherpaOnnxOnlineStreamReset(recognizer, stream); @@ -563,10 +563,10 @@ static Napi::Boolean IsEndpointWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxOnlineRecognizer *recognizer = + const SherpaOnnxOnlineRecognizer *recognizer = info[0].As>().Data(); - SherpaOnnxOnlineStream *stream = + const SherpaOnnxOnlineStream *stream = info[1].As>().Data(); int32_t is_endpoint = SherpaOnnxOnlineStreamIsEndpoint(recognizer, stream); @@ -636,7 +636,7 @@ static void PrintWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxDisplay *display = + const SherpaOnnxDisplay *display = info[0].As>().Data(); int32_t idx = info[1].As().Int32Value(); diff --git a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc index 81eed7c8..b16b5a72 100644 --- a/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc +++ b/harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/vad.cc @@ -28,11 +28,12 @@ static Napi::External CreateCircularBufferWrapper( return {}; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = SherpaOnnxCreateCircularBuffer(info[0].As().Int32Value()); return Napi::External::New( - env, buf, [](Napi::Env env, SherpaOnnxCircularBuffer *p) { + env, const_cast(buf), + [](Napi::Env env, SherpaOnnxCircularBuffer *p) { SherpaOnnxDestroyCircularBuffer(p); }); } @@ -56,7 +57,7 @@ static void CircularBufferPushWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); if (!info[1].IsTypedArray()) { @@ -101,7 +102,7 @@ static Napi::Float32Array CircularBufferGetWrapper( return {}; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); if 
(!info[1].IsNumber()) { @@ -179,7 +180,7 @@ static void CircularBufferPopWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); if (!info[1].IsNumber()) { @@ -213,7 +214,7 @@ static Napi::Number CircularBufferSizeWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); int32_t size = SherpaOnnxCircularBufferSize(buf); @@ -240,7 +241,7 @@ static Napi::Number CircularBufferHeadWrapper(const Napi::CallbackInfo &info) { return {}; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); int32_t size = SherpaOnnxCircularBufferHead(buf); @@ -267,7 +268,7 @@ static void CircularBufferResetWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxCircularBuffer *buf = + const SherpaOnnxCircularBuffer *buf = info[0].As>().Data(); SherpaOnnxCircularBufferReset(buf); @@ -360,18 +361,19 @@ CreateVoiceActivityDetectorWrapper(const Napi::CallbackInfo &info) { mgr(OH_ResourceManager_InitNativeResourceManager(env, info[2]), &OH_ResourceManager_ReleaseNativeResourceManager); - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = SherpaOnnxCreateVoiceActivityDetectorOHOS(&c, buffer_size_in_seconds, mgr.get()); #else - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = SherpaOnnxCreateVoiceActivityDetector(&c, buffer_size_in_seconds); #endif SHERPA_ONNX_DELETE_C_STR(c.silero_vad.model); SHERPA_ONNX_DELETE_C_STR(c.provider); return Napi::External::New( - env, vad, [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) { + env, const_cast(vad), + [](Napi::Env env, SherpaOnnxVoiceActivityDetector *p) { SherpaOnnxDestroyVoiceActivityDetector(p); }); } @@ -396,7 +398,7 @@ static void VoiceActivityDetectorAcceptWaveformWrapper( return; } - SherpaOnnxVoiceActivityDetector *vad = + const 
SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); if (!info[1].IsTypedArray()) { @@ -440,7 +442,7 @@ static Napi::Boolean VoiceActivityDetectorEmptyWrapper( return {}; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); int32_t is_empty = SherpaOnnxVoiceActivityDetectorEmpty(vad); @@ -468,7 +470,7 @@ static Napi::Boolean VoiceActivityDetectorDetectedWrapper( return {}; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); int32_t is_detected = SherpaOnnxVoiceActivityDetectorDetected(vad); @@ -495,7 +497,7 @@ static void VoiceActivityDetectorPopWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); SherpaOnnxVoiceActivityDetectorPop(vad); @@ -520,7 +522,7 @@ static void VoiceActivityDetectorClearWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); SherpaOnnxVoiceActivityDetectorClear(vad); @@ -556,7 +558,7 @@ static Napi::Object VoiceActivityDetectorFrontWrapper( } } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); const SherpaOnnxSpeechSegment *segment = @@ -618,7 +620,7 @@ static void VoiceActivityDetectorResetWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); SherpaOnnxVoiceActivityDetectorReset(vad); @@ -643,7 +645,7 @@ static void VoiceActivityDetectorFlushWrapper(const Napi::CallbackInfo &info) { return; } - SherpaOnnxVoiceActivityDetector *vad = + const SherpaOnnxVoiceActivityDetector *vad = info[0].As>().Data(); SherpaOnnxVoiceActivityDetectorFlush(vad); diff --git a/nodejs-addon-examples/README.md b/nodejs-addon-examples/README.md index 
3054e6a1..0c15ec13 100644 --- a/nodejs-addon-examples/README.md +++ b/nodejs-addon-examples/README.md @@ -43,6 +43,12 @@ export LD_LIBRARY_PATH=$PWD/node_modules/.pnpm/sherpa-onnx-node@} + */ + run(obj) { + return addon.offlineSpeechDenoiserRunWrapper(this.handle, obj); + } +} + +module.exports = { + OfflineSpeechDenoiser, +} diff --git a/scripts/node-addon-api/lib/sherpa-onnx.js b/scripts/node-addon-api/lib/sherpa-onnx.js index 76dffd62..0ad3afe0 100644 --- a/scripts/node-addon-api/lib/sherpa-onnx.js +++ b/scripts/node-addon-api/lib/sherpa-onnx.js @@ -9,6 +9,7 @@ const at = require('./audio-tagg.js'); const punct = require('./punctuation.js'); const kws = require('./keyword-spotter.js'); const sd = require('./non-streaming-speaker-diarization.js'); +const speech_denoiser = require('./non-streaming-speech-denoiser.js'); module.exports = { OnlineRecognizer: streaming_asr.OnlineRecognizer, @@ -26,4 +27,5 @@ module.exports = { Punctuation: punct.Punctuation, KeywordSpotter: kws.KeywordSpotter, OfflineSpeakerDiarization: sd.OfflineSpeakerDiarization, + OfflineSpeechDenoiser: speech_denoiser.OfflineSpeechDenoiser, } diff --git a/scripts/node-addon-api/src/non-streaming-speech-denoiser.cc b/scripts/node-addon-api/src/non-streaming-speech-denoiser.cc new file mode 120000 index 00000000..6c90a07c --- /dev/null +++ b/scripts/node-addon-api/src/non-streaming-speech-denoiser.cc @@ -0,0 +1 @@ +../../../harmony-os/SherpaOnnxHar/sherpa_onnx/src/main/cpp/non-streaming-speech-denoiser.cc \ No newline at end of file diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc index 5dc60802..ec2a9374 100644 --- a/sherpa-onnx/c-api/c-api.cc +++ b/sherpa-onnx/c-api/c-api.cc @@ -922,22 +922,23 @@ struct SherpaOnnxCircularBuffer { std::unique_ptr impl; }; -SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer(int32_t capacity) { +const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer( + int32_t capacity) { SherpaOnnxCircularBuffer *buffer = new 
SherpaOnnxCircularBuffer; buffer->impl = std::make_unique(capacity); return buffer; } -void SherpaOnnxDestroyCircularBuffer(SherpaOnnxCircularBuffer *buffer) { +void SherpaOnnxDestroyCircularBuffer(const SherpaOnnxCircularBuffer *buffer) { delete buffer; } -void SherpaOnnxCircularBufferPush(SherpaOnnxCircularBuffer *buffer, +void SherpaOnnxCircularBufferPush(const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n) { buffer->impl->Push(p, n); } -const float *SherpaOnnxCircularBufferGet(SherpaOnnxCircularBuffer *buffer, +const float *SherpaOnnxCircularBufferGet(const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n) { std::vector v = buffer->impl->Get(start_index, n); @@ -948,19 +949,20 @@ const float *SherpaOnnxCircularBufferGet(SherpaOnnxCircularBuffer *buffer, void SherpaOnnxCircularBufferFree(const float *p) { delete[] p; } -void SherpaOnnxCircularBufferPop(SherpaOnnxCircularBuffer *buffer, int32_t n) { +void SherpaOnnxCircularBufferPop(const SherpaOnnxCircularBuffer *buffer, + int32_t n) { buffer->impl->Pop(n); } -int32_t SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer) { +int32_t SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer) { return buffer->impl->Size(); } -int32_t SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer) { +int32_t SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer) { return buffer->impl->Head(); } -void SherpaOnnxCircularBufferReset(SherpaOnnxCircularBuffer *buffer) { +void SherpaOnnxCircularBufferReset(const SherpaOnnxCircularBuffer *buffer) { buffer->impl->Reset(); } @@ -1008,7 +1010,7 @@ sherpa_onnx::VadModelConfig GetVadModelConfig( return vad_config; } -SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( +const SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds) { auto vad_config = GetVadModelConfig(config); @@ -1025,35 +1027,37 @@ 
SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetector( } void SherpaOnnxDestroyVoiceActivityDetector( - SherpaOnnxVoiceActivityDetector *p) { + const SherpaOnnxVoiceActivityDetector *p) { delete p; } void SherpaOnnxVoiceActivityDetectorAcceptWaveform( - SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n) { + const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n) { p->impl->AcceptWaveform(samples, n); } int32_t SherpaOnnxVoiceActivityDetectorEmpty( - SherpaOnnxVoiceActivityDetector *p) { + const SherpaOnnxVoiceActivityDetector *p) { return p->impl->Empty(); } int32_t SherpaOnnxVoiceActivityDetectorDetected( - SherpaOnnxVoiceActivityDetector *p) { + const SherpaOnnxVoiceActivityDetector *p) { return p->impl->IsSpeechDetected(); } -void SherpaOnnxVoiceActivityDetectorPop(SherpaOnnxVoiceActivityDetector *p) { +void SherpaOnnxVoiceActivityDetectorPop( + const SherpaOnnxVoiceActivityDetector *p) { p->impl->Pop(); } -void SherpaOnnxVoiceActivityDetectorClear(SherpaOnnxVoiceActivityDetector *p) { +void SherpaOnnxVoiceActivityDetectorClear( + const SherpaOnnxVoiceActivityDetector *p) { p->impl->Clear(); } const SherpaOnnxSpeechSegment *SherpaOnnxVoiceActivityDetectorFront( - SherpaOnnxVoiceActivityDetector *p) { + const SherpaOnnxVoiceActivityDetector *p) { const sherpa_onnx::SpeechSegment &segment = p->impl->Front(); SherpaOnnxSpeechSegment *ans = new SherpaOnnxSpeechSegment; @@ -1072,11 +1076,13 @@ void SherpaOnnxDestroySpeechSegment(const SherpaOnnxSpeechSegment *p) { } } -void SherpaOnnxVoiceActivityDetectorReset(SherpaOnnxVoiceActivityDetector *p) { +void SherpaOnnxVoiceActivityDetectorReset( + const SherpaOnnxVoiceActivityDetector *p) { p->impl->Reset(); } -void SherpaOnnxVoiceActivityDetectorFlush(SherpaOnnxVoiceActivityDetector *p) { +void SherpaOnnxVoiceActivityDetectorFlush( + const SherpaOnnxVoiceActivityDetector *p) { p->impl->Flush(); } @@ -1915,7 +1921,7 @@ struct SherpaOnnxLinearResampler { std::unique_ptr 
impl; }; -SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( +const SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, int32_t num_zeros) { SherpaOnnxLinearResampler *p = new SherpaOnnxLinearResampler; @@ -1925,12 +1931,12 @@ SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( return p; } -void SherpaOnnxDestroyLinearResampler(SherpaOnnxLinearResampler *p) { +void SherpaOnnxDestroyLinearResampler(const SherpaOnnxLinearResampler *p) { delete p; } const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( - SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, + const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, int32_t flush) { std::vector o; p->impl->Resample(input, input_dim, flush, &o); @@ -2320,7 +2326,7 @@ const SherpaOnnxOfflineSpeechDenoiser * SherpaOnnxCreateOfflineSpeechDenoiserOHOS( const SherpaOnnxOfflineSpeechDenoiserConfig *config, NativeResourceManager *mgr) { - auto sd_config = GetOfflineSpeechDenoiserConfia(config); + auto sd_config = GetOfflineSpeechDenoiserConfig(config); SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser; @@ -2361,7 +2367,8 @@ const SherpaOnnxOfflineRecognizer *SherpaOnnxCreateOfflineRecognizerOHOS( return recognizer; } -SherpaOnnxVoiceActivityDetector *SherpaOnnxCreateVoiceActivityDetectorOHOS( +const SherpaOnnxVoiceActivityDetector * +SherpaOnnxCreateVoiceActivityDetectorOHOS( const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds, NativeResourceManager *mgr) { if (mgr == nullptr) { diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h index 9a40d001..3cd11a79 100644 --- a/sherpa-onnx/c-api/c-api.h +++ b/sherpa-onnx/c-api/c-api.h @@ -808,15 +808,15 @@ SHERPA_ONNX_API typedef struct SherpaOnnxCircularBuffer // Return an instance of circular buffer. 
The user has to use // SherpaOnnxDestroyCircularBuffer() to free the returned pointer to avoid // memory leak. -SHERPA_ONNX_API SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer( +SHERPA_ONNX_API const SherpaOnnxCircularBuffer *SherpaOnnxCreateCircularBuffer( int32_t capacity); // Free the pointer returned by SherpaOnnxCreateCircularBuffer() SHERPA_ONNX_API void SherpaOnnxDestroyCircularBuffer( - SherpaOnnxCircularBuffer *buffer); + const SherpaOnnxCircularBuffer *buffer); SHERPA_ONNX_API void SherpaOnnxCircularBufferPush( - SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n); + const SherpaOnnxCircularBuffer *buffer, const float *p, int32_t n); // Return n samples starting at the given index. // @@ -824,27 +824,27 @@ SHERPA_ONNX_API void SherpaOnnxCircularBufferPush( // The user has to use SherpaOnnxCircularBufferFree() to free the returned // pointer to avoid memory leak. SHERPA_ONNX_API const float *SherpaOnnxCircularBufferGet( - SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n); + const SherpaOnnxCircularBuffer *buffer, int32_t start_index, int32_t n); // Free the pointer returned by SherpaOnnxCircularBufferGet(). SHERPA_ONNX_API void SherpaOnnxCircularBufferFree(const float *p); // Remove n elements from the buffer SHERPA_ONNX_API void SherpaOnnxCircularBufferPop( - SherpaOnnxCircularBuffer *buffer, int32_t n); + const SherpaOnnxCircularBuffer *buffer, int32_t n); // Return number of elements in the buffer. SHERPA_ONNX_API int32_t -SherpaOnnxCircularBufferSize(SherpaOnnxCircularBuffer *buffer); +SherpaOnnxCircularBufferSize(const SherpaOnnxCircularBuffer *buffer); // Return the head of the buffer. It's always non-decreasing until you // invoke SherpaOnnxCircularBufferReset() which resets head to 0. 
SHERPA_ONNX_API int32_t -SherpaOnnxCircularBufferHead(SherpaOnnxCircularBuffer *buffer); +SherpaOnnxCircularBufferHead(const SherpaOnnxCircularBuffer *buffer); // Clear all elements in the buffer SHERPA_ONNX_API void SherpaOnnxCircularBufferReset( - SherpaOnnxCircularBuffer *buffer); + const SherpaOnnxCircularBuffer *buffer); SHERPA_ONNX_API typedef struct SherpaOnnxSpeechSegment { // The start index in samples of this segment @@ -862,40 +862,40 @@ typedef struct SherpaOnnxVoiceActivityDetector SherpaOnnxVoiceActivityDetector; // Return an instance of VoiceActivityDetector. // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free // the returned pointer to avoid memory leak. -SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector * +SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector * SherpaOnnxCreateVoiceActivityDetector(const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds); SHERPA_ONNX_API void SherpaOnnxDestroyVoiceActivityDetector( - SherpaOnnxVoiceActivityDetector *p); + const SherpaOnnxVoiceActivityDetector *p); SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorAcceptWaveform( - SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n); + const SherpaOnnxVoiceActivityDetector *p, const float *samples, int32_t n); // Return 1 if there are no speech segments available. // Return 0 if there are speech segments. SHERPA_ONNX_API int32_t -SherpaOnnxVoiceActivityDetectorEmpty(SherpaOnnxVoiceActivityDetector *p); +SherpaOnnxVoiceActivityDetectorEmpty(const SherpaOnnxVoiceActivityDetector *p); // Return 1 if there is voice detected. // Return 0 if voice is silent. -SHERPA_ONNX_API int32_t -SherpaOnnxVoiceActivityDetectorDetected(SherpaOnnxVoiceActivityDetector *p); +SHERPA_ONNX_API int32_t SherpaOnnxVoiceActivityDetectorDetected( + const SherpaOnnxVoiceActivityDetector *p); // Return the first speech segment. // It throws if SherpaOnnxVoiceActivityDetectorEmpty() returns 1. 
SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorPop( - SherpaOnnxVoiceActivityDetector *p); + const SherpaOnnxVoiceActivityDetector *p); // Clear current speech segments. SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorClear( - SherpaOnnxVoiceActivityDetector *p); + const SherpaOnnxVoiceActivityDetector *p); // Return the first speech segment. // The user has to use SherpaOnnxDestroySpeechSegment() to free the returned // pointer to avoid memory leak. SHERPA_ONNX_API const SherpaOnnxSpeechSegment * -SherpaOnnxVoiceActivityDetectorFront(SherpaOnnxVoiceActivityDetector *p); +SherpaOnnxVoiceActivityDetectorFront(const SherpaOnnxVoiceActivityDetector *p); // Free the pointer returned SherpaOnnxVoiceActivityDetectorFront(). SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( @@ -903,10 +903,10 @@ SHERPA_ONNX_API void SherpaOnnxDestroySpeechSegment( // Re-initialize the voice activity detector. SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorReset( - SherpaOnnxVoiceActivityDetector *p); + const SherpaOnnxVoiceActivityDetector *p); SHERPA_ONNX_API void SherpaOnnxVoiceActivityDetectorFlush( - SherpaOnnxVoiceActivityDetector *p); + const SherpaOnnxVoiceActivityDetector *p); // ============================================================ // For offline Text-to-Speech (i.e., non-streaming TTS) @@ -1481,15 +1481,16 @@ SHERPA_ONNX_API typedef struct SherpaOnnxLinearResampler */ // The user has to invoke SherpaOnnxDestroyLinearResampler() // to free the returned pointer to avoid memory leak -SHERPA_ONNX_API SherpaOnnxLinearResampler *SherpaOnnxCreateLinearResampler( - int32_t samp_rate_in_hz, int32_t samp_rate_out_hz, float filter_cutoff_hz, - int32_t num_zeros); +SHERPA_ONNX_API const SherpaOnnxLinearResampler * +SherpaOnnxCreateLinearResampler(int32_t samp_rate_in_hz, + int32_t samp_rate_out_hz, + float filter_cutoff_hz, int32_t num_zeros); SHERPA_ONNX_API void SherpaOnnxDestroyLinearResampler( - SherpaOnnxLinearResampler *p); + const SherpaOnnxLinearResampler 
*p); SHERPA_ONNX_API void SherpaOnnxLinearResamplerReset( - SherpaOnnxLinearResampler *p); + const SherpaOnnxLinearResampler *p); typedef struct SherpaOnnxResampleOut { const float *samples; @@ -1501,7 +1502,7 @@ typedef struct SherpaOnnxResampleOut { // If this is the last segment, you can set flush to 1; otherwise, please // set flush to 0 SHERPA_ONNX_API const SherpaOnnxResampleOut *SherpaOnnxLinearResamplerResample( - SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, + const SherpaOnnxLinearResampler *p, const float *input, int32_t input_dim, int32_t flush); SHERPA_ONNX_API void SherpaOnnxLinearResamplerResampleFree( @@ -1724,7 +1725,7 @@ SherpaOnnxCreateOfflineRecognizerOHOS( // Return an instance of VoiceActivityDetector. // The user has to use SherpaOnnxDestroyVoiceActivityDetector() to free // the returned pointer to avoid memory leak. -SHERPA_ONNX_API SherpaOnnxVoiceActivityDetector * +SHERPA_ONNX_API const SherpaOnnxVoiceActivityDetector * SherpaOnnxCreateVoiceActivityDetectorOHOS( const SherpaOnnxVadModelConfig *config, float buffer_size_in_seconds, NativeResourceManager *mgr);