From c5dbf1177c57b9cba690fcbaa0c26065c00fe56f Mon Sep 17 00:00:00 2001
From: Fangjun Kuang
Date: Tue, 11 Mar 2025 15:50:04 +0800
Subject: [PATCH] Add C API for speech enhancement GTCRN models (#1984)

---
 .github/workflows/c-api.yaml                  | 34 ++++++++
 c-api-examples/CMakeLists.txt                 |  3 +
 .../speech-enhancement-gtcrn-c-api.c          | 55 ++++++++++++
 sherpa-onnx/c-api/c-api.cc                    | 85 +++++++++++++++++++
 sherpa-onnx/c-api/c-api.h                     | 61 +++++++++++++
 .../csrc/offline-speech-denoiser-gtcrn-impl.h |  1 -
 6 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 c-api-examples/speech-enhancement-gtcrn-c-api.c

diff --git a/.github/workflows/c-api.yaml b/.github/workflows/c-api.yaml
index 3f1c9f84..ff207e20 100644
--- a/.github/workflows/c-api.yaml
+++ b/.github/workflows/c-api.yaml
@@ -79,6 +79,40 @@ jobs:
             otool -L ./install/lib/libsherpa-onnx-c-api.dylib
           fi
 
+      - name: Test speech enhancement (GTCRN)
+        shell: bash
+        run: |
+          name=speech-enhancement-gtcrn-c-api
+          gcc -o $name ./c-api-examples/$name.c \
+            -I ./build/install/include \
+            -L ./build/install/lib/ \
+            -l sherpa-onnx-c-api \
+            -l onnxruntime
+
+          ls -lh $name
+
+          if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
+            ldd ./$name
+            echo "----"
+            readelf -d ./$name
+          fi
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+          curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+
+          export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
+          export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
+
+          ./$name
+          rm -fv *.onnx
+          mkdir denoised-wavs
+          cp -v inp_16k.wav denoised-wavs
+          cp -v enhanced_16k.wav denoised-wavs
+
+      - uses: actions/upload-artifact@v4
+        with:
+          name: denoised-wavs-${{ matrix.os }}
+          path: ./denoised-wavs/*.wav
+
       - name: Test FireRedAsr
         shell: bash
         run: |
diff --git a/c-api-examples/CMakeLists.txt b/c-api-examples/CMakeLists.txt
index 724977a8..f11a4ec9 100644
--- a/c-api-examples/CMakeLists.txt
+++ b/c-api-examples/CMakeLists.txt
@@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
 add_executable(kws-c-api kws-c-api.c)
 target_link_libraries(kws-c-api sherpa-onnx-c-api)
 
+add_executable(speech-enhancement-gtcrn-c-api speech-enhancement-gtcrn-c-api.c)
+target_link_libraries(speech-enhancement-gtcrn-c-api sherpa-onnx-c-api)
+
 if(SHERPA_ONNX_ENABLE_TTS)
   add_executable(offline-tts-c-api offline-tts-c-api.c)
   target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
diff --git a/c-api-examples/speech-enhancement-gtcrn-c-api.c b/c-api-examples/speech-enhancement-gtcrn-c-api.c
new file mode 100644
index 00000000..a0d482d2
--- /dev/null
+++ b/c-api-examples/speech-enhancement-gtcrn-c-api.c
@@ -0,0 +1,55 @@
+// c-api-examples/speech-enhancement-gtcrn-c-api.c
+//
+// Copyright (c)  2025  Xiaomi Corporation
+//
+// We assume you have pre-downloaded model
+// from
+// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
+//
+//
+// An example command to download
+// clang-format off
+/*
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
+wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
+*/
+// clang-format on
+#include <stdio.h>
+#include <string.h>
+
+#include "sherpa-onnx/c-api/c-api.h"
+
+int32_t main() {
+  SherpaOnnxOfflineSpeechDenoiserConfig config;
+  const char *wav_filename = "./inp_16k.wav";
+  const char *out_wave_filename = "./enhanced_16k.wav";
+
+  memset(&config, 0, sizeof(config));
+  config.model.gtcrn.model = "./gtcrn_simple.onnx";
+
+  const SherpaOnnxOfflineSpeechDenoiser *sd =
+      SherpaOnnxCreateOfflineSpeechDenoiser(&config);
+  if (!sd) {
+    fprintf(stderr, "Please check your config");
+    return -1;
+  }
+
+  const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
+  if (wave == NULL) {
+    SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
+    fprintf(stderr, "Failed to read %s\n", wav_filename);
+    return -1;
+  }
+
+  const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun(
+      sd, wave->samples, wave->num_samples, wave->sample_rate);
+
+  SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
+                      out_wave_filename);
+
+  SherpaOnnxDestroyDenoisedAudio(denoised);
+  SherpaOnnxFreeWave(wave);
+  SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
+
+  fprintf(stdout, "Saved to %s\n", out_wave_filename);
+}
diff --git a/sherpa-onnx/c-api/c-api.cc b/sherpa-onnx/c-api/c-api.cc
index 0dd7fe0c..5dc60802 100644
--- a/sherpa-onnx/c-api/c-api.cc
+++ b/sherpa-onnx/c-api/c-api.cc
@@ -24,6 +24,7 @@
 #include "sherpa-onnx/csrc/macros.h"
 #include "sherpa-onnx/csrc/offline-punctuation.h"
 #include "sherpa-onnx/csrc/offline-recognizer.h"
+#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
 #include "sherpa-onnx/csrc/online-punctuation.h"
 #include "sherpa-onnx/csrc/online-recognizer.h"
 #include "sherpa-onnx/csrc/resample.h"
@@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) {
   return sherpa_onnx::FileExists(filename);
 }
 
+struct SherpaOnnxOfflineSpeechDenoiser {
+  std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl;
+};
+
+static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
+    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
+  sherpa_onnx::OfflineSpeechDenoiserConfig c;
+  c.model.gtcrn.model = SHERPA_ONNX_OR(config->model.gtcrn.model, "");
+  c.model.num_threads = SHERPA_ONNX_OR(config->model.num_threads, 1);
+  c.model.debug = config->model.debug;
+  c.model.provider = SHERPA_ONNX_OR(config->model.provider, "cpu");
+
+  if (c.model.debug) {
+#if __OHOS__
+    SHERPA_ONNX_LOGE("%{public}s\n", c.ToString().c_str());
+#else
+    SHERPA_ONNX_LOGE("%s\n", c.ToString().c_str());
+#endif
+  }
+
+  return c;
+}
+
+const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser(
+    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
+  auto sd_config = GetOfflineSpeechDenoiserConfig(config);
+
+  if (!sd_config.Validate()) {
+    SHERPA_ONNX_LOGE("Errors in config");
+    return nullptr;
+  }
+
+  SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;
+
+  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);
+
+  return sd;
+}
+
+void SherpaOnnxDestroyOfflineSpeechDenoiser(
+    const SherpaOnnxOfflineSpeechDenoiser *sd) {
+  delete sd;
+}
+
+int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
+    const SherpaOnnxOfflineSpeechDenoiser *sd) {
+  return sd->impl->GetSampleRate();
+}
+
+const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun(
+    const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n,
+    int32_t sample_rate) {
+  auto audio = sd->impl->Run(samples, n, sample_rate);
+
+  auto ans = new SherpaOnnxDenoisedAudio;
+
+  float *denoised_samples = new float[audio.samples.size()];
+  std::copy(audio.samples.begin(), audio.samples.end(), denoised_samples);
+
+  ans->samples = denoised_samples;
+  ans->n = audio.samples.size();
+  ans->sample_rate = audio.sample_rate;
+
+  return ans;
+}
+
+void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) {
+  if (p) delete[] p->samples;  // a null p is a no-op, like delete/free
+  delete p;
+}
+
 #if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
 
 struct SherpaOnnxOfflineSpeakerDiarization {
@@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
 
 #ifdef __OHOS__
 
+const SherpaOnnxOfflineSpeechDenoiser *
+SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
+    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
+    NativeResourceManager *mgr) {
+  auto sd_config = GetOfflineSpeechDenoiserConfig(config);
+
+  SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;
+
+  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);
+
+  return sd;
+}
+
 const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
     const SherpaOnnxOnlineRecognizerConfig *config,
     NativeResourceManager *mgr) {
diff --git a/sherpa-onnx/c-api/c-api.h b/sherpa-onnx/c-api/c-api.h
index 01177f6f..9a40d001 100644
--- a/sherpa-onnx/c-api/c-api.h
+++ b/sherpa-onnx/c-api/c-api.h
@@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
 SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
     const SherpaOnnxOfflineSpeakerDiarizationResult *r);
 
+// =========================================================================
+// For offline speech enhancement
+// =========================================================================
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig {
+  const char *model;
+} SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
+
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig {
+  SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;
+  int32_t num_threads;
+  int32_t debug;  // true to print debug information of the model
+  const char *provider;
+} SherpaOnnxOfflineSpeechDenoiserModelConfig;
+
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserConfig {
+  SherpaOnnxOfflineSpeechDenoiserModelConfig model;
+} SherpaOnnxOfflineSpeechDenoiserConfig;
+
+SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiser
+    SherpaOnnxOfflineSpeechDenoiser;
+
+// The user has to invoke SherpaOnnxDestroyOfflineSpeechDenoiser()
+// to free the returned pointer to avoid memory leak
+SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
+SherpaOnnxCreateOfflineSpeechDenoiser(
+    const SherpaOnnxOfflineSpeechDenoiserConfig *config);
+
+// Free the pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser()
+SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser(
+    const SherpaOnnxOfflineSpeechDenoiser *sd);
+
+SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
+    const SherpaOnnxOfflineSpeechDenoiser *sd);
+
+SHERPA_ONNX_API typedef struct SherpaOnnxDenoisedAudio {
+  const float *samples;  // in the range [-1, 1]
+  int32_t n;             // number of samples
+  int32_t sample_rate;
+} SherpaOnnxDenoisedAudio;
+
+// Run speech denoising on input samples
+// @param samples  A 1-D array containing the input audio samples. Each sample
+//                 should be in the range [-1, 1].
+// @param n Number of samples
+// @param sample_rate Sample rate of the input samples
+//
+// The user MUST use SherpaOnnxDestroyDenoisedAudio() to free the returned
+// pointer to avoid memory leak.
+SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
+SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd,
+                                   const float *samples, int32_t n,
+                                   int32_t sample_rate);
+
+SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio(
+    const SherpaOnnxDenoisedAudio *p);
+
 #ifdef __OHOS__
 
 // It is for HarmonyOS
 typedef struct NativeResourceManager NativeResourceManager;
 
+SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
+SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
+    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
+    NativeResourceManager *mgr);
+
 /// @param config  Config for the recognizer.
 /// @return Return a pointer to the recognizer. The user has to invoke
 //          SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak.
diff --git a/sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h b/sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h
index dcd959a6..56ddbecc 100644
--- a/sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h
+++ b/sherpa-onnx/csrc/offline-speech-denoiser-gtcrn-impl.h
@@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl {
 
   DenoisedAudio Run(const float *samples, int32_t n,
                     int32_t sample_rate) const override {
-    SHERPA_ONNX_LOGE("n: %d, sample_rate: %d", n, sample_rate);
     const auto &meta = model_.GetMetaData();
 
     std::vector<float> tmp;