Add C API for speech enhancement GTCRN models (#1984)
This commit is contained in:
34
.github/workflows/c-api.yaml
vendored
34
.github/workflows/c-api.yaml
vendored
@@ -79,6 +79,40 @@ jobs:
|
|||||||
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
|
otool -L ./install/lib/libsherpa-onnx-c-api.dylib
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
- name: Test speech enhancement (GTCRN)
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
name=speech-enhancement-gtcrn-c-api
|
||||||
|
gcc -o $name ./c-api-examples/$name.c \
|
||||||
|
-I ./build/install/include \
|
||||||
|
-L ./build/install/lib/ \
|
||||||
|
-l sherpa-onnx-c-api \
|
||||||
|
-l onnxruntime
|
||||||
|
|
||||||
|
ls -lh $name
|
||||||
|
|
||||||
|
if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
|
||||||
|
ldd ./$name
|
||||||
|
echo "----"
|
||||||
|
readelf -d ./$name
|
||||||
|
fi
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
|
||||||
|
export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
|
||||||
|
export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH
|
||||||
|
|
||||||
|
./$name
|
||||||
|
rm -fv *.onnx
|
||||||
|
mkdir denoised-wavs
|
||||||
|
cp -v inp_16k.wav denoised-wavs
|
||||||
|
cp -v enhanced_16k.wav denoised-wavs
|
||||||
|
|
||||||
|
- uses: actions/upload-artifact@v4
|
||||||
|
with:
|
||||||
|
name: denoised-wavs-${{ matrix.os }}
|
||||||
|
path: ./denoised-wavs/*.wav
|
||||||
|
|
||||||
- name: Test FireRedAsr
|
- name: Test FireRedAsr
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ target_link_libraries(decode-file-c-api sherpa-onnx-c-api cargs)
|
|||||||
add_executable(kws-c-api kws-c-api.c)
|
add_executable(kws-c-api kws-c-api.c)
|
||||||
target_link_libraries(kws-c-api sherpa-onnx-c-api)
|
target_link_libraries(kws-c-api sherpa-onnx-c-api)
|
||||||
|
|
||||||
|
add_executable(speech-enhancement-gtcrn-c-api speech-enhancement-gtcrn-c-api.c)
|
||||||
|
target_link_libraries(speech-enhancement-gtcrn-c-api sherpa-onnx-c-api)
|
||||||
|
|
||||||
if(SHERPA_ONNX_ENABLE_TTS)
|
if(SHERPA_ONNX_ENABLE_TTS)
|
||||||
add_executable(offline-tts-c-api offline-tts-c-api.c)
|
add_executable(offline-tts-c-api offline-tts-c-api.c)
|
||||||
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
|
target_link_libraries(offline-tts-c-api sherpa-onnx-c-api cargs)
|
||||||
|
|||||||
55
c-api-examples/speech-enhancement-gtcrn-c-api.c
Normal file
55
c-api-examples/speech-enhancement-gtcrn-c-api.c
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
// c-api-examples/speech-enhancement-gtcrn-c-api.c
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// We assume you have pre-downloaded the model
|
||||||
|
// from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// An example command to download
|
||||||
|
// clang-format off
|
||||||
|
/*
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
*/
|
||||||
|
// clang-format on
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#include "sherpa-onnx/c-api/c-api.h"
|
||||||
|
|
||||||
|
int32_t main() {
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserConfig config;
|
||||||
|
const char *wav_filename = "./inp_16k.wav";
|
||||||
|
const char *out_wave_filename = "./enhanced_16k.wav";
|
||||||
|
|
||||||
|
memset(&config, 0, sizeof(config));
|
||||||
|
config.model.gtcrn.model = "./gtcrn_simple.onnx";
|
||||||
|
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiser *sd =
|
||||||
|
SherpaOnnxCreateOfflineSpeechDenoiser(&config);
|
||||||
|
if (!sd) {
|
||||||
|
fprintf(stderr, "Please check your config");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SherpaOnnxWave *wave = SherpaOnnxReadWave(wav_filename);
|
||||||
|
if (wave == NULL) {
|
||||||
|
SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
|
||||||
|
fprintf(stderr, "Failed to read %s\n", wav_filename);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const SherpaOnnxDenoisedAudio *denoised = SherpaOnnxOfflineSpeechDenoiserRun(
|
||||||
|
sd, wave->samples, wave->num_samples, wave->sample_rate);
|
||||||
|
|
||||||
|
SherpaOnnxWriteWave(denoised->samples, denoised->n, denoised->sample_rate,
|
||||||
|
out_wave_filename);
|
||||||
|
|
||||||
|
SherpaOnnxDestroyDenoisedAudio(denoised);
|
||||||
|
SherpaOnnxFreeWave(wave);
|
||||||
|
SherpaOnnxDestroyOfflineSpeechDenoiser(sd);
|
||||||
|
|
||||||
|
fprintf(stdout, "Saved to %s\n", out_wave_filename);
|
||||||
|
}
|
||||||
@@ -24,6 +24,7 @@
|
|||||||
#include "sherpa-onnx/csrc/macros.h"
|
#include "sherpa-onnx/csrc/macros.h"
|
||||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||||
|
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
|
||||||
#include "sherpa-onnx/csrc/online-punctuation.h"
|
#include "sherpa-onnx/csrc/online-punctuation.h"
|
||||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||||
#include "sherpa-onnx/csrc/resample.h"
|
#include "sherpa-onnx/csrc/resample.h"
|
||||||
@@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) {
|
|||||||
return sherpa_onnx::FileExists(filename);
|
return sherpa_onnx::FileExists(filename);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Opaque handle handed out by the C API. It only wraps the C++ denoiser; the
// wrapped object is destroyed together with this struct.
struct SherpaOnnxOfflineSpeechDenoiser {
|
||||||
|
std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Build the C++ denoiser config from the C-API config.
//
// The model path, thread count and provider each go through SHERPA_ONNX_OR
// with the fallbacks "", 1 and "cpu" respectively; the debug flag is copied
// as is. When debug is set, the resulting config is logged.
static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  const auto &src = config->model;

  sherpa_onnx::OfflineSpeechDenoiserConfig ans;
  auto &dst = ans.model;

  dst.gtcrn.model = SHERPA_ONNX_OR(src.gtcrn.model, "");
  dst.num_threads = SHERPA_ONNX_OR(src.num_threads, 1);
  dst.debug = src.debug;
  dst.provider = SHERPA_ONNX_OR(src.provider, "cpu");

  // Nothing to print unless the caller asked for debug output.
  if (!dst.debug) {
    return ans;
  }

#if __OHOS__
  SHERPA_ONNX_LOGE("%{public}s\n", ans.ToString().c_str());
#else
  SHERPA_ONNX_LOGE("%s\n", ans.ToString().c_str());
#endif

  return ans;
}
|
||||||
|
|
||||||
|
// Create a denoiser handle from a C-API config.
//
// Returns nullptr (after logging) if the converted config fails Validate().
// Otherwise the caller owns the returned handle and releases it with
// SherpaOnnxDestroyOfflineSpeechDenoiser().
const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  auto cpp_config = GetOfflineSpeechDenoiserConfig(config);

  const bool is_valid = cpp_config.Validate();
  if (!is_valid) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  auto *handle = new SherpaOnnxOfflineSpeechDenoiser;
  handle->impl =
      std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(cpp_config);
  return handle;
}
|
||||||
|
|
||||||
|
void SherpaOnnxDestroyOfflineSpeechDenoiser(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiser *sd) {
|
||||||
|
delete sd;
|
||||||
|
}
|
||||||
|
|
||||||
|
int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiser *sd) {
|
||||||
|
return sd->impl->GetSampleRate();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the wrapped denoiser on `n` input samples and return the result as a
// newly allocated SherpaOnnxDenoisedAudio.
//
// The denoised samples are copied into a buffer owned by the returned struct;
// the caller frees both with SherpaOnnxDestroyDenoisedAudio().
const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun(
    const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n,
    int32_t sample_rate) {
  auto result = sd->impl->Run(samples, n, sample_rate);
  const auto &out = result.samples;

  auto *ans = new SherpaOnnxDenoisedAudio;

  // The C struct cannot hold a std::vector, so hand out a plain copy.
  float *buffer = new float[out.size()];
  std::copy(out.begin(), out.end(), buffer);

  ans->samples = buffer;
  ans->n = out.size();
  ans->sample_rate = result.sample_rate;
  return ans;
}
|
||||||
|
|
||||||
|
// Free a result returned by SherpaOnnxOfflineSpeechDenoiserRun(): first the
// sample buffer, then the struct itself.
//
// Passing a null pointer is a no-op, matching
// SherpaOnnxDestroyOfflineSpeechDenoiser(), whose `delete` is null-safe.
void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) {
  if (!p) {
    return;
  }

  delete[] p->samples;
  delete p;
}
|
||||||
|
|
||||||
#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
|
#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
|
||||||
|
|
||||||
struct SherpaOnnxOfflineSpeakerDiarization {
|
struct SherpaOnnxOfflineSpeakerDiarization {
|
||||||
@@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
|||||||
|
|
||||||
#ifdef __OHOS__
|
#ifdef __OHOS__
|
||||||
|
|
||||||
|
// HarmonyOS variant of SherpaOnnxCreateOfflineSpeechDenoiser().
//
// Unlike the non-OHOS variant, the converted config is not passed through
// Validate() here. The caller owns the returned handle and releases it with
// SherpaOnnxDestroyOfflineSpeechDenoiser().
//
// NOTE(review): `mgr` is accepted but not forwarded to the denoiser, so the
// model is not loaded through the resource manager here -- confirm whether
// sherpa_onnx::OfflineSpeechDenoiser offers a constructor taking it.
const SherpaOnnxOfflineSpeechDenoiser *
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr) {
  // Fixed: this previously called the non-existent
  // GetOfflineSpeechDenoiserConfia(), which broke the OHOS build.
  auto sd_config = GetOfflineSpeechDenoiserConfig(config);

  SherpaOnnxOfflineSpeechDenoiser *sd = new SherpaOnnxOfflineSpeechDenoiser;

  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);

  return sd;
}
|
||||||
|
|
||||||
const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
|
const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
|
||||||
const SherpaOnnxOnlineRecognizerConfig *config,
|
const SherpaOnnxOnlineRecognizerConfig *config,
|
||||||
NativeResourceManager *mgr) {
|
NativeResourceManager *mgr) {
|
||||||
|
|||||||
@@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
|
|||||||
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
||||||
const SherpaOnnxOfflineSpeakerDiarizationResult *r);
|
const SherpaOnnxOfflineSpeakerDiarizationResult *r);
|
||||||
|
|
||||||
|
// =========================================================================
|
||||||
|
// For offline speech enhancement
|
||||||
|
// =========================================================================
|
||||||
|
// Settings specific to the GTCRN speech enhancement model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig {
|
||||||
|
const char *model;  // path to the GTCRN model file, e.g. gtcrn_simple.onnx
|
||||||
|
} SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
|
||||||
|
|
||||||
|
// Model-level settings for the offline speech denoiser.
//
// NOTE(review): the implementation passes num_threads and provider through
// SHERPA_ONNX_OR with fallbacks 1 and "cpu" -- presumably those are used when
// the fields are left as 0/NULL; confirm against the macro definition.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig {
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;
|
||||||
|
int32_t num_threads;
|
||||||
|
int32_t debug; // non-zero to print debug information of the model
|
||||||
|
const char *provider;
|
||||||
|
} SherpaOnnxOfflineSpeechDenoiserModelConfig;
|
||||||
|
|
||||||
|
// Top-level config passed to SherpaOnnxCreateOfflineSpeechDenoiser().
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserConfig {
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserModelConfig model;
|
||||||
|
} SherpaOnnxOfflineSpeechDenoiserConfig;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiser
|
||||||
|
SherpaOnnxOfflineSpeechDenoiser;
|
||||||
|
|
||||||
|
// The user has to invoke SherpaOnnxDestroyOfflineSpeechDenoiser()
|
||||||
|
// to free the returned pointer to avoid memory leak
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
|
||||||
|
SherpaOnnxCreateOfflineSpeechDenoiser(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiserConfig *config);
|
||||||
|
|
||||||
|
// Free the pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser()
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiser *sd);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiser *sd);
|
||||||
|
|
||||||
|
// Result of SherpaOnnxOfflineSpeechDenoiserRun().
// Free it with SherpaOnnxDestroyDenoisedAudio().
SHERPA_ONNX_API typedef struct SherpaOnnxDenoisedAudio {
|
||||||
|
const float *samples; // denoised samples, in the range [-1, 1]
|
||||||
|
int32_t n; // number of samples
|
||||||
|
int32_t sample_rate;  // sample rate of `samples`
|
||||||
|
} SherpaOnnxDenoisedAudio;
|
||||||
|
|
||||||
|
// Run speech denoising on input samples
|
||||||
|
// @param samples A 1-D array containing the input audio samples. Each sample
|
||||||
|
// should be in the range [-1, 1].
|
||||||
|
// @param n Number of samples
|
||||||
|
// @param sample_rate Sample rate of the input samples
|
||||||
|
//
|
||||||
|
// The user MUST use SherpaOnnxDestroyDenoisedAudio() to free the returned
|
||||||
|
// pointer to avoid memory leak.
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
|
||||||
|
SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd,
|
||||||
|
const float *samples, int32_t n,
|
||||||
|
int32_t sample_rate);
|
||||||
|
|
||||||
|
SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio(
|
||||||
|
const SherpaOnnxDenoisedAudio *p);
|
||||||
|
|
||||||
#ifdef __OHOS__
|
#ifdef __OHOS__
|
||||||
|
|
||||||
// It is for HarmonyOS
|
// It is for HarmonyOS
|
||||||
typedef struct NativeResourceManager NativeResourceManager;
|
typedef struct NativeResourceManager NativeResourceManager;
|
||||||
|
|
||||||
|
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
|
||||||
|
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
|
||||||
|
const SherpaOnnxOfflineSpeechDenoiserConfig *config,
|
||||||
|
NativeResourceManager *mgr);
|
||||||
|
|
||||||
/// @param config Config for the recognizer.
|
/// @param config Config for the recognizer.
|
||||||
/// @return Return a pointer to the recognizer. The user has to invoke
|
/// @return Return a pointer to the recognizer. The user has to invoke
|
||||||
// SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak.
|
// SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak.
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ class OfflineSpeechDenoiserGtcrnImpl : public OfflineSpeechDenoiserImpl {
|
|||||||
|
|
||||||
DenoisedAudio Run(const float *samples, int32_t n,
|
DenoisedAudio Run(const float *samples, int32_t n,
|
||||||
int32_t sample_rate) const override {
|
int32_t sample_rate) const override {
|
||||||
SHERPA_ONNX_LOGE("n: %d, sample_rate: %d", n, sample_rate);
|
|
||||||
const auto &meta = model_.GetMetaData();
|
const auto &meta = model_.GetMetaData();
|
||||||
|
|
||||||
std::vector<float> tmp;
|
std::vector<float> tmp;
|
||||||
|
|||||||
Reference in New Issue
Block a user