Add C API for speech enhancement GTCRN models (#1984)
This commit is contained in:
@@ -24,6 +24,7 @@
|
||||
#include "sherpa-onnx/csrc/macros.h"
|
||||
#include "sherpa-onnx/csrc/offline-punctuation.h"
|
||||
#include "sherpa-onnx/csrc/offline-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/offline-speech-denoiser.h"
|
||||
#include "sherpa-onnx/csrc/online-punctuation.h"
|
||||
#include "sherpa-onnx/csrc/online-recognizer.h"
|
||||
#include "sherpa-onnx/csrc/resample.h"
|
||||
@@ -1967,6 +1968,77 @@ int32_t SherpaOnnxFileExists(const char *filename) {
|
||||
return sherpa_onnx::FileExists(filename);
|
||||
}
|
||||
|
||||
struct SherpaOnnxOfflineSpeechDenoiser {
|
||||
std::unique_ptr<sherpa_onnx::OfflineSpeechDenoiser> impl;
|
||||
};
|
||||
|
||||
// Translates the C API configuration into the C++ configuration object.
//
// Every field is taken from config->model. The GTCRN model name, the thread
// count and the provider are passed through SHERPA_ONNX_OR with "", 1 and
// "cpu" as the second argument (presumably the value used when the caller
// left the field zero/NULL -- the macro is defined outside this view, so
// confirm its exact semantics there). The debug flag is copied as is.
//
// If the debug flag is set, the resulting configuration is logged.
//
// @param config C API configuration. It is dereferenced unconditionally, so
//               it must not be NULL.
// @return The populated sherpa_onnx::OfflineSpeechDenoiserConfig.
static sherpa_onnx::OfflineSpeechDenoiserConfig GetOfflineSpeechDenoiserConfig(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  sherpa_onnx::OfflineSpeechDenoiserConfig c;

  const auto &src = config->model;
  auto &dst = c.model;

  dst.gtcrn.model = SHERPA_ONNX_OR(src.gtcrn.model, "");
  dst.num_threads = SHERPA_ONNX_OR(src.num_threads, 1);
  dst.debug = src.debug;
  dst.provider = SHERPA_ONNX_OR(src.provider, "cpu");

  if (!dst.debug) {
    return c;
  }

  // Debug requested: dump the effective configuration.
  const auto description = c.ToString();
#if __OHOS__
  SHERPA_ONNX_LOGE("%{public}s\n", description.c_str());
#else
  SHERPA_ONNX_LOGE("%s\n", description.c_str());
#endif

  return c;
}
|
||||
|
||||
// Creates an offline speech denoiser from a C API configuration.
//
// @param config Denoiser configuration. It must not be NULL; it is
//               dereferenced while being converted.
// @return A newly allocated handle, or nullptr (after logging an error) if
//         the converted configuration fails Validate(). The caller releases
//         the handle with SherpaOnnxDestroyOfflineSpeechDenoiser().
const SherpaOnnxOfflineSpeechDenoiser *SherpaOnnxCreateOfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config) {
  auto sd_config = GetOfflineSpeechDenoiserConfig(config);

  if (!sd_config.Validate()) {
    SHERPA_ONNX_LOGE("Errors in config");
    return nullptr;
  }

  // Keep the handle in a unique_ptr while the implementation is constructed
  // so the handle is not leaked if that construction throws.
  auto sd = std::make_unique<SherpaOnnxOfflineSpeechDenoiser>();
  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);

  // Ownership passes to the caller of the C API.
  return sd.release();
}
|
||||
|
||||
void SherpaOnnxDestroyOfflineSpeechDenoiser(
|
||||
const SherpaOnnxOfflineSpeechDenoiser *sd) {
|
||||
delete sd;
|
||||
}
|
||||
|
||||
int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
|
||||
const SherpaOnnxOfflineSpeechDenoiser *sd) {
|
||||
return sd->impl->GetSampleRate();
|
||||
}
|
||||
|
||||
// Runs the wrapped denoiser on the given samples and returns the result as a
// heap-allocated C struct.
//
// @param sd          A valid denoiser handle (dereferenced without a check).
// @param samples     Input samples, forwarded unchanged to the denoiser.
// @param n           Number of input samples.
// @param sample_rate Sample rate of the input samples.
// @return A struct allocated with `new` whose `samples` buffer is allocated
//         with `new[]`; both are released by SherpaOnnxDestroyDenoisedAudio().
const SherpaOnnxDenoisedAudio *SherpaOnnxOfflineSpeechDenoiserRun(
    const SherpaOnnxOfflineSpeechDenoiser *sd, const float *samples, int32_t n,
    int32_t sample_rate) {
  auto denoised = sd->impl->Run(samples, n, sample_rate);
  const auto &out = denoised.samples;

  // Copy the result into a buffer whose lifetime is controlled by the caller.
  float *buffer = new float[out.size()];
  std::copy(out.begin(), out.end(), buffer);

  auto *result = new SherpaOnnxDenoisedAudio;
  result->samples = buffer;
  result->n = out.size();
  result->sample_rate = denoised.sample_rate;

  return result;
}
|
||||
|
||||
// Frees a result returned by SherpaOnnxOfflineSpeechDenoiserRun().
//
// @param p Result to free. A null pointer is accepted and ignored, matching
//          SherpaOnnxDestroyOfflineSpeechDenoiser(), which also tolerates
//          nullptr.
void SherpaOnnxDestroyDenoisedAudio(const SherpaOnnxDenoisedAudio *p) {
  if (!p) {
    return;
  }

  // The sample buffer was allocated with new[], the struct itself with new.
  delete[] p->samples;
  delete p;
}
|
||||
|
||||
#if SHERPA_ONNX_ENABLE_SPEAKER_DIARIZATION == 1
|
||||
|
||||
struct SherpaOnnxOfflineSpeakerDiarization {
|
||||
@@ -2244,6 +2316,19 @@ void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
||||
|
||||
#ifdef __OHOS__
|
||||
|
||||
// HarmonyOS variant of SherpaOnnxCreateOfflineSpeechDenoiser().
//
// @param config Denoiser configuration. It must not be NULL; it is
//               dereferenced while being converted.
// @param mgr    Native resource manager supplied by the application.
// @return A newly allocated handle that the caller releases with
//         SherpaOnnxDestroyOfflineSpeechDenoiser().
//
// NOTE(review): `mgr` is currently not forwarded to the denoiser, and the
// configuration is not validated here (unlike the non-OHOS creator). If
// sherpa_onnx::OfflineSpeechDenoiser offers a constructor that accepts a
// resource manager, it should probably be used -- confirm against
// offline-speech-denoiser.h.
const SherpaOnnxOfflineSpeechDenoiser *
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
    const SherpaOnnxOfflineSpeechDenoiserConfig *config,
    NativeResourceManager *mgr) {
  auto sd_config = GetOfflineSpeechDenoiserConfig(config);

  // Keep the handle in a unique_ptr while the implementation is constructed
  // so the handle is not leaked if that construction throws.
  auto sd = std::make_unique<SherpaOnnxOfflineSpeechDenoiser>();
  sd->impl = std::make_unique<sherpa_onnx::OfflineSpeechDenoiser>(sd_config);

  // Ownership passes to the caller of the C API.
  return sd.release();
}
|
||||
|
||||
const SherpaOnnxOnlineRecognizer *SherpaOnnxCreateOnlineRecognizerOHOS(
|
||||
const SherpaOnnxOnlineRecognizerConfig *config,
|
||||
NativeResourceManager *mgr) {
|
||||
|
||||
@@ -1639,11 +1639,72 @@ SherpaOnnxOfflineSpeakerDiarizationProcessWithCallbackNoArg(
|
||||
SHERPA_ONNX_API void SherpaOnnxOfflineSpeakerDiarizationDestroyResult(
|
||||
const SherpaOnnxOfflineSpeakerDiarizationResult *r);
|
||||
|
||||
// =========================================================================
|
||||
// For offline speech enhancement
|
||||
// =========================================================================
|
||||
// Settings specific to the GTCRN speech enhancement model.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig {
  // Identifies the GTCRN model to load (presumably a path to the model
  // file -- confirm against the C++ configuration).
  const char *model;
} SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserModelConfig {
|
||||
SherpaOnnxOfflineSpeechDenoiserGtcrnModelConfig gtcrn;
|
||||
int32_t num_threads;
|
||||
int32_t debug; // true to print debug information of the model
|
||||
const char *provider;
|
||||
} SherpaOnnxOfflineSpeechDenoiserModelConfig;
|
||||
|
||||
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiserConfig {
|
||||
SherpaOnnxOfflineSpeechDenoiserModelConfig model;
|
||||
} SherpaOnnxOfflineSpeechDenoiserConfig;
|
||||
|
||||
// Opaque handle to an offline speech denoiser. The struct is only declared
// here; users work with pointers to it.
SHERPA_ONNX_API typedef struct SherpaOnnxOfflineSpeechDenoiser
    SherpaOnnxOfflineSpeechDenoiser;
|
||||
|
||||
// The users has to invoke SherpaOnnxDestroyOfflineSpeechDenoiser()
|
||||
// to free the returned pointer to avoid memory leak
|
||||
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
|
||||
SherpaOnnxCreateOfflineSpeechDenoiser(
|
||||
const SherpaOnnxOfflineSpeechDenoiserConfig *config);
|
||||
|
||||
// Free the pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser()
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyOfflineSpeechDenoiser(
|
||||
const SherpaOnnxOfflineSpeechDenoiser *sd);
|
||||
|
||||
// Return the sample rate reported by the denoiser.
//
// @param sd A pointer returned by SherpaOnnxCreateOfflineSpeechDenoiser().
SHERPA_ONNX_API int32_t SherpaOnnxOfflineSpeechDenoiserGetSampleRate(
    const SherpaOnnxOfflineSpeechDenoiser *sd);
|
||||
|
||||
// Result of SherpaOnnxOfflineSpeechDenoiserRun().
SHERPA_ONNX_API typedef struct SherpaOnnxDenoisedAudio {
  // Denoised samples, in the range [-1, 1].
  const float *samples;

  // Number of samples.
  int32_t n;

  // Sample rate of `samples`.
  int32_t sample_rate;
} SherpaOnnxDenoisedAudio;
|
||||
|
||||
// Run speech denosing on input samples
|
||||
// @param samples A 1-D array containing the input audio samples. Each sample
|
||||
// should be in the range [-1, 1].
|
||||
// @param n Number of samples
|
||||
// @param sample_rate Sample rate of the input samples
|
||||
//
|
||||
// The user MUST use SherpaOnnxDestroyDenoisedAudio() to free the returned
|
||||
// pointer to avoid memory leak.
|
||||
SHERPA_ONNX_API const SherpaOnnxDenoisedAudio *
|
||||
SherpaOnnxOfflineSpeechDenoiserRun(const SherpaOnnxOfflineSpeechDenoiser *sd,
|
||||
const float *samples, int32_t n,
|
||||
int32_t sample_rate);
|
||||
|
||||
SHERPA_ONNX_API void SherpaOnnxDestroyDenoisedAudio(
|
||||
const SherpaOnnxDenoisedAudio *p);
|
||||
|
||||
#ifdef __OHOS__
|
||||
|
||||
// Used only on HarmonyOS: forward declaration of the platform's resource
// manager type, so that it can appear in the *OHOS() function signatures.
typedef struct NativeResourceManager NativeResourceManager;
|
||||
|
||||
SHERPA_ONNX_API const SherpaOnnxOfflineSpeechDenoiser *
|
||||
SherpaOnnxCreateOfflineSpeechDenoiserOHOS(
|
||||
const SherpaOnnxOfflineSpeechDenoiserConfig *config,
|
||||
NativeResourceManager *mgr);
|
||||
|
||||
/// @param config Config for the recognizer.
|
||||
/// @return Return a pointer to the recognizer. The user has to invoke
|
||||
// SherpaOnnxDestroyOnlineRecognizer() to free it to avoid memory leak.
|
||||
|
||||
Reference in New Issue
Block a user