Add CXX API for speech enhancement GTCRN models (#1986)
This commit is contained in:
2
.github/workflows/c-api.yaml
vendored
2
.github/workflows/c-api.yaml
vendored
@@ -108,6 +108,8 @@ jobs:
|
|||||||
cp -v inp_16k.wav denoised-wavs
|
cp -v inp_16k.wav denoised-wavs
|
||||||
cp -v enhanced_16k.wav denoised-wavs
|
cp -v enhanced_16k.wav denoised-wavs
|
||||||
|
|
||||||
|
rm $name
|
||||||
|
|
||||||
- uses: actions/upload-artifact@v4
|
- uses: actions/upload-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: denoised-wavs-${{ matrix.os }}
|
name: denoised-wavs-${{ matrix.os }}
|
||||||
|
|||||||
38
.github/workflows/cxx-api.yaml
vendored
38
.github/workflows/cxx-api.yaml
vendored
@@ -81,6 +81,44 @@ jobs:
|
|||||||
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
|
otool -L ./install/lib/libsherpa-onnx-cxx-api.dylib
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Build the GTCRN speech-enhancement C++ API example against the installed
# libraries, run it on a sample recording, and keep the input/output wave
# files so they can be uploaded as an artifact.
- name: Test Speech Enhancement (GTCRN)
  shell: bash
  run: |
    name=speech-enhancement-gtcrn-cxx-api
    g++ -std=c++17 -o $name ./cxx-api-examples/$name.cc \
      -I ./build/install/include \
      -L ./build/install/lib/ \
      -l sherpa-onnx-cxx-api \
      -l sherpa-onnx-c-api \
      -l onnxruntime

    ls -lh $name

    export LD_LIBRARY_PATH=$PWD/build/install/lib:$LD_LIBRARY_PATH
    export DYLD_LIBRARY_PATH=$PWD/build/install/lib:$DYLD_LIBRARY_PATH

    if [[ ${{ matrix.os }} == ubuntu-latest || ${{ matrix.os }} == ubuntu-22.04-arm ]]; then
      ldd ./$name
      echo "----"
      readelf -d ./$name
    fi

    # -f makes curl exit non-zero on an HTTP error instead of saving the
    # error page under the model/wav file name.
    curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
    curl -fSL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav

    ./$name

    # -p keeps this step working if the directory already exists.
    mkdir -p denoised-wavs
    cp -v inp_16k.wav denoised-wavs
    cp -v enhanced_16k.wav denoised-wavs

    rm $name

- uses: actions/upload-artifact@v4
  with:
    name: denoised-wavs-cxx-${{ matrix.os }}
    path: ./denoised-wavs/*.wav
|
||||||
|
|
||||||
- name: Test FireRedAsr
|
- name: Test FireRedAsr
|
||||||
shell: bash
|
shell: bash
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
@@ -3,6 +3,9 @@ include_directories(${CMAKE_SOURCE_DIR})
|
|||||||
add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
|
add_executable(streaming-zipformer-cxx-api ./streaming-zipformer-cxx-api.cc)
|
||||||
target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
|
target_link_libraries(streaming-zipformer-cxx-api sherpa-onnx-cxx-api)
|
||||||
|
|
||||||
|
# Example: offline speech enhancement with a GTCRN model via the C++ API.
add_executable(speech-enhancement-gtcrn-cxx-api
  ./speech-enhancement-gtcrn-cxx-api.cc
)
target_link_libraries(speech-enhancement-gtcrn-cxx-api
  sherpa-onnx-cxx-api
)
|
||||||
|
|
||||||
add_executable(kws-cxx-api ./kws-cxx-api.cc)
|
add_executable(kws-cxx-api ./kws-cxx-api.cc)
|
||||||
target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
|
target_link_libraries(kws-cxx-api sherpa-onnx-cxx-api)
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// cxx-api-examples/kokoro-tts-zh-en-cxx-api.c
|
// cxx-api-examples/kokoro-tts-zh-en-cxx-api.cc
|
||||||
//
|
//
|
||||||
// Copyright (c) 2025 Xiaomi Corporation
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// cxx-api-examples/matcha-tts-en-cxx-api.c
|
// cxx-api-examples/matcha-tts-en-cxx-api.cc
|
||||||
//
|
//
|
||||||
// Copyright (c) 2025 Xiaomi Corporation
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
// cxx-api-examples/matcha-tts-zh-cxx-api.c
|
// cxx-api-examples/matcha-tts-zh-cxx-api.cc
|
||||||
//
|
//
|
||||||
// Copyright (c) 2025 Xiaomi Corporation
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
|
||||||
|
|||||||
65
cxx-api-examples/speech-enhancement-gtcrn-cxx-api.cc
Normal file
65
cxx-api-examples/speech-enhancement-gtcrn-cxx-api.cc
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
// cxx-api-examples/speech-enhancement-gtcrn-cxx-api.cc
|
||||||
|
//
|
||||||
|
// Copyright (c) 2025 Xiaomi Corporation
|
||||||
|
//
|
||||||
|
// We assume you have pre-downloaded the model
|
||||||
|
// from
|
||||||
|
// https://github.com/k2-fsa/sherpa-onnx/releases/tag/speech-enhancement-models
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// An example command to download
|
||||||
|
// clang-format off
|
||||||
|
/*
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/gtcrn_simple.onnx
|
||||||
|
wget https://github.com/k2-fsa/sherpa-onnx/releases/download/speech-enhancement-models/inp_16k.wav
|
||||||
|
*/
|
||||||
|
// clang-format on
|
||||||
|
#include <chrono>  // NOLINT
#include <cstdint>
#include <cstdio>
#include <iostream>
#include <string>

#include "sherpa-onnx/c-api/cxx-api.h"
|
||||||
|
|
||||||
|
int32_t main() {
|
||||||
|
using namespace sherpa_onnx::cxx; // NOLINT
|
||||||
|
|
||||||
|
OfflineSpeechDenoiserConfig config;
|
||||||
|
std::string wav_filename = "./inp_16k.wav";
|
||||||
|
std::string out_wave_filename = "./enhanced_16k.wav";
|
||||||
|
|
||||||
|
config.model.gtcrn.model = "./gtcrn_simple.onnx";
|
||||||
|
|
||||||
|
auto sd = OfflineSpeechDenoiser::Create(config);
|
||||||
|
if (!sd.Get()) {
|
||||||
|
std::cerr << "Please check your config\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Wave wave = ReadWave(wav_filename);
|
||||||
|
if (wave.samples.empty()) {
|
||||||
|
std::cerr << "Failed to read: '" << wav_filename << "'\n";
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "Started\n";
|
||||||
|
const auto begin = std::chrono::steady_clock::now();
|
||||||
|
auto denoised =
|
||||||
|
sd.Run(wave.samples.data(), wave.samples.size(), wave.sample_rate);
|
||||||
|
const auto end = std::chrono::steady_clock::now();
|
||||||
|
std::cout << "Done\n";
|
||||||
|
|
||||||
|
WriteWave(out_wave_filename, {denoised.samples, denoised.sample_rate});
|
||||||
|
|
||||||
|
const float elapsed_seconds =
|
||||||
|
std::chrono::duration_cast<std::chrono::milliseconds>(end - begin)
|
||||||
|
.count() /
|
||||||
|
1000.;
|
||||||
|
float duration = wave.samples.size() / static_cast<float>(wave.sample_rate);
|
||||||
|
float rtf = elapsed_seconds / duration;
|
||||||
|
|
||||||
|
std::cout << "Saved to " << out_wave_filename << "\n";
|
||||||
|
printf("Duration: %.3fs\n", duration);
|
||||||
|
printf("Elapsed seconds: %.3fs\n", elapsed_seconds);
|
||||||
|
printf("(Real time factor) RTF = %.3f / %.3f = %.3f\n", elapsed_seconds,
|
||||||
|
duration, rtf);
|
||||||
|
}
|
||||||
@@ -513,4 +513,49 @@ void KeywordSpotter::Reset(const OnlineStream *s) const {
|
|||||||
SherpaOnnxResetKeywordStream(p_, s->Get());
|
SherpaOnnxResetKeywordStream(p_, s->Get());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ============================================================
|
||||||
|
// For Offline Speech Enhancement
|
||||||
|
// ============================================================
|
||||||
|
|
||||||
|
// Builds an OfflineSpeechDenoiser from `config`.
//
// The C++ config is mirrored into a zero-filled C config struct, which is
// handed to SherpaOnnxCreateOfflineSpeechDenoiser(). The returned pointer is
// wrapped as-is; no null check is performed here.
OfflineSpeechDenoiser OfflineSpeechDenoiser::Create(
    const OfflineSpeechDenoiserConfig &config) {
  const auto &model = config.model;

  struct SherpaOnnxOfflineSpeechDenoiserConfig c_config;
  memset(&c_config, 0, sizeof(c_config));

  // String fields are passed as pointers into `config`, not copied.
  c_config.model.gtcrn.model = model.gtcrn.model.c_str();
  c_config.model.provider = model.provider.c_str();

  c_config.model.num_threads = model.num_threads;
  c_config.model.debug = model.debug;

  return OfflineSpeechDenoiser(
      SherpaOnnxCreateOfflineSpeechDenoiser(&c_config));
}
|
||||||
|
|
||||||
|
// Releases the C-API denoiser `p` by forwarding it to
// SherpaOnnxDestroyOfflineSpeechDenoiser().
void OfflineSpeechDenoiser::Destroy(
    const SherpaOnnxOfflineSpeechDenoiser *p) const {
  SherpaOnnxDestroyOfflineSpeechDenoiser(p);
}
|
||||||
|
|
||||||
|
// Wraps the C-API handle `p` by passing it to the MoveOnly base class.
OfflineSpeechDenoiser::OfflineSpeechDenoiser(
    const SherpaOnnxOfflineSpeechDenoiser *p)
    : MoveOnly<OfflineSpeechDenoiser, SherpaOnnxOfflineSpeechDenoiser>(p) {}
|
||||||
|
|
||||||
|
// Runs the denoiser on `n` samples starting at `samples`, passing
// `sample_rate` through to the C API, and returns a copy of the result.
//
// The C-API result is copied into the returned DenoisedAudio and then released
// with SherpaOnnxDestroyDenoisedAudio(). If the C API returns a null result,
// an empty DenoisedAudio with sample_rate 0 is returned instead of
// dereferencing the null pointer.
DenoisedAudio OfflineSpeechDenoiser::Run(const float *samples, int32_t n,
                                         int32_t sample_rate) const {
  // Value-initialize so sample_rate is 0 rather than indeterminate on the
  // early-return path.
  DenoisedAudio ans{};

  auto audio = SherpaOnnxOfflineSpeechDenoiserRun(p_, samples, n, sample_rate);
  if (!audio) {
    return ans;
  }

  ans.sample_rate = audio->sample_rate;

  // Guard against a null or empty buffer before forming the iterator range.
  if (audio->samples && audio->n > 0) {
    ans.samples.assign(audio->samples, audio->samples + audio->n);
  }

  SherpaOnnxDestroyDenoisedAudio(audio);

  return ans;
}
|
||||||
|
|
||||||
|
// Returns the sample rate reported by the C API for this denoiser, via
// SherpaOnnxOfflineSpeechDenoiserGetSampleRate().
int32_t OfflineSpeechDenoiser::GetSampleRate() const {
  return SherpaOnnxOfflineSpeechDenoiserGetSampleRate(p_);
}
|
||||||
|
|
||||||
} // namespace sherpa_onnx::cxx
|
} // namespace sherpa_onnx::cxx
|
||||||
|
|||||||
@@ -464,6 +464,42 @@ class SHERPA_ONNX_API KeywordSpotter
|
|||||||
explicit KeywordSpotter(const SherpaOnnxKeywordSpotter *p);
|
explicit KeywordSpotter(const SherpaOnnxKeywordSpotter *p);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Settings specific to the GTCRN speech-enhancement model.
struct OfflineSpeechDenoiserGtcrnModelConfig {
  // Handed to the C API as the GTCRN model string; the example program sets
  // it to a file path such as "./gtcrn_simple.onnx".
  std::string model;
};
|
||||||
|
|
||||||
|
// Model-level settings for offline speech denoising. Every field is copied
// into the matching C-API config field when a denoiser is created.
struct OfflineSpeechDenoiserModelConfig {
  OfflineSpeechDenoiserGtcrnModelConfig gtcrn;  // GTCRN-specific settings
  int32_t num_threads = 1;
  int32_t debug = false;  // integer flag; defaults to 0
  std::string provider = "cpu";
};
|
||||||
|
|
||||||
|
// Top-level configuration passed to OfflineSpeechDenoiser::Create().
struct OfflineSpeechDenoiserConfig {
  OfflineSpeechDenoiserModelConfig model;
};
|
||||||
|
|
||||||
|
// Audio produced by OfflineSpeechDenoiser::Run().
struct DenoisedAudio {
  std::vector<float> samples;  // in the range [-1, 1]

  // Defaults to 0 so a default-constructed object never exposes an
  // indeterminate value.
  int32_t sample_rate = 0;
};
|
||||||
|
|
||||||
|
// C++ wrapper around the C-API SherpaOnnxOfflineSpeechDenoiser handle.
class SHERPA_ONNX_API OfflineSpeechDenoiser
    : public MoveOnly<OfflineSpeechDenoiser, SherpaOnnxOfflineSpeechDenoiser> {
 public:
  // Creates a denoiser from `config`. Callers should check Get() on the
  // result; the example program treats a null handle as a bad config.
  static OfflineSpeechDenoiser Create(
      const OfflineSpeechDenoiserConfig &config);

  // Releases the C-API handle `p`.
  // NOTE(review): presumably invoked by MoveOnly when the wrapper is
  // destroyed -- confirm against the MoveOnly definition.
  void Destroy(const SherpaOnnxOfflineSpeechDenoiser *p) const;

  // Denoises the `n` samples starting at `samples` and returns a copy of the
  // result. `sample_rate` is passed through to the C API.
  DenoisedAudio Run(const float *samples, int32_t n, int32_t sample_rate) const;

  // Returns the sample rate reported by the C API for this denoiser.
  int32_t GetSampleRate() const;

 private:
  // Only Create() constructs instances; wraps the raw C-API handle `p`.
  explicit OfflineSpeechDenoiser(const SherpaOnnxOfflineSpeechDenoiser *p);
};
|
||||||
|
|
||||||
} // namespace sherpa_onnx::cxx
|
} // namespace sherpa_onnx::cxx
|
||||||
|
|
||||||
#endif // SHERPA_ONNX_C_API_CXX_API_H_
|
#endif // SHERPA_ONNX_C_API_CXX_API_H_
|
||||||
|
|||||||
Reference in New Issue
Block a user