Add C++ runtime for Tele-AI/TeleSpeech-ASR (#970)

This commit is contained in:
Fangjun Kuang
2024-06-05 00:26:40 +08:00
committed by GitHub
parent f8dbc10146
commit fd5a0d1e00
52 changed files with 1052 additions and 145 deletions

View File

@@ -56,22 +56,11 @@ std::string FeatureExtractorConfig::ToString() const {
class FeatureExtractor::Impl {
public:
explicit Impl(const FeatureExtractorConfig &config) : config_(config) {
opts_.frame_opts.dither = config.dither;
opts_.frame_opts.snip_edges = config.snip_edges;
opts_.frame_opts.samp_freq = config.sampling_rate;
opts_.frame_opts.frame_shift_ms = config.frame_shift_ms;
opts_.frame_opts.frame_length_ms = config.frame_length_ms;
opts_.frame_opts.remove_dc_offset = config.remove_dc_offset;
opts_.frame_opts.window_type = config.window_type;
opts_.mel_opts.num_bins = config.feature_dim;
opts_.mel_opts.high_freq = config.high_freq;
opts_.mel_opts.low_freq = config.low_freq;
opts_.mel_opts.is_librosa = config.is_librosa;
fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
if (config_.is_mfcc) {
InitMfcc();
} else {
InitFbank();
}
}
void AcceptWaveform(int32_t sampling_rate, const float *waveform, int32_t n) {
@@ -101,35 +90,48 @@ class FeatureExtractor::Impl {
std::vector<float> samples;
resampler_->Resample(waveform, n, false, &samples);
fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
samples.size());
if (fbank_) {
fbank_->AcceptWaveform(config_.sampling_rate, samples.data(),
samples.size());
} else {
mfcc_->AcceptWaveform(config_.sampling_rate, samples.data(),
samples.size());
}
return;
}
if (sampling_rate != opts_.frame_opts.samp_freq) {
if (sampling_rate != config_.sampling_rate) {
SHERPA_ONNX_LOGE(
"Creating a resampler:\n"
" in_sample_rate: %d\n"
" output_sample_rate: %d\n",
sampling_rate, static_cast<int32_t>(opts_.frame_opts.samp_freq));
sampling_rate, static_cast<int32_t>(config_.sampling_rate));
float min_freq =
std::min<int32_t>(sampling_rate, opts_.frame_opts.samp_freq);
float min_freq = std::min<int32_t>(sampling_rate, config_.sampling_rate);
float lowpass_cutoff = 0.99 * 0.5 * min_freq;
int32_t lowpass_filter_width = 6;
resampler_ = std::make_unique<LinearResample>(
sampling_rate, opts_.frame_opts.samp_freq, lowpass_cutoff,
sampling_rate, config_.sampling_rate, lowpass_cutoff,
lowpass_filter_width);
std::vector<float> samples;
resampler_->Resample(waveform, n, false, &samples);
fbank_->AcceptWaveform(opts_.frame_opts.samp_freq, samples.data(),
samples.size());
if (fbank_) {
fbank_->AcceptWaveform(config_.sampling_rate, samples.data(),
samples.size());
} else {
mfcc_->AcceptWaveform(config_.sampling_rate, samples.data(),
samples.size());
}
return;
}
fbank_->AcceptWaveform(sampling_rate, waveform, n);
if (fbank_) {
fbank_->AcceptWaveform(sampling_rate, waveform, n);
} else {
mfcc_->AcceptWaveform(sampling_rate, waveform, n);
}
}
void InputFinished() const {
@@ -179,11 +181,56 @@ class FeatureExtractor::Impl {
return features;
}
int32_t FeatureDim() const { return opts_.mel_opts.num_bins; }
int32_t FeatureDim() const {
return mfcc_ ? mfcc_opts_.num_ceps : opts_.mel_opts.num_bins;
}
private:
void InitFbank() {
opts_.frame_opts.dither = config_.dither;
opts_.frame_opts.snip_edges = config_.snip_edges;
opts_.frame_opts.samp_freq = config_.sampling_rate;
opts_.frame_opts.frame_shift_ms = config_.frame_shift_ms;
opts_.frame_opts.frame_length_ms = config_.frame_length_ms;
opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
opts_.frame_opts.window_type = config_.window_type;
opts_.mel_opts.num_bins = config_.feature_dim;
opts_.mel_opts.high_freq = config_.high_freq;
opts_.mel_opts.low_freq = config_.low_freq;
opts_.mel_opts.is_librosa = config_.is_librosa;
fbank_ = std::make_unique<knf::OnlineFbank>(opts_);
}
void InitMfcc() {
mfcc_opts_.frame_opts.dither = config_.dither;
mfcc_opts_.frame_opts.snip_edges = config_.snip_edges;
mfcc_opts_.frame_opts.samp_freq = config_.sampling_rate;
mfcc_opts_.frame_opts.frame_shift_ms = config_.frame_shift_ms;
mfcc_opts_.frame_opts.frame_length_ms = config_.frame_length_ms;
mfcc_opts_.frame_opts.remove_dc_offset = config_.remove_dc_offset;
mfcc_opts_.frame_opts.window_type = config_.window_type;
mfcc_opts_.mel_opts.num_bins = config_.feature_dim;
mfcc_opts_.mel_opts.high_freq = config_.high_freq;
mfcc_opts_.mel_opts.low_freq = config_.low_freq;
mfcc_opts_.mel_opts.is_librosa = config_.is_librosa;
mfcc_opts_.num_ceps = config_.num_ceps;
mfcc_opts_.use_energy = config_.use_energy;
mfcc_ = std::make_unique<knf::OnlineMfcc>(mfcc_opts_);
}
private:
std::unique_ptr<knf::OnlineFbank> fbank_;
std::unique_ptr<knf::OnlineMfcc> mfcc_;
knf::FbankOptions opts_;
knf::MfccOptions mfcc_opts_;
FeatureExtractorConfig config_;
mutable std::mutex mutex_;
std::unique_ptr<LinearResample> resampler_;