Add C++ runtime for Tele-AI/TeleSpeech-ASR (#970)

2024-06-05 00:26:40 +08:00
parent f8dbc10146
commit fd5a0d1e00
52 changed files with 1052 additions and 145 deletions
--- a/sherpa-onnx/csrc/features.h
+++ b/sherpa-onnx/csrc/features.h
@@ -18,7 +18,10 @@ struct FeatureExtractorConfig {
  // the sampling rate of the input waveform, we will do resampling inside.
  int32_t sampling_rate = 16000;

-  // Feature dimension
+  // Number of mel bins (num_mel_bins).
+  //
+  // Note: for MFCC, this value is also used as num_mel_bins;
+  // the actual feature dimension is num_ceps.
  int32_t feature_dim = 80;

  // minimal frequency for Mel-filterbank, in Hz
@@ -69,6 +72,12 @@ struct FeatureExtractorConfig {
  // for details
  std::string nemo_normalize_type;

+  // for MFCC
+  int32_t num_ceps = 13;
+  bool use_energy = true;
+
+  bool is_mfcc = false;
+
  std::string ToString() const;

  void Register(ParseOptions *po);