Add C++ runtime for non-streaming faster conformer transducer from NeMo. (#854)

This commit is contained in:
Fangjun Kuang
2024-05-10 12:15:39 +08:00
committed by GitHub
parent 5d8c35e44e
commit 17cd3a5f01
31 changed files with 1093 additions and 153 deletions

View File

@@ -11,6 +11,7 @@
#include <vector>
#include "sherpa-onnx/csrc/context-graph.h"
#include "sherpa-onnx/csrc/features.h"
#include "sherpa-onnx/csrc/parse-options.h"
namespace sherpa_onnx {
@@ -32,46 +33,12 @@ struct OfflineRecognitionResult {
std::string AsJsonString() const;
};
// Feature extractor configuration.
struct OfflineFeatureExtractorConfig {
  // Sample rate the feature extractor works at. When the input waveform has
  // a different sample rate, it is resampled internally.
  int32_t sampling_rate{16000};

  // Dimension of the extracted features.
  int32_t feature_dim{80};

  // Whether the feature extractor expects input samples normalized to the
  // range [-1, 1]:
  //  - true:  inputs are expected to be in [-1, 1]
  //  - false: inputs are multiplied by 32768
  //
  // Not exposed to users from the commandline. It is set internally by some
  // models, e.g., paraformer and wenet CTC models set it to false.
  bool normalize_samples{true};

  // Normalization type for models from NeMo.
  // Not exposed to users; it is set internally when loading models.
  // Possible values:
  //  - per_feature
  //  - all_features (not implemented yet)
  //  - fixed_mean (not implemented)
  //  - fixed_std (not implemented)
  //  - empty (the default)
  // See
  // https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/asr/parts/preprocessing/features.py#L59
  // for details
  std::string nemo_normalize_type{};

  // NOTE(review): presumably returns a printable description of this
  // config; the definition is not shown here.
  std::string ToString() const;

  // NOTE(review): presumably registers the user-facing options with `po`;
  // the definition is not shown here.
  void Register(ParseOptions *po);
};
// Empty tag type. It is the parameter type of the
// OfflineStream(WhisperTag) constructor overload.
struct WhisperTag {};
// Empty tag type.
// NOTE(review): presumably selects a dedicated OfflineStream constructor
// overload, like WhisperTag does; that overload is not visible here.
struct CEDTag {};
class OfflineStream {
public:
explicit OfflineStream(const OfflineFeatureExtractorConfig &config = {},
explicit OfflineStream(const FeatureExtractorConfig &config = {},
ContextGraphPtr context_graph = {});
explicit OfflineStream(WhisperTag tag);