Add C++ runtime for speaker verification models from NeMo (#527)

2024-01-13 21:42:09 +08:00
parent 68a525a024
commit 2024e96639
20 changed files with 405 additions and 24 deletions
--- a/sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
+++ b/sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
@@ -0,0 +1,28 @@
+// sherpa-onnx/csrc/speaker-embedding-extractor-nemo-model-meta-data.h
+//
+// Copyright (c)  2024  Xiaomi Corporation
+#ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_
+#define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_
+
+#include <cstdint>
+#include <string>
+
+namespace sherpa_onnx {
+
+struct SpeakerEmbeddingExtractorNeMoModelMetaData {  // NeMo model meta data
+  int32_t output_dim = 0;  // presumably the embedding size; TODO confirm
+  int32_t feat_dim = 80;  // presumably the feature dimension; default 80
+  int32_t sample_rate = 0;  // presumably in Hz; 0 by default
+  int32_t window_size_ms = 25;  // in milliseconds, per the _ms suffix
+  int32_t window_stride_ms = 25;  // NOTE(review): same as window size; confirm
+
+  // Language name, e.g., Chinese or English; empty by default.
+  std::string language;
+
+  // NOTE(review): global-mean is the 3d-speaker value; confirm for NeMo.
+  std::string feature_normalize_type;
+  std::string window_type = "hann";  // presumably the window function name
+};
+
+}  // namespace sherpa_onnx
+#endif  // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_NEMO_MODEL_META_DATA_H_