// sherpa-onnx/csrc/speaker-embedding-extractor-wespeaker-impl.h // // Copyright (c) 2023 Xiaomi Corporation #ifndef SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_WESPEAKER_IMPL_H_ #define SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_WESPEAKER_IMPL_H_ #include #include #include #include #include "sherpa-onnx/csrc/speaker-embedding-extractor-impl.h" #include "sherpa-onnx/csrc/speaker-embedding-extractor-wespeaker-model.h" namespace sherpa_onnx { class SpeakerEmbeddingExtractorWeSpeakerImpl : public SpeakerEmbeddingExtractorImpl { public: explicit SpeakerEmbeddingExtractorWeSpeakerImpl( const SpeakerEmbeddingExtractorConfig &config) : model_(config) {} int32_t Dim() const override { return model_.GetMetaData().output_dim; } std::unique_ptr CreateStream() const override { FeatureExtractorConfig feat_config; auto meta_data = model_.GetMetaData(); feat_config.sampling_rate = meta_data.sample_rate; feat_config.normalize_samples = meta_data.normalize_features; return std::make_unique(feat_config); } bool IsReady(OnlineStream *s) const override { return s->GetNumProcessedFrames() < s->NumFramesReady(); } std::vector Compute(OnlineStream *s) const override { int32_t num_frames = s->NumFramesReady() - s->GetNumProcessedFrames(); if (num_frames <= 0) { SHERPA_ONNX_LOGE( "Please make sure IsReady(s) returns true. num_frames: %d", num_frames); return {}; } std::vector features = s->GetFrames(s->GetNumProcessedFrames(), num_frames); s->GetNumProcessedFrames() += num_frames; int32_t feat_dim = features.size() / num_frames; auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault); std::array x_shape{1, num_frames, feat_dim}; Ort::Value x = Ort::Value::CreateTensor(memory_info, features.data(), features.size(), x_shape.data(), x_shape.size()); Ort::Value embedding = model_.Compute(std::move(x)); std::vector embedding_shape = embedding.GetTensorTypeAndShapeInfo().GetShape(); std::vector ans(embedding_shape[1]); std::copy(embedding.GetTensorData(), embedding.GetTensorData() + ans.size(), ans.begin()); return ans; } private: SpeakerEmbeddingExtractorWeSpeakerModel model_; }; } // namespace sherpa_onnx #endif // SHERPA_ONNX_CSRC_SPEAKER_EMBEDDING_EXTRACTOR_WESPEAKER_IMPL_H_