Add optimized native kernels in sgl-kernel (#5150)

Co-authored-by: Chunyuan WU <chunyuan.wu@intel.com> Co-authored-by: YanbingJiang <yanbing.jiang@intel.com> Co-authored-by: blzheng <beilei.zheng@intel.com>
2025-04-09 00:37:46 +08:00
parent 89a554181f
commit a73c4df438
20 changed files with 7792 additions and 0 deletions
--- a/sgl-kernel/csrc/cpu/shm.h
+++ b/sgl-kernel/csrc/cpu/shm.h
@@ -0,0 +1,11 @@
+#include <torch/torch.h>
+
+#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
+
+#ifndef __SHM_COLLECTIVES__  // include guard for the declarations below; it opens after the #include lines, so those are not covered by it
+#define __SHM_COLLECTIVES__  // NOTE(review): identifiers containing "__" are reserved for the implementation in C++
+#define VECTOR_LENGTH_IN_BYTES 32  // 32 bytes = 256 bits; presumably the SIMD register width assumed by the implementation - TODO confirm
+void shm_initialize(int size, int rank, char* addr_string, char* port_string);  // declaration only (defined elsewhere); size/rank presumably the process count and this process's index, the strings presumably a rendezvous address/port - TODO confirm
+void all_reduce_outer_loop(torch::Tensor& data, size_t numel, int data_size);  // declaration only; data is taken by mutable reference; numel/data_size presumably element count and size in bytes - TODO confirm
+torch::Tensor& all_gather(torch::Tensor& result, torch::Tensor& data, int dim, size_t numel, int data_size);  // declaration only; returns a tensor reference, presumably `result` - TODO confirm against the definition
+#endif  // __SHM_COLLECTIVES__