Add optimized native kernels in sgl-kernel (#5150)

Co-authored-by: Chunyuan WU <chunyuan.wu@intel.com>
Co-authored-by: YanbingJiang <yanbing.jiang@intel.com>
Co-authored-by: blzheng <beilei.zheng@intel.com>
This commit is contained in:
Ma Mingfei
2025-04-09 00:37:46 +08:00
committed by GitHub
parent 89a554181f
commit a73c4df438
20 changed files with 7792 additions and 0 deletions

11
sgl-kernel/csrc/cpu/shm.h Normal file
View File

@@ -0,0 +1,11 @@
#include <torch/torch.h>
#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
// Include guard for the shared-memory collective declarations below.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; consider renaming the guard (or using `#pragma once`)
// once it is confirmed that no other file tests this macro.
// NOTE(review): the #include lines above sit outside this guard; that is
// harmless as long as those headers carry their own guards.
#ifndef __SHM_COLLECTIVES__
#define __SHM_COLLECTIVES__
// Vector length in bytes: 32 bytes = 256 bits.
// Presumably the SIMD register width the implementation is written for
// (e.g. one AVX2 register) -- TODO confirm against the .cpp that uses it.
#define VECTOR_LENGTH_IN_BYTES 32
// Initializes the shared-memory collective backend; the definition lives
// outside this header.
//
// Parameter meanings below are inferred from names only -- TODO confirm
// against the implementation:
//   size        - presumably the number of participating ranks (world size).
//   rank        - presumably this process's rank within that group.
//   addr_string - presumably a NUL-terminated address string used for rendezvous.
//   port_string - presumably a NUL-terminated port string used for rendezvous.
// NOTE(review): both strings are taken as non-const `char*`; whether the
// implementation writes to them cannot be determined from this header.
void shm_initialize(int size, int rank, char* addr_string, char* port_string);
// All-reduce entry point; the definition lives outside this header.
//
// `data` is passed by non-const reference and nothing is returned, so the
// result is presumably written back into `data` in place -- TODO confirm.
//   numel     - presumably the number of elements of `data` to reduce.
//   data_size - presumably the payload size in bytes -- verify against callers.
// NOTE(review): the reduction operator and the supported dtypes are not
// visible from this declaration.
void all_reduce_outer_loop(torch::Tensor& data, size_t numel, int data_size);
// All-gather entry point; the definition lives outside this header.
//
// Returns a `torch::Tensor&`; presumably this is `result` itself after it has
// been filled with the gathered data -- TODO confirm against the implementation.
//   result    - output tensor, passed by non-const reference.
//   data      - this caller's input tensor, passed by non-const reference.
//   dim       - presumably the dimension along which contributions are
//               concatenated.
//   numel     - presumably the element count of `data`.
//   data_size - presumably the size of `data` in bytes -- verify against callers.
torch::Tensor& all_gather(torch::Tensor& result, torch::Tensor& data, int dim, size_t numel, int data_size);
#endif