Add optimized native kernels in sgl-kernel (#5150)
Co-authored-by: Chunyuan WU <chunyuan.wu@intel.com> Co-authored-by: YanbingJiang <yanbing.jiang@intel.com> Co-authored-by: blzheng <beilei.zheng@intel.com>
This commit is contained in:
11
sgl-kernel/csrc/cpu/shm.h
Normal file
11
sgl-kernel/csrc/cpu/shm.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#include <torch/torch.h>
|
||||
|
||||
#include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
|
||||
|
||||
// Include guard for this header.
// NOTE(review): identifiers containing a double underscore are reserved for the
// implementation in C++; consider a non-reserved guard name (or `#pragma once`)
// and moving the #include lines inside the guard.
#ifndef __SHM_COLLECTIVES__
|
||||
#define __SHM_COLLECTIVES__
|
||||
// Vector length, in bytes (per the macro name), as a preprocessor constant.
// NOTE(review): 32 bytes presumably matches a 256-bit SIMD register width —
// confirm against the implementation that consumes this macro.
#define VECTOR_LENGTH_IN_BYTES 32
|
||||
/// Declaration only; the definition lives outside this header.
/// @param size        NOTE(review): presumably the number of participating ranks — confirm.
/// @param rank        NOTE(review): presumably the calling process's rank — confirm.
/// @param addr_string Mutable C string; presumably an address used for rendezvous — confirm.
/// @param port_string Mutable C string; presumably a port used for rendezvous — confirm.
void shm_initialize(int size, int rank, char* addr_string, char* port_string);
|
||||
/// Declaration only; the definition lives outside this header.
/// Takes the tensor by non-const reference and returns nothing.
/// NOTE(review): presumably reduces `data` in place across ranks — confirm.
/// @param data      Tensor passed by mutable reference.
/// @param numel     Element count (size_t); presumably the number of elements of `data` to process — confirm.
/// @param data_size NOTE(review): presumably a size in bytes (per element or total) — confirm which.
void all_reduce_outer_loop(torch::Tensor& data, size_t numel, int data_size);
|
||||
/// Declaration only; the definition lives outside this header.
/// Returns a tensor reference.
/// NOTE(review): presumably the returned reference is `result` — confirm.
/// @param result    Tensor passed by mutable reference; presumably the gather destination — confirm.
/// @param data      Tensor passed by mutable reference; presumably this rank's contribution — confirm.
/// @param dim       NOTE(review): presumably the dimension along which tensors are gathered — confirm.
/// @param numel     Element count (size_t); presumably the number of elements of `data` — confirm.
/// @param data_size NOTE(review): presumably a size in bytes (per element or total) — confirm which.
torch::Tensor& all_gather(torch::Tensor& result, torch::Tensor& data, int dim, size_t numel, int data_size);
|
||||
#endif
|
||||
Reference in New Issue
Block a user