Adapt TensorRT-LLM custom all-reduce to sgl-kernel (#2481)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
2024-12-15 13:15:59 +08:00
parent 5f2595be43
commit e04d3f2897
13 changed files with 872 additions and 32 deletions
--- a/sgl-kernel/setup.py
+++ b/sgl-kernel/setup.py
@@ -84,7 +84,31 @@ setup(
            },
            libraries=["c10", "torch", "torch_python"],
            extra_link_args=["-Wl,-rpath,$ORIGIN/../../torch/lib"],
-        )
+        ),
+        CUDAExtension(
+            "sgl_kernel.ops.custom_reduce_cuda",
+            [
+                "src/sgl-kernel/csrc/trt_reduce_internal.cu",
+                "src/sgl-kernel/csrc/trt_reduce_kernel.cu",
+                "src/sgl-kernel/csrc/trt_reduce.cc",
+            ],
+            extra_compile_args={
+                "nvcc": [
+                    "-O3",
+                    "-Xcompiler",
+                    "-fPIC",
+                    "-gencode=arch=compute_75,code=sm_75",
+                    "-gencode=arch=compute_80,code=sm_80",
+                    "-gencode=arch=compute_89,code=sm_89",
+                    "-gencode=arch=compute_90,code=sm_90",
+                    "-U__CUDA_NO_HALF_OPERATORS__",
+                    "-U__CUDA_NO_HALF2_OPERATORS__",
+                ],
+                "cxx": ["-O3"],
+            },
+            libraries=["c10", "torch", "torch_python"],
+            extra_link_args=["-Wl,-rpath,$ORIGIN/../../torch/lib"],
+        ),
    ],
    cmdclass={"build_ext": BuildExtension},
    install_requires=["torch"],