[NVIDIA] Build CUDA 13 (#11299)

Co-authored-by: ishandhanani <ishandhanani@gmail.com>
Co-authored-by: Baizhou Zhang <sobereddiezhang@gmail.com>
This commit (e7aa4664b3, parent 4d4feccbb2) was authored by Johnny on 2025-10-22 20:03:12 -07:00 and committed via GitHub.
6 changed files with 131 additions and 63 deletions

View File

@@ -224,6 +224,12 @@ if (ENABLE_BELOW_SM90)
"-gencode=arch=compute_80,code=sm_80"
"-gencode=arch=compute_89,code=sm_89"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_87,code=sm_87"
)
endif()
endif()
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
@@ -231,19 +237,24 @@ if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "12.8" OR SGL_KERNEL_ENABLE_SM100A)
"-gencode=arch=compute_100a,code=sm_100a"
"-gencode=arch=compute_120a,code=sm_120a"
)
# refer sm_121, sm_110 and sm_101 description https://github.com/pytorch/pytorch/pull/156176
if ("${CUDA_VERSION}" VERSION_GREATER_EQUAL "13.0")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_103a,code=sm_103a"
"-gencode=arch=compute_110a,code=sm_110a"
"-gencode=arch=compute_121a,code=sm_121a"
"--compress-mode=size"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_110a,code=sm_110a"
"-gencode=arch=compute_121a,code=sm_121a"
)
endif()
else()
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_101a,code=sm_101a"
)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64")
list(APPEND SGL_KERNEL_CUDA_FLAGS
"-gencode=arch=compute_101a,code=sm_101a"
)
endif()
endif()
endif()

View File

@@ -20,7 +20,10 @@ else
BUILDER_NAME="pytorch/manylinux2_28-builder"
fi
if [ ${CUDA_VERSION} = "12.9" ]; then
if [ ${CUDA_VERSION} = "13.0" ]; then
DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION}"
TORCH_INSTALL="pip install --no-cache-dir torch==2.9.0 --index-url https://download.pytorch.org/whl/cu130"
elif [ ${CUDA_VERSION} = "12.9" ]; then
DOCKER_IMAGE="${BUILDER_NAME}:cuda${CUDA_VERSION}"
TORCH_INSTALL="pip install --no-cache-dir torch==2.8.0 --index-url https://download.pytorch.org/whl/cu129"
elif [ ${CUDA_VERSION} = "12.8" ]; then
@@ -148,6 +151,8 @@ docker run --rm \
export CUDA_VERSION=${CUDA_VERSION} && \
mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \
ln -s /usr/local/cuda-${CUDA_VERSION}/targets/${LIBCUDA_ARCH}-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \
export CPLUS_INCLUDE_PATH=/usr/local/cuda/include/cccl${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}} && \
export C_INCLUDE_PATH=/usr/local/cuda/include/cccl${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}} && \
cd /sgl-kernel && \
ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \