From 2c7dbb7cc2306d00f1ca4713d6f9d9901ad6da56 Mon Sep 17 00:00:00 2001 From: Johnny Date: Wed, 30 Apr 2025 00:06:16 +0200 Subject: [PATCH] [FEATURE] Enhance platform compatibility for ARM (#5746) --- sgl-kernel/build.sh | 16 ++++++++++------ sgl-kernel/python/sgl_kernel/__init__.py | 11 ++++++----- sgl-kernel/setup_cpu.py | 15 +++++++++++++-- sgl-kernel/setup_rocm.py | 4 +++- 4 files changed, 32 insertions(+), 14 deletions(-) diff --git a/sgl-kernel/build.sh b/sgl-kernel/build.sh index 945d60ca5..819283c51 100755 --- a/sgl-kernel/build.sh +++ b/sgl-kernel/build.sh @@ -5,6 +5,9 @@ PYTHON_VERSION=$1 CUDA_VERSION=$2 PYTHON_ROOT_PATH=/opt/python/cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.} +ARCH=$(uname -m) +echo "ARCH: $ARCH" + if [ ${CUDA_VERSION} = "12.8" ]; then DOCKER_IMAGE="pytorch/manylinux2_28-builder:cuda${CUDA_VERSION}" TORCH_INSTALL="pip install --no-cache-dir --pre torch --index-url https://download.pytorch.org/whl/nightly/cu${CUDA_VERSION//.}" @@ -20,10 +23,10 @@ docker run --rm \ # Install CMake (version >= 3.26) - Robust Installation export CMAKE_VERSION_MAJOR=3.31 export CMAKE_VERSION_MINOR=1 - echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz\" - wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz - tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64.tar.gz - mv cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-x86_64 /opt/cmake + echo \"Downloading CMake from: https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz\" + wget https://cmake.org/files/v\${CMAKE_VERSION_MAJOR}/cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz + tar -xzf cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH}.tar.gz + mv 
cmake-\${CMAKE_VERSION_MAJOR}.\${CMAKE_VERSION_MINOR}-linux-${ARCH} /opt/cmake export PATH=/opt/cmake/bin:\$PATH # Debugging CMake @@ -35,8 +38,9 @@ docker run --rm \ ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \ export CUDA_VERSION=${CUDA_VERSION} && \ - mkdir -p /usr/lib/x86_64-linux-gnu/ && \ - ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \ + mkdir -p /usr/lib/${ARCH}-linux-gnu/ && \ + ln -s /usr/local/cuda-${CUDA_VERSION}/targets/$([ "${ARCH}" = "aarch64" ] && echo sbsa || echo "${ARCH}")-linux/lib/stubs/libcuda.so /usr/lib/${ARCH}-linux-gnu/libcuda.so && \ + cd /sgl-kernel && \ ls -la ${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages/wheel/ && \ PYTHONPATH=${PYTHON_ROOT_PATH}/lib/python${PYTHON_VERSION}/site-packages ${PYTHON_ROOT_PATH}/bin/python -m uv build --wheel -Cbuild-dir=build . --color=always --no-build-isolation && \ diff --git a/sgl-kernel/python/sgl_kernel/__init__.py b/sgl-kernel/python/sgl_kernel/__init__.py index acd21a46e..0aaf09042 100755 --- a/sgl-kernel/python/sgl_kernel/__init__.py +++ b/sgl-kernel/python/sgl_kernel/__init__.py @@ -1,13 +1,14 @@ import ctypes import os +import platform import torch -if os.path.exists("/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12"): - ctypes.CDLL( - "/usr/local/cuda/targets/x86_64-linux/lib/libcudart.so.12", - mode=ctypes.RTLD_GLOBAL, - ) +SYSTEM_ARCH = platform.machine() + +cuda_path = f"/usr/local/cuda/targets/{SYSTEM_ARCH}-linux/lib/libcudart.so.12" +if os.path.exists(cuda_path): + ctypes.CDLL(cuda_path, mode=ctypes.RTLD_GLOBAL) from sgl_kernel import common_ops from sgl_kernel.allreduce import * diff --git a/sgl-kernel/setup_cpu.py b/sgl-kernel/setup_cpu.py index 04e06cb1a..8de4fcf2d 100644 --- a/sgl-kernel/setup_cpu.py +++ b/sgl-kernel/setup_cpu.py @@ -14,6 +14,7 @@ # 
============================================================================== import os +import platform import shutil import sys from pathlib import Path @@ -24,9 +25,19 @@ from setuptools.command.build_py import build_py from torch.utils.cpp_extension import BuildExtension, CppExtension root = Path(__file__).parent.resolve() +arch = platform.machine().lower() + +if arch in ("x86_64", "amd64"): + plat_name = "manylinux2014_x86_64" +elif arch in ("aarch64", "arm64"): + plat_name = "manylinux2014_aarch64" +elif arch == "ppc64le": + plat_name = "manylinux2014_ppc64le" +else: + plat_name = f"manylinux2014_{arch}" if "bdist_wheel" in sys.argv and "--plat-name" not in sys.argv: - sys.argv.extend(["--plat-name", "manylinux2014_x86_64"]) + sys.argv.extend(["--plat-name", plat_name]) def _get_version(): @@ -70,7 +81,7 @@ cmdclass = { } Extension = CppExtension -extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", "-L/usr/lib/x86_64-linux-gnu"] +extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", f"-L/usr/lib/{arch}-linux-gnu"] ext_modules = [ Extension( diff --git a/sgl-kernel/setup_rocm.py b/sgl-kernel/setup_rocm.py index c0d827fff..b230abfa8 100644 --- a/sgl-kernel/setup_rocm.py +++ b/sgl-kernel/setup_rocm.py @@ -13,6 +13,7 @@ # limitations under the License. # ============================================================================== +import platform import sys from pathlib import Path @@ -20,6 +21,7 @@ from setuptools import find_packages, setup from torch.utils.cpp_extension import BuildExtension, CUDAExtension root = Path(__file__).parent.resolve() +arch = platform.machine().lower() def _get_version(): @@ -45,7 +47,7 @@ sources = [ cxx_flags = ["-O3"] libraries = ["hiprtc", "amdhip64", "c10", "torch", "torch_python"] -extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", "-L/usr/lib/x86_64-linux-gnu"] +extra_link_args = ["-Wl,-rpath,$ORIGIN/../../torch/lib", f"-L/usr/lib/{arch}-linux-gnu"] hipcc_flags = [ "-DNDEBUG",