use self-hosted runner to build sgl-kernel (#3154)

2025-01-26 23:02:57 +08:00
parent 02431b9ad2
commit f265d15b96
3 changed files with 14 additions and 2 deletions
--- a/.github/workflows/pr-test-sgl-kernel.yml
+++ b/.github/workflows/pr-test-sgl-kernel.yml
@@ -32,13 +32,17 @@ jobs:

  build-wheels:
    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
-    runs-on: ubuntu-latest
+    runs-on: sgl-kernel-build-node
    strategy:
      matrix:
        python-version: ['3.9']
        cuda-version: ['12.4']

    steps:
+      - name: Cleanup
+        run: |
+          sudo rm -rf $GITHUB_WORKSPACE/* || true
+
      - uses: actions/checkout@v4
        with:
          submodules: 'recursive'
--- a/sgl-kernel/build.sh
+++ b/sgl-kernel/build.sh
@@ -15,6 +15,7 @@ docker run --rm \
    pytorch/manylinux-builder:cuda${CUDA_VERSION} \
    bash -c "
    ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.} && \
+    ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja && \
    export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
    export CUDA_VERSION=${CUDA_VERSION} && \
    export SGL_KERNEL_ENABLE_BF16=1 && \
--- a/sgl-kernel/setup.py
+++ b/sgl-kernel/setup.py
@@ -1,3 +1,4 @@
+import multiprocessing
 import os
 from pathlib import Path

@@ -70,6 +71,8 @@ nvcc_flags = [
    "-std=c++17",
    "-use_fast_math",
    "-DFLASHINFER_ENABLE_F16",
+    "-Xcompiler",
+    "-w",
 ]
 nvcc_flags_fp8 = [
    "-DFLASHINFER_ENABLE_FP8",
@@ -151,7 +154,11 @@ setup(
    packages=find_packages(),
    package_dir={"": "src"},
    ext_modules=ext_modules,
-    cmdclass={"build_ext": BuildExtension},
+    cmdclass={
+        "build_ext": BuildExtension.with_options(
+            use_ninja=True, max_jobs=multiprocessing.cpu_count()
+        )
+    },
    options={"bdist_wheel": {"py_limited_api": "cp39"}},
 )