use self-hosted to build sgl-kernel (#3154)
This commit is contained in:
6
.github/workflows/pr-test-sgl-kernel.yml
vendored
6
.github/workflows/pr-test-sgl-kernel.yml
vendored
@@ -32,13 +32,17 @@ jobs:
|
|||||||
|
|
||||||
build-wheels:
|
build-wheels:
|
||||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||||
runs-on: ubuntu-latest
|
runs-on: sgl-kernel-build-node
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
python-version: ['3.9']
|
python-version: ['3.9']
|
||||||
cuda-version: ['12.4']
|
cuda-version: ['12.4']
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
- name: Cleanup
|
||||||
|
run: |
|
||||||
|
sudo rm -rf $GITHUB_WORKSPACE/* || true
|
||||||
|
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: 'recursive'
|
submodules: 'recursive'
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ docker run --rm \
|
|||||||
pytorch/manylinux-builder:cuda${CUDA_VERSION} \
|
pytorch/manylinux-builder:cuda${CUDA_VERSION} \
|
||||||
bash -c "
|
bash -c "
|
||||||
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.} && \
|
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.} && \
|
||||||
|
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja && \
|
||||||
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
|
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
|
||||||
export CUDA_VERSION=${CUDA_VERSION} && \
|
export CUDA_VERSION=${CUDA_VERSION} && \
|
||||||
export SGL_KERNEL_ENABLE_BF16=1 && \
|
export SGL_KERNEL_ENABLE_BF16=1 && \
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import multiprocessing
|
||||||
import os
|
import os
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -70,6 +71,8 @@ nvcc_flags = [
|
|||||||
"-std=c++17",
|
"-std=c++17",
|
||||||
"-use_fast_math",
|
"-use_fast_math",
|
||||||
"-DFLASHINFER_ENABLE_F16",
|
"-DFLASHINFER_ENABLE_F16",
|
||||||
|
"-Xcompiler",
|
||||||
|
"-w",
|
||||||
]
|
]
|
||||||
nvcc_flags_fp8 = [
|
nvcc_flags_fp8 = [
|
||||||
"-DFLASHINFER_ENABLE_FP8",
|
"-DFLASHINFER_ENABLE_FP8",
|
||||||
@@ -151,7 +154,11 @@ setup(
|
|||||||
packages=find_packages(),
|
packages=find_packages(),
|
||||||
package_dir={"": "src"},
|
package_dir={"": "src"},
|
||||||
ext_modules=ext_modules,
|
ext_modules=ext_modules,
|
||||||
cmdclass={"build_ext": BuildExtension},
|
cmdclass={
|
||||||
|
"build_ext": BuildExtension.with_options(
|
||||||
|
use_ninja=True, max_jobs=multiprocessing.cpu_count()
|
||||||
|
)
|
||||||
|
},
|
||||||
options={"bdist_wheel": {"py_limited_api": "cp39"}},
|
options={"bdist_wheel": {"py_limited_api": "cp39"}},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user