Use a self-hosted runner to build sgl-kernel (#3154)
This commit is contained in:
6
.github/workflows/pr-test-sgl-kernel.yml
vendored
6
.github/workflows/pr-test-sgl-kernel.yml
vendored
@@ -32,13 +32,17 @@ jobs:
|
||||
|
||||
build-wheels:
|
||||
if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: sgl-kernel-build-node
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ['3.9']
|
||||
cuda-version: ['12.4']
|
||||
|
||||
steps:
|
||||
- name: Cleanup
|
||||
run: |
|
||||
sudo rm -rf $GITHUB_WORKSPACE/* || true
|
||||
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: 'recursive'
|
||||
|
||||
@@ -15,6 +15,7 @@ docker run --rm \
|
||||
pytorch/manylinux-builder:cuda${CUDA_VERSION} \
|
||||
bash -c "
|
||||
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir torch==2.5.1 --index-url https://download.pytorch.org/whl/cu${CUDA_VERSION//.} && \
|
||||
${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja && \
|
||||
export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \
|
||||
export CUDA_VERSION=${CUDA_VERSION} && \
|
||||
export SGL_KERNEL_ENABLE_BF16=1 && \
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import multiprocessing
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
@@ -70,6 +71,8 @@ nvcc_flags = [
|
||||
"-std=c++17",
|
||||
"-use_fast_math",
|
||||
"-DFLASHINFER_ENABLE_F16",
|
||||
"-Xcompiler",
|
||||
"-w",
|
||||
]
|
||||
nvcc_flags_fp8 = [
|
||||
"-DFLASHINFER_ENABLE_FP8",
|
||||
@@ -151,7 +154,11 @@ setup(
|
||||
packages=find_packages(),
|
||||
package_dir={"": "src"},
|
||||
ext_modules=ext_modules,
|
||||
cmdclass={"build_ext": BuildExtension},
|
||||
cmdclass={
|
||||
"build_ext": BuildExtension.with_options(
|
||||
use_ninja=True, max_jobs=multiprocessing.cpu_count()
|
||||
)
|
||||
},
|
||||
options={"bdist_wheel": {"py_limited_api": "cp39"}},
|
||||
)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user