From 88defc4d89b766ce2ed9d0828d31f583b094c278 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Tue, 15 Apr 2025 12:58:11 -0700 Subject: [PATCH] fix: solve release issue (#5434) --- .github/workflows/release-pypi-kernel.yml | 44 --------------- ...cu128.yml => release-whl-kernel-cu118.yml} | 6 +- .github/workflows/release-whl-kernel.yml | 56 +++++++++++++++---- docker/Dockerfile.blackwell | 2 +- sgl-kernel/build.sh | 2 - 5 files changed, 49 insertions(+), 61 deletions(-) delete mode 100644 .github/workflows/release-pypi-kernel.yml rename .github/workflows/{release-whl-kernel-cu128.yml => release-whl-kernel-cu118.yml} (94%) diff --git a/.github/workflows/release-pypi-kernel.yml b/.github/workflows/release-pypi-kernel.yml deleted file mode 100644 index 59aa00a77..000000000 --- a/.github/workflows/release-pypi-kernel.yml +++ /dev/null @@ -1,44 +0,0 @@ -name: Release SGLang Kernel to PyPI - -on: - push: - branches: - - main - paths: - - sgl-kernel/python/sgl_kernel/version.py - workflow_dispatch: - -concurrency: - group: release-pypi-kernel-${{ github.ref }} - cancel-in-progress: true - -jobs: - build-wheels: - if: github.repository == 'sgl-project/sglang' - runs-on: sgl-kernel-release-node - strategy: - matrix: - python-version: ['3.9'] - cuda-version: ['12.4'] - - steps: - - uses: actions/checkout@v4 - with: - submodules: 'recursive' - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - - name: Upload to pypi - working-directory: sgl-kernel - run: | - pip install twine - python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} diff --git a/.github/workflows/release-whl-kernel-cu128.yml b/.github/workflows/release-whl-kernel-cu118.yml similarity index 94% rename from .github/workflows/release-whl-kernel-cu128.yml rename to .github/workflows/release-whl-kernel-cu118.yml index 8be761f38..09b154e6f 100644 --- a/.github/workflows/release-whl-kernel-cu128.yml +++ b/.github/workflows/release-whl-kernel-cu118.yml @@ -1,4 +1,4 @@ -name: Release SGLang Kernel Wheel (cu128) +name: Release SGLang Kernel Wheel (cu118) on: workflow_dispatch: @@ -18,7 +18,7 @@ jobs: strategy: matrix: python-version: ['3.9'] - cuda-version: ['12.8'] + cuda-version: ['11.8'] steps: - uses: actions/checkout@v4 @@ -80,7 +80,7 @@ jobs: WHL_TOKEN: ${{ secrets.WHL_TOKEN }} - name: Update wheel index - run: python3 scripts/update_kernel_whl_index.py --cuda 128 + run: python3 scripts/update_kernel_whl_index.py - name: Push wheel index run: | diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml index 09b154e6f..d80afe5cf 100644 --- a/.github/workflows/release-whl-kernel.yml +++ b/.github/workflows/release-whl-kernel.yml @@ -1,25 +1,29 @@ -name: Release SGLang Kernel Wheel (cu118) +name: Release SGLang Kernels on: - workflow_dispatch: - inputs: - tag_name: - type: string push: branches: - main paths: - sgl-kernel/python/sgl_kernel/version.py + workflow_dispatch: + inputs: + tag_name: + type: string + required: false + +concurrency: + group: release-sglang-kernels-${{ github.ref }} + cancel-in-progress: true jobs: - build-wheels: + build-cu124: if: github.repository == 'sgl-project/sglang' runs-on: sgl-kernel-release-node strategy: matrix: python-version: ['3.9'] - cuda-version: ['11.8'] - + cuda-version: ['12.4'] steps: - uses: actions/checkout@v4 with: @@ -30,7 +34,37 @@ jobs: with: python-version: ${{ matrix.python-version }} - - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} + - name: Build wheels + run: | + cd sgl-kernel + chmod +x ./build.sh + ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" + + - name: Upload to PyPI + working-directory: sgl-kernel + run: | + pip install twine + python3 -m twine upload dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} + + build-cu128: + if: github.repository == 'sgl-project/sglang' + needs: build-cu124 + runs-on: sgl-kernel-release-node + strategy: + matrix: + python-version: ['3.9'] + cuda-version: ['12.8'] + steps: + - uses: actions/checkout@v4 + with: + submodules: 'recursive' + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Build wheels run: | cd sgl-kernel chmod +x ./build.sh @@ -43,7 +77,7 @@ jobs: path: sgl-kernel/dist/* release: - needs: build-wheels + needs: build-cu128 runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -80,7 +114,7 @@ jobs: WHL_TOKEN: ${{ secrets.WHL_TOKEN }} - name: Update wheel index - run: python3 scripts/update_kernel_whl_index.py + run: python3 scripts/update_kernel_whl_index.py --cuda 128 - name: Push wheel index run: | diff --git a/docker/Dockerfile.blackwell b/docker/Dockerfile.blackwell index 75236b659..7f1443891 100644 --- a/docker/Dockerfile.blackwell +++ b/docker/Dockerfile.blackwell @@ -6,7 +6,7 @@ WORKDIR /sgl-workspace RUN pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128 -RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.0.9/sgl_kernel-0.0.9+cu128-cp39-abi3-manylinux2014_x86_64.whl \ +RUN pip3 install https://github.com/sgl-project/whl/releases/download/v0.0.9.post1/sgl_kernel-0.0.9.post1+cu128-cp39-abi3-manylinux2014_x86_64.whl \ && pip3 install setuptools==75.0.0 wheel==0.41.0 scikit-build-core RUN git clone --depth=1 https://github.com/sgl-project/sglang.git \ diff --git a/sgl-kernel/build.sh b/sgl-kernel/build.sh index 6b82613a2..945d60ca5 100755 --- a/sgl-kernel/build.sh +++ b/sgl-kernel/build.sh @@ -35,8 +35,6 @@ docker run --rm \ ${PYTHON_ROOT_PATH}/bin/pip install --no-cache-dir ninja setuptools==75.0.0 wheel==0.41.0 numpy uv scikit-build-core && \ export TORCH_CUDA_ARCH_LIST='7.5 8.0 8.9 9.0+PTX' && \ export CUDA_VERSION=${CUDA_VERSION} && \ - export CMAKE_BUILD_PARALLEL_LEVEL=96 - export MAX_JOBS=96 mkdir -p /usr/lib/x86_64-linux-gnu/ && \ ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \ cd /sgl-kernel && \