From a22f60a313818678ba7455088833705be694c32f Mon Sep 17 00:00:00 2001
From: Ke Bao <ISPObaoke@163.com>
Date: Fri, 24 Jan 2025 22:30:30 +0800
Subject: [PATCH] Add workflow for sgl-kernel cu118 release (#3109)

---
 .github/workflows/release-whl-kernel.yml | 59 ++++++++++++++++++++++++
 sgl-kernel/build.sh                      |  8 +++-
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/release-whl-kernel.yml

diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml
new file mode 100644
index 000000000..b49da1feb
--- /dev/null
+++ b/.github/workflows/release-whl-kernel.yml
@@ -0,0 +1,59 @@
+name: Release SGLang Kernel Wheel (cu118)
+
+on:
+  workflow_dispatch:
+    inputs:
+      tag_name:
+        required: true
+        type: string
+
+jobs:
+  build-wheels:
+    if: github.repository == 'sgl-project/sglang'
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.9', '3.10', '3.11', '3.12']
+        cuda-version: ['11.8']
+
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          submodules: 'recursive'
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
+        run: |
+          cd sgl-kernel
+          chmod +x ./build.sh
+          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
+
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
+          path: sgl-kernel/dist/*
+
+  release:
+    needs: build-wheels
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-*
+
+      - name: Release
+        uses: softprops/action-gh-release@v2
+        with:
+          tag_name: ${{ inputs.tag_name }}
+          repository: sgl-project/whl
+          token: ${{ secrets.WHL_TOKEN }}
+          files: |
+            sgl-kernel/dist/*
diff --git a/sgl-kernel/build.sh b/sgl-kernel/build.sh
index c89922481..1caa892bc 100755
--- a/sgl-kernel/build.sh
+++ b/sgl-kernel/build.sh
@@ -4,6 +4,12 @@ PYTHON_VERSION=$1
 CUDA_VERSION=$2
 PYTHON_ROOT_PATH=/opt/python/cp${PYTHON_VERSION//.}-cp${PYTHON_VERSION//.}
 
+if (( ${CUDA_VERSION%.*} < 12 )); then
+    ENABLE_SM90A=0
+else
+    ENABLE_SM90A=1
+fi
+
 docker run --rm \
     -v "$(pwd)":/sgl-kernel \
     pytorch/manylinux-builder:cuda${CUDA_VERSION} \
@@ -13,7 +19,7 @@ docker run --rm \
     export CUDA_VERSION=${CUDA_VERSION} && \
     export SGL_KERNEL_ENABLE_BF16=1 && \
     export SGL_KERNEL_ENABLE_FP8=1 && \
-    export SGL_KERNEL_ENABLE_SM90A=1 && \
+    export SGL_KERNEL_ENABLE_SM90A=${ENABLE_SM90A} && \
     mkdir -p /usr/lib/x86_64-linux-gnu/ && \
     ln -s /usr/local/cuda-${CUDA_VERSION}/targets/x86_64-linux/lib/stubs/libcuda.so /usr/lib/x86_64-linux-gnu/libcuda.so && \
     cd /sgl-kernel && \