Rename runner labels (#11228)

2025-10-05 18:05:41 -07:00
parent 41763ba079
commit d645ae90a3
13 changed files with 48 additions and 277 deletions
--- a/.github/workflows/pr-test-pd-router.yml
+++ b/.github/workflows/pr-test-pd-router.yml
@@ -28,7 +28,7 @@ permissions:
 jobs:
  test-disaggregation:
    if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
-    runs-on: [h200]
+    runs-on: [8-gpu-h200-oracle]
    timeout-minutes: 45
    steps:
--- a/.github/workflows/pr-test-rust.yml
+++ b/.github/workflows/pr-test-rust.yml
@@ -83,7 +83,7 @@ jobs:
  pytest-rust:
    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-    runs-on: BM.A10.4
+    runs-on: 4-gpu-a10
    timeout-minutes: 25
    steps:
      - name: Checkout code
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -62,7 +62,7 @@ jobs:
  sgl-kernel-build-wheels:
    needs: [check-changes]
    if: needs.check-changes.outputs.sgl_kernel == 'true'
-    runs-on: sgl-kernel-build-node
+    runs-on: x64-kernel-build-node
    strategy:
      matrix:
        include:
@@ -323,7 +323,7 @@ jobs:
    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
    if: always() && !failure() && !cancelled() &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 8-gpu-runner
+    runs-on: 8-gpu-h200
    strategy:
      fail-fast: false
      matrix:
@@ -641,7 +641,7 @@ jobs:
    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
    if: always() && !failure() && !cancelled() &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 8-gpu-runner
+    runs-on: 8-gpu-h200
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
@@ -668,7 +668,7 @@ jobs:
    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
    if: always() && !failure() && !cancelled() &&
        ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 4-b200-runner
+    runs-on: 4-gpu-b200
    strategy:
      fail-fast: false
    steps:
@@ -702,7 +702,8 @@ jobs:
      unit-test-frontend, unit-test-backend-1-gpu,
      unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
-      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
+      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3,
+      performance-test-2-gpu,
      accuracy-test-1-gpu, accuracy-test-2-gpu,
      unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
      unit-test-backend-4-gpu-b200,
--- a/.github/workflows/release-docker-dev.yml
+++ b/.github/workflows/release-docker-dev.yml
@@ -8,7 +8,7 @@ on:
 jobs:
  build-dev-x86:
    if: ${{ github.repository == 'sgl-project/sglang' }}
-    runs-on: nvidia
+    runs-on: x64-docker-build-node
    strategy:
      matrix:
        variant:
@@ -48,12 +48,12 @@ jobs:
  build-dev-arm:
    if: ${{ github.repository == 'sgl-project/sglang' }}
-    runs-on: sgl-kernel-release-node-arm
+    runs-on: arm-docker-build-node
    strategy:
      matrix:
        variant:
          - version: 12.9.1
-            type: blackwell_aarch64
+            type: all_aarch64
            tag: dev-arm64
    steps:
      - name: Delete huge unnecessary tools folder
--- a/.github/workflows/release-docker.yml
+++ b/.github/workflows/release-docker.yml
@@ -14,13 +14,9 @@ jobs:
    strategy:
      matrix:
        variant:
-          - cuda_version: "12.6.1"
-            build_type: "all"
-          - cuda_version: "12.8.1"
-            build_type: "blackwell"
          - cuda_version: "12.9.1"
-            build_type: "blackwell"
+            build_type: "all"
-    runs-on: nvidia
+    runs-on: x64-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
@@ -67,8 +63,6 @@ jobs:
          if [ "${{ matrix.variant.build_type }}" = "all" ]; then
            tag_suffix=""
-          elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then
-            tag_suffix="-b200"
          else
            echo "Unsupported build type"
            exit 1
@@ -87,8 +81,8 @@ jobs:
      matrix:
        variant:
          - cuda_version: "12.9.1"
-            build_type: "blackwell_aarch64"
+            build_type: "all_aarch64"
-    runs-on: sgl-kernel-release-node-arm
+    runs-on: arm-docker-build-node
    steps:
      - name: Delete huge unnecessary tools folder
        run: rm -rf /opt/hostedtoolcache
--- a/.github/workflows/release-whl-kernel-cu118.yml
+++ b/.github/workflows/release-whl-kernel-cu118.yml
@@ -1,92 +0,0 @@
 name: Release SGLang Kernel Wheel (cu118)
 on:
  workflow_dispatch:
    inputs:
      tag_name:
        type: string
  push:
    branches:
      - main
    paths:
      - sgl-kernel/python/sgl_kernel/version.py
 jobs:
  build-wheels:
    if: github.repository == 'sgl-project/sglang'
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.9"]
        cuda-version: ["11.8"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*
  release:
    needs: build-wheels
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*
      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi
      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*
      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py
      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push
--- a/.github/workflows/release-whl-kernel.yml
+++ b/.github/workflows/release-whl-kernel.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
  build-cu129:
    if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node
+    runs-on: x64-kernel-build-node
    strategy:
      matrix:
        python-version: ["3.10"]
@@ -46,38 +46,14 @@ jobs:
          pip install twine
          python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
  build-cu124:
    if: github.repository == 'sgl-project/sglang'
    needs: build-cu129
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.4"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*
-  release-cu124:
+  release-cu129:
-    needs: build-cu124
+    needs: build-cu129
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
@@ -114,99 +90,20 @@ jobs:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py --cuda 124
+        run: python3 scripts/update_kernel_whl_index.py --cuda 129
      - name: Push wheel index
        run: |
          cd sgl-whl
-          git config --local user.name "github-actions[bot]"
+          git config --local user.name "sglang-bot"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --local user.email "sglangbot@gmail.com"
          git add -A
          git commit -m "update whl index"
          git push
  build-cu128:
    if: github.repository == 'sgl-project/sglang'
    needs: build-cu129
    runs-on: sgl-kernel-release-node
    strategy:
      matrix:
        python-version: ["3.10"]
        cuda-version: ["12.8"]
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: "recursive"
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Build wheels
        run: |
          cd sgl-kernel
          chmod +x ./build.sh
          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
          path: sgl-kernel/dist/*
  release-cu128:
    needs: build-cu128
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Download artifacts
        uses: actions/download-artifact@v4
        with:
          path: sgl-kernel/dist/
          merge-multiple: true
          pattern: wheel-*
      - name: Set tag name
        id: set_tag_name
        run: |
          if [ -z "${{ inputs.tag_name }}" ]; then
            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
          else
            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
          fi
      - name: Release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
          repository: sgl-project/whl
          token: ${{ secrets.WHL_TOKEN }}
          files: |
            sgl-kernel/dist/*
      - name: Clone wheel index
        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
        env:
          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
      - name: Update wheel index
        run: python3 scripts/update_kernel_whl_index.py --cuda 128
      - name: Push wheel index
        run: |
          cd sgl-whl
          git config --local user.name "github-actions[bot]"
          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
          git add -A
          git commit -m "update whl index"
          git push
  build-cu129-aarch64:
    if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node-arm
+    runs-on: arm-kernel-build-node
    strategy:
      matrix:
        python-version: ["3.10"]
@@ -282,8 +179,8 @@ jobs:
      - name: Push wheel index
        run: |
          cd sgl-whl
-          git config --local user.name "github-actions[bot]"
+          git config --local user.name "sglang-bot"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --local user.email "sglangbot@gmail.com"
          git add -A
          git commit -m "update whl index"
          git push
--- a/docs/get_started/install.md
+++ b/docs/get_started/install.md
@@ -3,7 +3,7 @@
 You can install SGLang using one of the methods below.
 This page primarily applies to common NVIDIA GPU platforms.
-For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
+For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
 ## Method 1: With pip or uv
--- a/docs/platforms/blackwell_gpu.md
+++ b/docs/platforms/blackwell_gpu.md
@@ -1,9 +0,0 @@
 # Blackwell GPUs
 We will release the pre-built wheels soon. Before that, please try to compile from source or check the blackwell docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
 ## B200 with x86 CPUs
 TODO
 ## GB200/GB300 with ARM CPUs
 TODO
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -91,9 +91,14 @@ tracing = [
    "opentelemetry-sdk",
 ]
 all = ["sglang[test]", "sglang[decord]"]
 all_aarch64 = ["sglang[test]"]
 dev = ["sglang[test]", "sglang[decord]"]
 # The following will be deprecated in 2 weeks
 blackwell = ["sglang[test]", "sglang[decord]"]
 blackwell_aarch64 = ["sglang[test]"]
-dev = ["sglang[test]", "sglang[decord]"]
+
 [project.urls]
 "Homepage" = "https://github.com/sgl-project/sglang"
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -3,21 +3,16 @@
 set -euxo pipefail
 IS_BLACKWELL=${IS_BLACKWELL:-0}
-
+CU_VERSION="cu128"
-if [ "$IS_BLACKWELL" = "1" ]; then
-    CU_VERSION="cu129"
-else
-    CU_VERSION="cu126"
-fi
 # Clear torch compilation cache
 python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
 # Kill existing processes
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 bash "${SCRIPT_DIR}/../killall_sglang.sh"
 echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
 # Clear torch compilation cache
 python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
 # Install apt packages
 apt install -y git libnuma-dev
@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
    PIP_INSTALL_SUFFIX="--break-system-packages"
    # Clean up existing installations
-    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
+    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
 else
    # In normal cases, we use uv, which is much faster than pip.
    pip install --upgrade pip
@@ -40,7 +35,7 @@ else
    PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
    # Clean up existing installations
-    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
+    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
 fi
 # Install the main package
@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
 # Install router for pd-disagg test
 SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
 # Install sgl-kernel
 SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
 SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
 echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
-if [ "$IS_BLACKWELL" = "1" ]; then
-    SGL_KERNEL_CUDA_VERSION=cu128
-else
-    SGL_KERNEL_CUDA_VERSION=cu124
-fi
 if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
    ls -alh sgl-kernel/dist
-    WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true)
+    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    if [ -f "$WHEEL_FILE" ]; then
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    else
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    fi
 else
-    $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+    $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
 fi
 # Show current packages
@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
    $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
 fi
 # Install FlashMLA for attention backend tests
 # $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
 # Show current packages
 $PIP_CMD list
-
+python3 -c "import torch; print(torch.version.cuda)"
 if [ -n "${HF_TOKEN:-}" ]; then
    $PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
    hf auth login --token $HF_TOKEN
 fi
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -25,7 +25,6 @@ make build
 ```
 Note:
 The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
 ### Build with [ccache](https://github.com/ccache/ccache)
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -88,7 +88,7 @@ suites = {
        TestFile("test_metrics.py", 32),
        TestFile("test_metrics_utils.py", 1),
        TestFile("test_mla.py", 167),
-        TestFile("test_mla_deepseek_v3.py", 1420),
+        TestFile("test_mla_deepseek_v3.py", 500),
        TestFile("test_mla_int8_deepseek_v3.py", 429),
        TestFile("test_mla_flashinfer.py", 302),
        TestFile("test_mla_fp8.py", 93),
@@ -130,7 +130,7 @@ suites = {
        TestFile("lora/test_lora_tp.py", 116),
        TestFile("rl/test_update_weights_from_distributed.py", 103),
        TestFile("test_data_parallelism.py", 73),
-        TestFile("test_dp_attention.py", 277),
+        TestFile("test_dp_attention.py", 594),
        TestFile("test_load_weights_from_remote_instance.py", 72),
        TestFile("test_patch_torch.py", 19),
        TestFile("test_release_memory_occupation.py", 257),
@@ -138,17 +138,16 @@ suites = {
        TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
    ],
    "per-commit-4-gpu": [
-        TestFile("test_gpt_oss_4gpu.py", 600),
+        TestFile("test_gpt_oss_4gpu.py", 300),
-        TestFile("test_local_attn.py", 250),
+        TestFile("test_local_attn.py", 411),
-        TestFile("test_pp_single_node.py", 372),
+        TestFile("test_pp_single_node.py", 481),
-        TestFile("models/test_qwen3_next_models.py", 200),
+        TestFile("models/test_qwen3_next_models.py", 291),
        TestFile("models/test_falcon_h1_models.py", 200),
        TestFile("test_multi_instance_release_memory_occupation.py", 64),
    ],
    "per-commit-8-gpu": [
        TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
        TestFile("lora/test_lora_llama4.py", 400),
-        TestFile("test_disaggregation.py", 600),
+        TestFile("test_disaggregation.py", 499),
        TestFile("test_disaggregation_dp_attention.py", 155),
        TestFile("test_disaggregation_different_tp.py", 600),
        TestFile("test_disaggregation_pp.py", 140),