From d645ae90a3f538c72baec8bbf312388c22e25eae Mon Sep 17 00:00:00 2001
From: Lianmin Zheng <lianminzheng@gmail.com>
Date: Sun, 5 Oct 2025 18:05:41 -0700
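Subject: [PATCH] Rename runner labels (#11228)

Besides renaming the `runs-on` labels in the CI and release workflows,
this patch also:

- removes the cu118, cu124 and cu128 sgl-kernel wheel release jobs,
  leaving cu129 (x86_64 and aarch64);
- reduces the x86 release Docker matrix to CUDA 12.9.1 with build type
  `all`, and switches the arm builds to `all_aarch64`;
- adds the `all_aarch64` extra to python/pyproject.toml and marks
  `blackwell` / `blackwell_aarch64` for deprecation;
- simplifies scripts/ci/ci_install_dependency.sh: CU_VERSION is fixed
  to `cu128`, `torch` is uninstalled up front, sgl-kernel is installed
  by version instead of by wheel URL, and the Hugging Face login step
  is dropped;
- deletes docs/platforms/blackwell_gpu.md and its link in install.md;
- updates the TestFile numbers in test/srt/run_suite.py and removes
  models/test_falcon_h1_models.py from the per-commit-4-gpu suite.
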
---
 .github/workflows/pr-test-pd-router.yml       |   2 +-
 .github/workflows/pr-test-rust.yml            |   2 +-
 .github/workflows/pr-test.yml                 |  11 +-
 .github/workflows/release-docker-dev.yml      |   6 +-
 .github/workflows/release-docker.yml          |  14 +-
 .../workflows/release-whl-kernel-cu118.yml    |  92 -------------
 .github/workflows/release-whl-kernel.yml      | 121 ++----------------
 docs/get_started/install.md                   |   2 +-
 docs/platforms/blackwell_gpu.md               |   9 --
 python/pyproject.toml                         |   7 +-
 scripts/ci/ci_install_dependency.sh           |  43 ++-----
 sgl-kernel/README.md                          |   1 -
 test/srt/run_suite.py                         |  15 +--
 13 files changed, 48 insertions(+), 277 deletions(-)
 delete mode 100644 .github/workflows/release-whl-kernel-cu118.yml
 delete mode 100644 docs/platforms/blackwell_gpu.md

diff --git a/.github/workflows/pr-test-pd-router.yml b/.github/workflows/pr-test-pd-router.yml
index 68900c94f..95278aed8 100644
--- a/.github/workflows/pr-test-pd-router.yml
+++ b/.github/workflows/pr-test-pd-router.yml
@@ -28,7 +28,7 @@ permissions:
 jobs:
   test-disaggregation:
     if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark'))
-    runs-on: [h200]
+    runs-on: [8-gpu-h200-oracle]
     timeout-minutes: 45
 
     steps:
diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml
index 1017e3c0b..3608bdaa2 100644
--- a/.github/workflows/pr-test-rust.yml
+++ b/.github/workflows/pr-test-rust.yml
@@ -83,7 +83,7 @@ jobs:
 
   pytest-rust:
     if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
-    runs-on: BM.A10.4
+    runs-on: 4-gpu-a10
     timeout-minutes: 25
     steps:
       - name: Checkout code
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index d983c91bf..7f504e3de 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -62,7 +62,7 @@ jobs:
   sgl-kernel-build-wheels:
     needs: [check-changes]
     if: needs.check-changes.outputs.sgl_kernel == 'true'
-    runs-on: sgl-kernel-build-node
+    runs-on: x64-kernel-build-node
     strategy:
       matrix:
         include:
@@ -323,7 +323,7 @@ jobs:
     needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
         ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 8-gpu-runner
+    runs-on: 8-gpu-h200
     strategy:
       fail-fast: false
       matrix:
@@ -641,7 +641,7 @@ jobs:
     needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
         ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 8-gpu-runner
+    runs-on: 8-gpu-h200
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -668,7 +668,7 @@ jobs:
     needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
         ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
-    runs-on: 4-b200-runner
+    runs-on: 4-gpu-b200
     strategy:
       fail-fast: false
     steps:
@@ -702,7 +702,8 @@ jobs:
 
       unit-test-frontend, unit-test-backend-1-gpu,
       unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
-      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
+      performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3,
+      performance-test-2-gpu,
       accuracy-test-1-gpu, accuracy-test-2-gpu,
       unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
       unit-test-backend-4-gpu-b200,
diff --git a/.github/workflows/release-docker-dev.yml b/.github/workflows/release-docker-dev.yml
index 4b4701099..04fb366eb 100644
--- a/.github/workflows/release-docker-dev.yml
+++ b/.github/workflows/release-docker-dev.yml
@@ -8,7 +8,7 @@ on:
 jobs:
   build-dev-x86:
     if: ${{ github.repository == 'sgl-project/sglang' }}
-    runs-on: nvidia
+    runs-on: x64-docker-build-node
     strategy:
       matrix:
         variant:
@@ -48,12 +48,12 @@ jobs:
 
   build-dev-arm:
     if: ${{ github.repository == 'sgl-project/sglang' }}
-    runs-on: sgl-kernel-release-node-arm
+    runs-on: arm-docker-build-node
     strategy:
       matrix:
         variant:
           - version: 12.9.1
-            type: blackwell_aarch64
+            type: all_aarch64
             tag: dev-arm64
     steps:
       - name: Delete huge unnecessary tools folder
diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml
index 7b5a6dda7..c7bc8194e 100644
--- a/.github/workflows/release-docker.yml
+++ b/.github/workflows/release-docker.yml
@@ -14,13 +14,9 @@ jobs:
     strategy:
       matrix:
         variant:
-          - cuda_version: "12.6.1"
-            build_type: "all"
-          - cuda_version: "12.8.1"
-            build_type: "blackwell"
           - cuda_version: "12.9.1"
-            build_type: "blackwell"
-    runs-on: nvidia
+            build_type: "all"
+    runs-on: x64-docker-build-node
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
@@ -67,8 +63,6 @@ jobs:
 
           if [ "${{ matrix.variant.build_type }}" = "all" ]; then
             tag_suffix=""
-          elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then
-            tag_suffix="-b200"
           else
             echo "Unsupported build type"
             exit 1
@@ -87,8 +81,8 @@ jobs:
       matrix:
         variant:
           - cuda_version: "12.9.1"
-            build_type: "blackwell_aarch64"
-    runs-on: sgl-kernel-release-node-arm
+            build_type: "all_aarch64"
+    runs-on: arm-docker-build-node
     steps:
       - name: Delete huge unnecessary tools folder
         run: rm -rf /opt/hostedtoolcache
diff --git a/.github/workflows/release-whl-kernel-cu118.yml b/.github/workflows/release-whl-kernel-cu118.yml
deleted file mode 100644
index 4757bcaa1..000000000
--- a/.github/workflows/release-whl-kernel-cu118.yml
+++ /dev/null
@@ -1,92 +0,0 @@
-name: Release SGLang Kernel Wheel (cu118)
-
-on:
-  workflow_dispatch:
-    inputs:
-      tag_name:
-        type: string
-  push:
-    branches:
-      - main
-    paths:
-      - sgl-kernel/python/sgl_kernel/version.py
-
-jobs:
-  build-wheels:
-    if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node
-    strategy:
-      matrix:
-        python-version: ["3.9"]
-        cuda-version: ["11.8"]
-
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: "recursive"
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }}
-        run: |
-          cd sgl-kernel
-          chmod +x ./build.sh
-          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
-          path: sgl-kernel/dist/*
-
-  release:
-    needs: build-wheels
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Download artifacts
-        uses: actions/download-artifact@v4
-        with:
-          path: sgl-kernel/dist/
-          merge-multiple: true
-          pattern: wheel-*
-
-      - name: Set tag name
-        id: set_tag_name
-        run: |
-          if [ -z "${{ inputs.tag_name }}" ]; then
-            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
-            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
-          else
-            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
-          repository: sgl-project/whl
-          token: ${{ secrets.WHL_TOKEN }}
-          files: |
-            sgl-kernel/dist/*
-
-      - name: Clone wheel index
-        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
-        env:
-          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
-
-      - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py
-
-      - name: Push wheel index
-        run: |
-          cd sgl-whl
-          git config --local user.name "github-actions[bot]"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git add -A
-          git commit -m "update whl index"
-          git push
diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml
index c80fd1fd1..5657332cf 100644
--- a/.github/workflows/release-whl-kernel.yml
+++ b/.github/workflows/release-whl-kernel.yml
@@ -19,7 +19,7 @@ concurrency:
 jobs:
   build-cu129:
     if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node
+    runs-on: x64-kernel-build-node
     strategy:
       matrix:
         python-version: ["3.10"]
@@ -46,38 +46,14 @@ jobs:
           pip install twine
           python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }}
 
-  build-cu124:
-    if: github.repository == 'sgl-project/sglang'
-    needs: build-cu129
-    runs-on: sgl-kernel-release-node
-    strategy:
-      matrix:
-        python-version: ["3.10"]
-        cuda-version: ["12.4"]
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: "recursive"
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Build wheels
-        run: |
-          cd sgl-kernel
-          chmod +x ./build.sh
-          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
-
       - name: Upload artifacts
         uses: actions/upload-artifact@v4
         with:
           name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
           path: sgl-kernel/dist/*
 
-  release-cu124:
-    needs: build-cu124
+  release-cu129:
+    needs: build-cu129
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v4
@@ -114,99 +90,20 @@ jobs:
           WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
 
       - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py --cuda 124
+        run: python3 scripts/update_kernel_whl_index.py --cuda 129
 
       - name: Push wheel index
         run: |
           cd sgl-whl
-          git config --local user.name "github-actions[bot]"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
-          git add -A
-          git commit -m "update whl index"
-          git push
-
-  build-cu128:
-    if: github.repository == 'sgl-project/sglang'
-    needs: build-cu129
-    runs-on: sgl-kernel-release-node
-    strategy:
-      matrix:
-        python-version: ["3.10"]
-        cuda-version: ["12.8"]
-    steps:
-      - uses: actions/checkout@v4
-        with:
-          submodules: "recursive"
-
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v5
-        with:
-          python-version: ${{ matrix.python-version }}
-
-      - name: Build wheels
-        run: |
-          cd sgl-kernel
-          chmod +x ./build.sh
-          ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
-
-      - name: Upload artifacts
-        uses: actions/upload-artifact@v4
-        with:
-          name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}
-          path: sgl-kernel/dist/*
-
-  release-cu128:
-    needs: build-cu128
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Download artifacts
-        uses: actions/download-artifact@v4
-        with:
-          path: sgl-kernel/dist/
-          merge-multiple: true
-          pattern: wheel-*
-
-      - name: Set tag name
-        id: set_tag_name
-        run: |
-          if [ -z "${{ inputs.tag_name }}" ]; then
-            TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)"
-            echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT
-          else
-            echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT
-          fi
-
-      - name: Release
-        uses: softprops/action-gh-release@v2
-        with:
-          tag_name: ${{ steps.set_tag_name.outputs.tag_name }}
-          repository: sgl-project/whl
-          token: ${{ secrets.WHL_TOKEN }}
-          files: |
-            sgl-kernel/dist/*
-
-      - name: Clone wheel index
-        run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl
-        env:
-          WHL_TOKEN: ${{ secrets.WHL_TOKEN }}
-
-      - name: Update wheel index
-        run: python3 scripts/update_kernel_whl_index.py --cuda 128
-
-      - name: Push wheel index
-        run: |
-          cd sgl-whl
-          git config --local user.name "github-actions[bot]"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "sglang-bot"
+          git config --local user.email "sglangbot@gmail.com"
           git add -A
           git commit -m "update whl index"
           git push
 
   build-cu129-aarch64:
     if: github.repository == 'sgl-project/sglang'
-    runs-on: sgl-kernel-release-node-arm
+    runs-on: arm-kernel-build-node
     strategy:
       matrix:
         python-version: ["3.10"]
@@ -282,8 +179,8 @@ jobs:
       - name: Push wheel index
         run: |
           cd sgl-whl
-          git config --local user.name "github-actions[bot]"
-          git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
+          git config --local user.name "sglang-bot"
+          git config --local user.email "sglangbot@gmail.com"
           git add -A
           git commit -m "update whl index"
           git push
diff --git a/docs/get_started/install.md b/docs/get_started/install.md
index 2721555fb..e5653774e 100644
--- a/docs/get_started/install.md
+++ b/docs/get_started/install.md
@@ -3,7 +3,7 @@
 You can install SGLang using one of the methods below.
 
 This page primarily applies to common NVIDIA GPU platforms.
-For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
+For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md).
 
 ## Method 1: With pip or uv
 
diff --git a/docs/platforms/blackwell_gpu.md b/docs/platforms/blackwell_gpu.md
deleted file mode 100644
index 8c433b3f0..000000000
--- a/docs/platforms/blackwell_gpu.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Blackwell GPUs
-
-We will release the pre-built wheels soon. Before that, please try to compile from source or check the blackwell docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags).
-
-## B200 with x86 CPUs
-TODO
-
-## GB200/GB300 with ARM CPUs
-TODO
diff --git a/python/pyproject.toml b/python/pyproject.toml
index ab2adc9e5..fb095f2a0 100755
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -91,9 +91,14 @@ tracing = [
     "opentelemetry-sdk",
 ]
 all = ["sglang[test]", "sglang[decord]"]
+all_aarch64 = ["sglang[test]"]
+dev = ["sglang[test]", "sglang[decord]"]
+
+
+# Legacy aliases of `all` / `all_aarch64`; they will be deprecated in 2 weeks (as of 2025-10-05)
 blackwell = ["sglang[test]", "sglang[decord]"]
 blackwell_aarch64 = ["sglang[test]"]
-dev = ["sglang[test]", "sglang[decord]"]
+
 
 [project.urls]
 "Homepage" = "https://github.com/sgl-project/sglang"
diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh
index e7f9f0c42..61ea15565 100755
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -3,21 +3,16 @@
 set -euxo pipefail
 
 IS_BLACKWELL=${IS_BLACKWELL:-0}
-
-if [ "$IS_BLACKWELL" = "1" ]; then
-    CU_VERSION="cu129"
-else
-    CU_VERSION="cu126"
-fi
-
-# Clear torch compilation cache
-python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
+CU_VERSION="cu128"
 
 # Kill existing processes
 SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
 bash "${SCRIPT_DIR}/../killall_sglang.sh"
 echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}"
 
+# Clear torch compilation cache
+python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)'
+
 # Install apt packages
 apt install -y git libnuma-dev
 
@@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then
     PIP_INSTALL_SUFFIX="--break-system-packages"
 
     # Clean up existing installations
-    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true
+    $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true
 else
     # In normal cases, we use uv, which is much faster than pip.
     pip install --upgrade pip
@@ -40,7 +35,7 @@ else
     PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match"
 
     # Clean up existing installations
-    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true
+    $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true
 fi
 
 # Install the main package
@@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org
 # Install router for pd-disagg test
 SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX
 
+# Install sgl-kernel
 SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml)
 SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml)
 echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}"
 
-if [ "$IS_BLACKWELL" = "1" ]; then
-    SGL_KERNEL_CUDA_VERSION=cu128
-else
-    SGL_KERNEL_CUDA_VERSION=cu124
-fi
-
 if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then
     ls -alh sgl-kernel/dist
-    WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true)
-    if [ -f "$WHEEL_FILE" ]; then
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    else
-      $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
-    fi
+    $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
 else
-    $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX
+    $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX
 fi
 
 # Show current packages
@@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then
     $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX
 fi
 
-# Install FlashMLA for attention backend tests
-# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX
-
 # Show current packages
 $PIP_CMD list
-
-
-if [ -n "${HF_TOKEN:-}" ]; then
-    $PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
-    hf auth login --token $HF_TOKEN
-fi
+python3 -c "import torch; print(torch.version.cuda)"
diff --git a/sgl-kernel/README.md b/sgl-kernel/README.md
index f86d5851f..cd3c0288a 100644
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -25,7 +25,6 @@ make build
 ```
 
 Note:
-
 The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`.
 
 ### Build with [ccache](https://github.com/ccache/ccache)
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 5904e4ab1..9aaad9482 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -88,7 +88,7 @@ suites = {
         TestFile("test_metrics.py", 32),
         TestFile("test_metrics_utils.py", 1),
         TestFile("test_mla.py", 167),
-        TestFile("test_mla_deepseek_v3.py", 1420),
+        TestFile("test_mla_deepseek_v3.py", 500),
         TestFile("test_mla_int8_deepseek_v3.py", 429),
         TestFile("test_mla_flashinfer.py", 302),
         TestFile("test_mla_fp8.py", 93),
@@ -130,7 +130,7 @@ suites = {
         TestFile("lora/test_lora_tp.py", 116),
         TestFile("rl/test_update_weights_from_distributed.py", 103),
         TestFile("test_data_parallelism.py", 73),
-        TestFile("test_dp_attention.py", 277),
+        TestFile("test_dp_attention.py", 594),
         TestFile("test_load_weights_from_remote_instance.py", 72),
         TestFile("test_patch_torch.py", 19),
         TestFile("test_release_memory_occupation.py", 257),
@@ -138,17 +138,16 @@ suites = {
         TestFile("hicache/test_hicache_storage_3fs_backend.py", 200),
     ],
     "per-commit-4-gpu": [
-        TestFile("test_gpt_oss_4gpu.py", 600),
-        TestFile("test_local_attn.py", 250),
-        TestFile("test_pp_single_node.py", 372),
-        TestFile("models/test_qwen3_next_models.py", 200),
-        TestFile("models/test_falcon_h1_models.py", 200),
+        TestFile("test_gpt_oss_4gpu.py", 300),
+        TestFile("test_local_attn.py", 411),
+        TestFile("test_pp_single_node.py", 481),
+        TestFile("models/test_qwen3_next_models.py", 291),
         TestFile("test_multi_instance_release_memory_occupation.py", 64),
     ],
     "per-commit-8-gpu": [
         TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400),
         TestFile("lora/test_lora_llama4.py", 400),
-        TestFile("test_disaggregation.py", 600),
+        TestFile("test_disaggregation.py", 499),
         TestFile("test_disaggregation_dp_attention.py", 155),
         TestFile("test_disaggregation_different_tp.py", 600),
         TestFile("test_disaggregation_pp.py", 140),