diff --git a/.github/workflows/execute-notebook.yml b/.github/workflows/execute-notebook.yml
index 7298d80ec..aa5161150 100644
--- a/.github/workflows/execute-notebook.yml
+++ b/.github/workflows/execute-notebook.yml
@@ -6,6 +6,7 @@ on:
     paths:
       - "python/sglang/**"
       - "docs/**"
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 
@@ -17,7 +18,7 @@ concurrency:
 jobs:
   run-all-notebooks:
     runs-on: 1-gpu-runner
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
index 3a281299a..f529be66f 100644
--- a/.github/workflows/lint.yml
+++ b/.github/workflows/lint.yml
@@ -18,5 +18,13 @@ jobs:
           python -m pip install pre-commit
           pre-commit install
 
-      - name: Linting
+      - name: Run pre-commit checks
         run: pre-commit run --all-files --show-diff-on-failure
+
+      - name: Run sgl-kernel clang-format checks
+        uses: DoozyX/clang-format-lint-action@v0.18.1
+        with:
+          source: sgl-kernel
+          extensions: h,c,cpp,hpp,cu,cuh,cc
+          clangFormatVersion: 18
+          style: file
diff --git a/.github/workflows/pr-benchmark-rust.yml b/.github/workflows/pr-benchmark-rust.yml
index 937fbbea1..0ff6ceb62 100644
--- a/.github/workflows/pr-benchmark-rust.yml
+++ b/.github/workflows/pr-benchmark-rust.yml
@@ -9,7 +9,7 @@ on:
     branches: [ main ]
     paths:
       - "sgl-router/**"
-    types: [opened, synchronize, reopened, labeled]
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -24,11 +24,11 @@ permissions:
   contents: read
   pull-requests: write
   issues: write
+
 jobs:
   # Quick check job that always runs on PRs
   benchmark-compile-check:
     name: Benchmark Compilation Check
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml
index 2c7e2c652..0a2d01a21 100644
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -17,6 +17,7 @@ on:
       - "test/**"
       - "sgl-kernel/**"
       - ".github/workflows/pr-test-amd.yml"
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -25,8 +26,7 @@ concurrency:
 
 jobs:
   accuracy-test-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -52,8 +52,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 models/test_qwen_models.py
 
   accuracy-test-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -77,8 +76,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
 
   mla-test-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -102,8 +100,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 test_mla.py
 
   performance-test-1-gpu-part-1-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -143,8 +140,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_non_stream_small_batch_size
 
   performance-test-1-gpu-part-2-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -178,8 +174,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_offline_throughput_default_fp8
 
   bench-test-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -223,8 +218,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 -m unittest test_bench_serving.TestBenchServing.test_moe_offline_throughput_without_radix_cache
 
   unit-test-backend-1-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -249,8 +243,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
 
   unit-test-backend-1-gpu-amd-mi35x:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -274,8 +267,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
 
   unit-test-backend-2-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -299,8 +291,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
 
   unit-test-backend-8-gpu-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -324,8 +315,7 @@ jobs:
           bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
 
   unit-test-sgl-kernel-amd:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     strategy:
       fail-fast: false
       matrix:
@@ -353,25 +343,3 @@ jobs:
           docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
           docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
           docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
-
-  pr-test-amd-finish:
-    if: always()
-    needs: [
-      accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
-      accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
-      unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
-      unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
-    ]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
diff --git a/.github/workflows/pr-test-h20.yml b/.github/workflows/pr-test-h20.yml
index 58e335289..f91b22108 100644
--- a/.github/workflows/pr-test-h20.yml
+++ b/.github/workflows/pr-test-h20.yml
@@ -5,6 +5,7 @@ on:
     branches: [ main ]
   pull_request:
     branches: [ main ]
+    types: [synchronize, labeled]
   workflow_dispatch:
     inputs:
       version:
@@ -23,17 +24,29 @@ jobs:
   check-changes:
     runs-on: ubuntu-latest
     outputs:
-      src: ${{ steps.filter.outputs.src }}
+      h20_files: ${{ steps.filter.outputs.h20_files }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Fail if the PR does not have the 'run-ci' label
+        if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
+        run: |
+          echo "This pull request does not have the 'run-ci' label. Failing the workflow."
+          exit 1
+
+      - name: Fail if the PR is a draft
+        if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
+        run: |
+          echo "This pull request is a draft. Failing the workflow."
+          exit 1
+
       - name: Detect file changes
         id: filter
         uses: dorny/paths-filter@v3
         with:
           filters: |
-            src:
+            h20_files:
               - "python/sglang/srt/models/deepseek*"
               - "python/sglang/srt/layers/moe/**"
               - ".github/workflows/pr-test-h20.yml"
@@ -41,9 +54,7 @@ jobs:
 
   per-commit-8-gpu-h20:
     needs: [check-changes]
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+    if: needs.check-changes.outputs.h20_files == 'true'
     runs-on: 8-gpu-h20
     steps:
       - name: Checkout code
@@ -65,17 +76,31 @@ jobs:
       check-changes,
       per-commit-8-gpu-h20,
     ]
-    if: needs.check-changes.outputs.src == 'true'
+    if: always()
     runs-on: ubuntu-latest
     steps:
       - name: Check all dependent job statuses
+        env:
+          # Pass 'needs' through the environment instead of splicing it into the script text
+          NEEDS_JSON: ${{ toJson(needs) }}
         run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
+          # Get a list of all job names from the JSON keys
+          job_names=$(echo "$NEEDS_JSON" | jq -r 'keys_unsorted[]')
+
+          for job in $job_names; do
+            # For each job, extract its result
+            result=$(echo "$NEEDS_JSON" | jq -r --arg j "$job" '.[$j].result')
+
+            # Print the job name and its result
+            echo "$job: $result"
+
+            # Check for failure or cancellation and exit if found
+            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
+              echo "The above jobs failed."
               exit 1
             fi
           done
+
+          # Reaching this point means no job failed or was cancelled (skipped jobs are tolerated)
           echo "All jobs completed successfully"
           exit 0
diff --git a/.github/workflows/pr-test-npu.yml b/.github/workflows/pr-test-npu.yml
index c0fe381e3..cca050111 100644
--- a/.github/workflows/pr-test-npu.yml
+++ b/.github/workflows/pr-test-npu.yml
@@ -15,6 +15,7 @@ on:
       - "scripts/ci/**"
       - "test/**"
       - ".github/workflows/pr-test-npu.yml"
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -23,8 +24,7 @@ concurrency:
 
 jobs:
   per-commit-1-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: linux-arm64-npu-1
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -58,8 +58,7 @@ jobs:
           python3 run_suite.py --suite per-commit-1-ascend-npu
 
   per-commit-2-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: linux-arm64-npu-2
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -93,8 +92,7 @@ jobs:
           python3 run_suite.py --suite per-commit-2-ascend-npu
 
   per-commit-4-ascend-npu:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: linux-arm64-npu-4
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-910b-ubuntu22.04-py3.11
@@ -128,8 +126,7 @@ jobs:
           python3 run_suite.py --suite per-commit-4-ascend-npu --timeout-per-file 3600
 
   per-commit-16-ascend-a3:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: linux-aarch64-a3-16
     container:
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.2.rc1-a3-ubuntu22.04-py3.11
@@ -161,24 +158,3 @@ jobs:
         run: |
           cd test/srt
           python3 run_suite.py --suite per-commit-16-ascend-a3 --timeout-per-file 5400
-
-  pr-test-npu-finish:
-    if: always()
-    needs:
-      - per-commit-1-ascend-npu
-      - per-commit-2-ascend-npu
-      - per-commit-4-ascend-npu
-      - per-commit-16-ascend-a3
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
diff --git a/.github/workflows/pr-test-pd-router.yml b/.github/workflows/pr-test-pd-router.yml
index 4c02e835a..04f75c2a1 100644
--- a/.github/workflows/pr-test-pd-router.yml
+++ b/.github/workflows/pr-test-pd-router.yml
@@ -13,6 +13,7 @@ on:
       - 'python/sglang/srt/disaggregation/**'
       - 'scripts/ci/ci_start_disaggregation_servers.sh'
       - 'sgl-router/**'
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -26,8 +27,7 @@ permissions:
 
 jobs:
   test-disaggregation:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: [h200]
     timeout-minutes: 45
 
diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml
index cf6b031d6..1017e3c0b 100644
--- a/.github/workflows/pr-test-rust.yml
+++ b/.github/workflows/pr-test-rust.yml
@@ -9,6 +9,7 @@ on:
     branches: [ main ]
     paths:
       - "sgl-router/**"
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -21,7 +22,7 @@ env:
 
 jobs:
   unit-test-rust:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: ubuntu-latest
     steps:
       - name: Checkout code
@@ -81,7 +82,7 @@ jobs:
         run: sccache --show-stats
 
   pytest-rust:
-    if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: BM.A10.4
     timeout-minutes: 25
     steps:
diff --git a/.github/workflows/pr-test-xeon.yml b/.github/workflows/pr-test-xeon.yml
index fcc70f286..9ba9be94b 100644
--- a/.github/workflows/pr-test-xeon.yml
+++ b/.github/workflows/pr-test-xeon.yml
@@ -17,6 +17,7 @@ on:
       - "test/**"
       - "sgl-kernel/**"
       - ".github/workflows/pr-test-xeon.yml"
+    types: [synchronize, labeled]
   workflow_dispatch:
 
 concurrency:
@@ -25,8 +26,7 @@ concurrency:
 
 jobs:
   build-test:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: xeon-gnr
     env:
       HF_HOME: /home/sdp/.cache/huggingface
@@ -87,20 +87,3 @@ jobs:
         if: always()
         run: |
           docker rm -f ci_sglang_xeon || true
-
-  pr-test-xeon-finish:
-    if: always()
-    needs: [build-test]
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 41932259f..0a9f024b9 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -2,9 +2,10 @@ name: PR Test
 
 on:
   push:
-    branches: [ main ]
+    branches: [main]
   pull_request:
-    branches: [ main ]
+    branches: [main]
+    types: [synchronize, labeled]
   workflow_dispatch:
     inputs:
       version:
@@ -21,47 +22,46 @@ concurrency:
   cancel-in-progress: true
 
 jobs:
-  # =============================================== sgl-kernel ====================================================
-
-  sgl-kernel-check-changes:
+  # =============================================== check changes ====================================================
+  check-changes:
     runs-on: ubuntu-latest
     outputs:
-      src: ${{ steps.filter.outputs.src }}
+      main_package: ${{ steps.filter.outputs.main_package }}
+      sgl_kernel: ${{ steps.filter.outputs.sgl_kernel }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
+      - name: Fail if the PR does not have the 'run-ci' label
+        if: github.event_name == 'pull_request' && !contains(github.event.pull_request.labels.*.name, 'run-ci')
+        run: |
+          echo "This pull request does not have the 'run-ci' label. Failing the workflow."
+          exit 1
+
+      - name: Fail if the PR is a draft
+        if: github.event_name == 'pull_request' && github.event.pull_request.draft == true
+        run: |
+          echo "This pull request is a draft. Failing the workflow."
+          exit 1
+
       - name: Detect file changes
         id: filter
         uses: dorny/paths-filter@v3
         with:
           filters: |
-            src:
+            main_package:
+              - "python/**"
+              - "scripts/ci/**"
+              - "test/**"
+              - ".github/workflows/pr-test.yml"
+            sgl_kernel:
               - "sgl-kernel/**"
 
-  sgl-kernel-lint:
-    runs-on: ubuntu-latest
-    needs: sgl-kernel-check-changes
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false &&
-      needs.sgl-kernel-check-changes.outputs.src == 'true'
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Check clang-format
-        uses: DoozyX/clang-format-lint-action@v0.18.1
-        with:
-          source: sgl-kernel
-          extensions: h,c,cpp,hpp,cu,cuh,cc
-          clangFormatVersion: 18
-          style: file
+  # =============================================== sgl-kernel ====================================================
 
   sgl-kernel-build-wheels:
-    needs: sgl-kernel-check-changes
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false &&
-      needs.sgl-kernel-check-changes.outputs.src == 'true'
+    needs: [check-changes]
+    if: needs.check-changes.outputs.sgl_kernel == 'true'
     runs-on: sgl-kernel-build-node
     strategy:
       matrix:
@@ -91,7 +91,6 @@ jobs:
         if: github.event_name != 'push' || (matrix.cuda-version != '11.8')
         run: |
           cd sgl-kernel
-          chmod +x ./build.sh
           ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}"
 
       - name: Upload artifacts
@@ -101,10 +100,8 @@ jobs:
           path: sgl-kernel/dist/*
 
   sgl-kernel-unit-test:
-    needs: [sgl-kernel-check-changes, sgl-kernel-build-wheels]
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false &&
-      needs.sgl-kernel-check-changes.outputs.src == 'true'
+    needs: [check-changes, sgl-kernel-build-wheels]
+    if: needs.check-changes.outputs.sgl_kernel == 'true'
     runs-on: 1-gpu-runner
     steps:
       - uses: actions/checkout@v4
@@ -121,13 +118,9 @@ jobs:
           merge-multiple: true
           pattern: wheel-python3.10-cuda12.9
 
-      - name: Install
+      - name: Install dependencies
         run: |
-          bash scripts/ci/ci_install_dependency.sh
-          pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126 && pip3 install pytest
-          pip3 uninstall sgl-kernel -y || true
-          pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
-          pip3 list | grep sgl-kernel
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 30
@@ -135,15 +128,9 @@ jobs:
           cd sgl-kernel
           pytest tests/
 
-      - name: Uninstall dependencies
-        run: |
-          pip3 uninstall sgl-kernel -y
-
   sgl-kernel-mla-test:
-    needs: [sgl-kernel-check-changes, sgl-kernel-build-wheels]
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false &&
-      needs.sgl-kernel-check-changes.outputs.src == 'true'
+    needs: [check-changes, sgl-kernel-build-wheels]
+    if: needs.check-changes.outputs.sgl_kernel == 'true'
     runs-on: 1-gpu-runner
     steps:
       - uses: actions/checkout@v4
@@ -160,13 +147,9 @@ jobs:
           merge-multiple: true
           pattern: wheel-python3.10-cuda12.9
 
-      - name: Install
+      - name: Install dependencies
         run: |
-          bash scripts/ci/ci_install_dependency.sh
-          pip3 install torch==2.8.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/test/cu126
-          pip3 uninstall sgl-kernel -y || true
-          pip3 install sgl-kernel/dist/*whl --force-reinstall --no-deps
-          pip3 list | grep sgl-kernel
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 30
@@ -174,62 +157,19 @@ jobs:
           cd test/srt
           python3 test_mla_deepseek_v3.py
 
-      - name: Uninstall dependencies
-        run: |
-          pip3 uninstall sgl-kernel -y
-
-  sgl-kernel-finish:
-    needs: [sgl-kernel-unit-test, sgl-kernel-mla-test, sgl-kernel-lint, sgl-kernel-build-wheels]
-    if: always()
-    runs-on: ubuntu-latest
-    steps:
-      - name: Check all dependent job statuses
-        run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
-              exit 1
-            fi
-          done
-          echo "All jobs completed successfully"
-          exit 0
-
   # =============================================== primary ====================================================
 
-  check-changes:
-    runs-on: ubuntu-latest
-    outputs:
-      src: ${{ steps.filter.outputs.src }}
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Detect file changes
-        id: filter
-        uses: dorny/paths-filter@v3
-        with:
-          filters: |
-            src:
-              - "python/**"
-              - "scripts/ci/**"
-              - "test/**"
-              - ".github/workflows/pr-test.yml"
-              - "sgl-kernel/**"
-
   unit-test-frontend:
-    needs: [check-changes, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -238,7 +178,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 10
@@ -247,11 +187,9 @@ jobs:
           python3 run_suite.py --suite per-commit
 
   unit-test-backend-1-gpu:
-    needs: [check-changes, unit-test-frontend, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-frontend, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 1-gpu-runner
     strategy:
       fail-fast: false
@@ -262,7 +200,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -271,7 +209,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 30
@@ -280,11 +218,9 @@ jobs:
           python3 run_suite.py --suite per-commit --auto-partition-id ${{ matrix.part }} --auto-partition-size 10
 
   unit-test-backend-2-gpu:
-    needs: [check-changes, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 2-gpu-runner
     strategy:
       fail-fast: false
@@ -295,7 +231,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -304,7 +240,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 30
@@ -313,11 +249,9 @@ jobs:
           python3 run_suite.py --suite per-commit-2-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
 
   unit-test-backend-4-gpu:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 4-gpu-runner
     strategy:
       fail-fast: false
@@ -328,7 +262,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -337,7 +271,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 20
@@ -346,11 +280,9 @@ jobs:
           python3 run_suite.py --suite per-commit-4-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
 
   unit-test-backend-8-gpu:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 8-gpu-runner
     strategy:
       fail-fast: false
@@ -361,7 +293,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -370,7 +302,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 20
@@ -379,18 +311,16 @@ jobs:
           python3 run_suite.py --suite per-commit-8-gpu --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
 
   performance-test-1-gpu-part-1:
-    needs: [check-changes, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -399,7 +329,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Benchmark single latency
         timeout-minutes: 10
@@ -440,18 +370,16 @@ jobs:
           python3 -m unittest test_bench_serving.TestBenchServing.test_lora_online_latency_with_concurrent_adapter_updates
 
   performance-test-1-gpu-part-2:
-    needs: [check-changes, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -460,7 +388,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Benchmark offline throughput (w/o RadixAttention)
         timeout-minutes: 10
@@ -493,18 +421,16 @@ jobs:
           python3 -m unittest test_bench_serving.TestBenchServing.test_vlm_online_latency
 
   performance-test-2-gpu:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -513,7 +439,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
 
       - name: Benchmark single latency (TP=2)
         timeout-minutes: 10
@@ -552,18 +478,16 @@ jobs:
           python3 -m unittest test_bench_serving.TestBenchServing.test_pp_long_context_prefill
 
   accuracy-test-1-gpu:
-    needs: [check-changes, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -572,7 +496,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
           git clone https://github.com/merrymercy/human-eval.git
           cd human-eval
           pip install -e .
@@ -584,18 +508,16 @@ jobs:
           python3 test_eval_accuracy_large.py
 
   accuracy-test-2-gpu:
-    needs: [check-changes, accuracy-test-1-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, accuracy-test-1-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 2-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -604,7 +526,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
           git clone https://github.com/merrymercy/human-eval.git
           cd human-eval
           pip install -e .
@@ -616,18 +538,16 @@ jobs:
           python3 test_moe_eval_accuracy_large.py
 
   unit-test-deepep-4-gpu:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 4-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -636,7 +556,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_deepep.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh
 
       - name: Run test
         timeout-minutes: 20
@@ -645,18 +565,16 @@ jobs:
           python3 run_suite.py --suite per-commit-4-gpu-deepep
 
   unit-test-deepep-8-gpu:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-        (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false &&
-        needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: 8-gpu-runner
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -665,7 +583,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} bash scripts/ci/ci_install_deepep.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_deepep.sh
 
       - name: Run test
         timeout-minutes: 20
@@ -674,11 +592,9 @@ jobs:
           python3 run_suite.py --suite per-commit-8-gpu-deepep
 
   unit-test-backend-8-gpu-b200:
-    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-check-changes, sgl-kernel-finish]
+    needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels]
     if: always() && !failure() && !cancelled() &&
-      (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-      github.event.pull_request.draft == false &&
-      needs.check-changes.outputs.src == 'true'
+        needs.check-changes.outputs.main_package == 'true'
     runs-on: b200-runner
     strategy:
       fail-fast: false
@@ -687,7 +603,7 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Download artifacts
-        if: needs.sgl-kernel-check-changes.outputs.src == 'true'
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
         uses: actions/download-artifact@v4
         with:
           path: sgl-kernel/dist/
@@ -696,7 +612,7 @@ jobs:
 
       - name: Install dependencies
         run: |
-          CUSTOM_BUILD_SGL_KERNEL=${{needs.sgl-kernel-check-changes.outputs.src}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} IS_BLACKWELL=1 bash scripts/ci/ci_install_dependency.sh
 
       - name: Run test
         timeout-minutes: 60
@@ -704,10 +620,13 @@ jobs:
           cd test/srt
           python3 run_suite.py --suite per-commit-8-gpu-b200 --auto-partition-id 0 --auto-partition-size 1
 
-
   pr-test-finish:
     needs: [
       check-changes,
+
+      sgl-kernel-build-wheels,
+      sgl-kernel-unit-test, sgl-kernel-mla-test,
+
       unit-test-frontend, unit-test-backend-1-gpu,
       unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu,
       performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
@@ -720,12 +639,29 @@ jobs:
     steps:
       - name: Check all dependent job statuses
+        env:
+          # Pass the JSON to the shell as data; templating it into the script text would break on a single quote.
+          NEEDS_JSON: ${{ toJson(needs) }}
         run: |
-          results=(${{ join(needs.*.result, ' ') }})
-          for result in "${results[@]}"; do
-            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
-              echo "Job failed with result: $result"
+          # The 'needs' context as a JSON string (provided through the step's env)
+          json_needs="$NEEDS_JSON"
+
+          # Get a list of all job names from the JSON keys
+          job_names=$(echo "$json_needs" | jq -r 'keys_unsorted[]')
+
+          for job in $job_names; do
+            # For each job, extract its result
+            result=$(echo "$json_needs" | jq -r --arg j "$job" '.[$j].result')
+
+            # Print the job name and its result
+            echo "$job: $result"
+
+            # Check for failure or cancellation and exit if found
+            if [[ "$result" == "failure" || "$result" == "cancelled" ]]; then
+              echo "The above jobs failed."
               exit 1
             fi
           done
+
+          # If the loop completes, no job failed or was cancelled (skipped jobs are accepted)
           echo "All jobs completed successfully"
           exit 0
diff --git a/.github/workflows/vllm-dependency-test.yml b/.github/workflows/vllm-dependency-test.yml
index 6db4ed6c2..6c4755009 100644
--- a/.github/workflows/vllm-dependency-test.yml
+++ b/.github/workflows/vllm-dependency-test.yml
@@ -7,12 +7,16 @@ on:
       - "python/**"
       - "scripts/ci/**"
       - "test/**"
+      - ".github/workflows/vllm-dependency-test.yml"
   pull_request:
     branches: [ main ]
     paths:
       - "python/**"
       - "scripts/ci/**"
       - "test/**"
+      - ".github/workflows/vllm-dependency-test.yml"
+    types: [synchronize, labeled]
+  workflow_dispatch:
 
 concurrency:
   group: vllm-dependency-test-${{ github.ref }}
@@ -20,8 +24,7 @@ concurrency:
 
 jobs:
   vllm-dependency-test:
-    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
-        github.event.pull_request.draft == false
+    if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci')
     runs-on: 1-gpu-runner
     steps:
       - name: Checkout code
@@ -32,12 +35,8 @@ jobs:
           bash scripts/ci/ci_install_dependency.sh
           pip install "bitsandbytes>=0.44.0"
 
-          pip install "sgl-kernel==0.3.9.post2"
-
       - name: Run vLLM dependency tests
-        timeout-minutes: 60
+        timeout-minutes: 30
         run: |
-          export SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK=1
-
           cd test/srt
-          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 3600
+          python3 run_suite.py --suite vllm_dependency_test --timeout-per-file 600
diff --git a/docs/developer_guide/contribution_guide.md b/docs/developer_guide/contribution_guide.md
index f8e6f692d..291dc89d3 100644
--- a/docs/developer_guide/contribution_guide.md
+++ b/docs/developer_guide/contribution_guide.md
@@ -68,6 +68,13 @@ You can identify potential reviewers for your code by checking the [code owners]
 Another effective strategy is to review the file modification history and contact individuals who have frequently edited the files.
 If you modify files protected by code owners, their approval is required to merge the code.
 
+## How to trigger CI
+To trigger CI, the pull request must have the "run-ci" label.
+
+- If you have write access to sgl-project/sglang, your pull request will be automatically tagged by @sglang-bot.
+- If you have triage access to sgl-project/sglang, you can manually add the label by clicking "Labels" on the right side of your pull request page.
+- If you have neither write nor triage access, request a review and ask a maintainer to add the label for you.
+
 ## General code style
 - Avoid code duplication. If the same code snippet (more than five lines) appears multiple times, extract it into a shared function.
 - Minimize device synchronization. Reduce expensive CPU-GPU synchronization operations, such as `tensor.item()` or `tensor.cpu()`, whenever possible. Use vectorized code.
diff --git a/python/sglang/README.md b/python/sglang/README.md
index ae0c479b9..3d16d84f8 100644
--- a/python/sglang/README.md
+++ b/python/sglang/README.md
@@ -1,4 +1,4 @@
-# Code Structures
+# Code Structure
 
 - `eval`: The evaluation utilities.
 - `lang`: The frontend language.
@@ -11,6 +11,6 @@
 - `bench_serving.py`: Benchmark online serving with dynamic requests.
 - `check_env.py`: Check the environment variables and dependencies.
 - `global_config.py`: The global configs and constants.
-- `launch_server.py`: The entry point for launching the local server.
+- `launch_server.py`: The entry point for launching a local server.
 - `utils.py`: Common utilities.
 - `version.py`: Version info.
diff --git a/sgl-kernel/README.md b/sgl-kernel/README.md
index 47f3dea54..06e285101 100644
--- a/sgl-kernel/README.md
+++ b/sgl-kernel/README.md
@@ -5,16 +5,15 @@
 [![PyPI](https://img.shields.io/pypi/v/sgl-kernel)](https://pypi.org/project/sgl-kernel)
 
 ## Installation
-For CUDA 12.1 and above:
 
 ```bash
 pip3 install sgl-kernel
 ```
 
-For CUDA 11.8:
+For CUDA 12.4:
 
 ```bash
-pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu118
+pip3 install sgl-kernel -i https://docs.sglang.ai/whl/cu124
 ```
 
 ## Build from source
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index c0b0eb6d4..b7ae98269 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -146,7 +146,6 @@ suites = {
         TestFile("test_full_deepseek_v3.py", 333),
     ],
     "per-commit-8-gpu-b200": [
-        # add more here
         TestFile("test_gpt_oss_4gpu.py", 600),
         TestFile("test_deepseek_v3_fp4_4gpu.py", 600),
     ],