From d645ae90a3f538c72baec8bbf312388c22e25eae Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Sun, 5 Oct 2025 18:05:41 -0700 Subject: [PATCH] Rename runner labels (#11228) --- .github/workflows/pr-test-pd-router.yml | 2 +- .github/workflows/pr-test-rust.yml | 2 +- .github/workflows/pr-test.yml | 11 +- .github/workflows/release-docker-dev.yml | 6 +- .github/workflows/release-docker.yml | 14 +- .../workflows/release-whl-kernel-cu118.yml | 92 ------------- .github/workflows/release-whl-kernel.yml | 121 ++---------------- docs/get_started/install.md | 2 +- docs/platforms/blackwell_gpu.md | 9 -- python/pyproject.toml | 7 +- scripts/ci/ci_install_dependency.sh | 43 ++----- sgl-kernel/README.md | 1 - test/srt/run_suite.py | 15 +-- 13 files changed, 48 insertions(+), 277 deletions(-) delete mode 100644 .github/workflows/release-whl-kernel-cu118.yml delete mode 100644 docs/platforms/blackwell_gpu.md diff --git a/.github/workflows/pr-test-pd-router.yml b/.github/workflows/pr-test-pd-router.yml index 68900c94f..95278aed8 100644 --- a/.github/workflows/pr-test-pd-router.yml +++ b/.github/workflows/pr-test-pd-router.yml @@ -28,7 +28,7 @@ permissions: jobs: test-disaggregation: if: github.event_name != 'pull_request' || (contains(github.event.pull_request.labels.*.name, 'run-ci') && contains(github.event.pull_request.labels.*.name, 'router-benchmark')) - runs-on: [h200] + runs-on: [8-gpu-h200-oracle] timeout-minutes: 45 steps: diff --git a/.github/workflows/pr-test-rust.yml b/.github/workflows/pr-test-rust.yml index 1017e3c0b..3608bdaa2 100644 --- a/.github/workflows/pr-test-rust.yml +++ b/.github/workflows/pr-test-rust.yml @@ -83,7 +83,7 @@ jobs: pytest-rust: if: github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-ci') - runs-on: BM.A10.4 + runs-on: 4-gpu-a10 timeout-minutes: 25 steps: - name: Checkout code diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index d983c91bf..7f504e3de 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -62,7 +62,7 @@ jobs: sgl-kernel-build-wheels: needs: [check-changes] if: needs.check-changes.outputs.sgl_kernel == 'true' - runs-on: sgl-kernel-build-node + runs-on: x64-kernel-build-node strategy: matrix: include: @@ -323,7 +323,7 @@ jobs: needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] if: always() && !failure() && !cancelled() && ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) - runs-on: 8-gpu-runner + runs-on: 8-gpu-h200 strategy: fail-fast: false matrix: @@ -641,7 +641,7 @@ jobs: needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] if: always() && !failure() && !cancelled() && ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) - runs-on: 8-gpu-runner + runs-on: 8-gpu-h200 steps: - name: Checkout code uses: actions/checkout@v4 @@ -668,7 +668,7 @@ jobs: needs: [check-changes, unit-test-backend-2-gpu, sgl-kernel-build-wheels] if: always() && !failure() && !cancelled() && ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true')) - runs-on: 4-b200-runner + runs-on: 4-gpu-b200 strategy: fail-fast: false steps: @@ -702,7 +702,8 @@ jobs: unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu, unit-test-backend-4-gpu, unit-test-backend-8-gpu, - performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu, + performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-1-gpu-part-3, + performance-test-2-gpu, accuracy-test-1-gpu, accuracy-test-2-gpu, unit-test-deepep-4-gpu, unit-test-deepep-8-gpu, unit-test-backend-4-gpu-b200, diff --git a/.github/workflows/release-docker-dev.yml b/.github/workflows/release-docker-dev.yml index 4b4701099..04fb366eb 100644 --- a/.github/workflows/release-docker-dev.yml +++ b/.github/workflows/release-docker-dev.yml @@ -8,7 +8,7 @@ on: jobs: build-dev-x86: if: ${{ github.repository == 'sgl-project/sglang' }} - runs-on: nvidia + runs-on: x64-docker-build-node strategy: matrix: variant: @@ -48,12 +48,12 @@ jobs: build-dev-arm: if: ${{ github.repository == 'sgl-project/sglang' }} - runs-on: sgl-kernel-release-node-arm + runs-on: arm-docker-build-node strategy: matrix: variant: - version: 12.9.1 - type: blackwell_aarch64 + type: all_aarch64 tag: dev-arm64 steps: - name: Delete huge unnecessary tools folder diff --git a/.github/workflows/release-docker.yml b/.github/workflows/release-docker.yml index 7b5a6dda7..c7bc8194e 100644 --- a/.github/workflows/release-docker.yml +++ b/.github/workflows/release-docker.yml @@ -14,13 +14,9 @@ jobs: strategy: matrix: variant: - - cuda_version: "12.6.1" - build_type: "all" - - cuda_version: "12.8.1" - build_type: "blackwell" - cuda_version: "12.9.1" - build_type: "blackwell" - runs-on: nvidia + build_type: "all" + runs-on: x64-docker-build-node steps: - name: Delete huge unnecessary tools folder run: rm -rf /opt/hostedtoolcache @@ -67,8 +63,6 @@ jobs: if [ "${{ matrix.variant.build_type }}" = "all" ]; then tag_suffix="" - elif [ "${{ matrix.variant.build_type }}" = "blackwell" ]; then - tag_suffix="-b200" else echo "Unsupported build type" exit 1 @@ -87,8 +81,8 @@ jobs: matrix: variant: - cuda_version: "12.9.1" - build_type: "blackwell_aarch64" - runs-on: sgl-kernel-release-node-arm + build_type: "all_aarch64" + runs-on: arm-docker-build-node steps: - name: Delete huge unnecessary tools folder run: rm -rf /opt/hostedtoolcache diff --git a/.github/workflows/release-whl-kernel-cu118.yml b/.github/workflows/release-whl-kernel-cu118.yml deleted file mode 100644 index 4757bcaa1..000000000 --- a/.github/workflows/release-whl-kernel-cu118.yml +++ /dev/null @@ -1,92 +0,0 @@ -name: Release SGLang Kernel Wheel (cu118) - -on: - workflow_dispatch: - inputs: - tag_name: - type: string - push: - branches: - - main - paths: - - sgl-kernel/python/sgl_kernel/version.py - -jobs: - build-wheels: - if: github.repository == 'sgl-project/sglang' - runs-on: sgl-kernel-release-node - strategy: - matrix: - python-version: ["3.9"] - cuda-version: ["11.8"] - - steps: - - uses: actions/checkout@v4 - with: - submodules: "recursive" - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheels for Python ${{ matrix.python-version }} and CUDA ${{ matrix.cuda-version }} - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} - path: sgl-kernel/dist/* - - release: - needs: build-wheels - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-* - - - name: Set tag name - id: set_tag_name - run: | - if [ -z "${{ inputs.tag_name }}" ]; then - TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)" - echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT - else - echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT - fi - - - name: Release - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ steps.set_tag_name.outputs.tag_name }} - repository: sgl-project/whl - token: ${{ secrets.WHL_TOKEN }} - files: | - sgl-kernel/dist/* - - - name: Clone wheel index - run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl - env: - WHL_TOKEN: ${{ secrets.WHL_TOKEN }} - - - name: Update wheel index - run: python3 scripts/update_kernel_whl_index.py - - - name: Push wheel index - run: | - cd sgl-whl - git config --local user.name "github-actions[bot]" - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add -A - git commit -m "update whl index" - git push diff --git a/.github/workflows/release-whl-kernel.yml b/.github/workflows/release-whl-kernel.yml index c80fd1fd1..5657332cf 100644 --- a/.github/workflows/release-whl-kernel.yml +++ b/.github/workflows/release-whl-kernel.yml @@ -19,7 +19,7 @@ concurrency: jobs: build-cu129: if: github.repository == 'sgl-project/sglang' - runs-on: sgl-kernel-release-node + runs-on: x64-kernel-build-node strategy: matrix: python-version: ["3.10"] @@ -46,38 +46,14 @@ jobs: pip install twine python3 -m twine upload --skip-existing dist/* -u __token__ -p ${{ secrets.PYPI_TOKEN }} - build-cu124: - if: github.repository == 'sgl-project/sglang' - needs: build-cu129 - runs-on: sgl-kernel-release-node - strategy: - matrix: - python-version: ["3.10"] - cuda-version: ["12.4"] - steps: - - uses: actions/checkout@v4 - with: - submodules: "recursive" - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheels - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} path: sgl-kernel/dist/* - release-cu124: - needs: build-cu124 + release-cu129: + needs: build-cu129 runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 @@ -114,99 +90,20 @@ jobs: WHL_TOKEN: ${{ secrets.WHL_TOKEN }} - name: Update wheel index - run: python3 scripts/update_kernel_whl_index.py --cuda 124 + run: python3 scripts/update_kernel_whl_index.py --cuda 129 - name: Push wheel index run: | cd sgl-whl - git config --local user.name "github-actions[bot]" - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" - git add -A - git commit -m "update whl index" - git push - - build-cu128: - if: github.repository == 'sgl-project/sglang' - needs: build-cu129 - runs-on: sgl-kernel-release-node - strategy: - matrix: - python-version: ["3.10"] - cuda-version: ["12.8"] - steps: - - uses: actions/checkout@v4 - with: - submodules: "recursive" - - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - - - name: Build wheels - run: | - cd sgl-kernel - chmod +x ./build.sh - ./build.sh "${{ matrix.python-version }}" "${{ matrix.cuda-version }}" - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - name: wheel-python${{ matrix.python-version }}-cuda${{ matrix.cuda-version }} - path: sgl-kernel/dist/* - - release-cu128: - needs: build-cu128 - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - - - name: Download artifacts - uses: actions/download-artifact@v4 - with: - path: sgl-kernel/dist/ - merge-multiple: true - pattern: wheel-* - - - name: Set tag name - id: set_tag_name - run: | - if [ -z "${{ inputs.tag_name }}" ]; then - TAG_NAME="v$(cat sgl-kernel/python/sgl_kernel/version.py | cut -d'"' -f2)" - echo "tag_name=$TAG_NAME" >> $GITHUB_OUTPUT - else - echo "tag_name=${{ inputs.tag_name }}" >> $GITHUB_OUTPUT - fi - - - name: Release - uses: softprops/action-gh-release@v2 - with: - tag_name: ${{ steps.set_tag_name.outputs.tag_name }} - repository: sgl-project/whl - token: ${{ secrets.WHL_TOKEN }} - files: | - sgl-kernel/dist/* - - - name: Clone wheel index - run: git clone https://oauth2:${WHL_TOKEN}@github.com/sgl-project/whl.git sgl-whl - env: - WHL_TOKEN: ${{ secrets.WHL_TOKEN }} - - - name: Update wheel index - run: python3 scripts/update_kernel_whl_index.py --cuda 128 - - - name: Push wheel index - run: | - cd sgl-whl - git config --local user.name "github-actions[bot]" - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "sglang-bot" + git config --local user.email "sglangbot@gmail.com" git add -A git commit -m "update whl index" git push build-cu129-aarch64: if: github.repository == 'sgl-project/sglang' - runs-on: sgl-kernel-release-node-arm + runs-on: arm-kernel-build-node strategy: matrix: python-version: ["3.10"] @@ -282,8 +179,8 @@ jobs: - name: Push wheel index run: | cd sgl-whl - git config --local user.name "github-actions[bot]" - git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config --local user.name "sglang-bot" + git config --local user.email "sglangbot@gmail.com" git add -A git commit -m "update whl index" git push diff --git a/docs/get_started/install.md b/docs/get_started/install.md index 2721555fb..e5653774e 100644 --- a/docs/get_started/install.md +++ b/docs/get_started/install.md @@ -3,7 +3,7 @@ You can install SGLang using one of the methods below. This page primarily applies to common NVIDIA GPU platforms. -For other or newer platforms, please refer to the dedicated pages for [NVIDIA Blackwell GPUs](../platforms/blackwell_gpu.md), [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md). +For other or newer platforms, please refer to the dedicated pages for [AMD GPUs](../platforms/amd_gpu.md), [Intel Xeon CPUs](../platforms/cpu_server.md), [NVIDIA Jetson](../platforms/nvidia_jetson.md), [Ascend NPUs](../platforms/ascend_npu.md). ## Method 1: With pip or uv diff --git a/docs/platforms/blackwell_gpu.md b/docs/platforms/blackwell_gpu.md deleted file mode 100644 index 8c433b3f0..000000000 --- a/docs/platforms/blackwell_gpu.md +++ /dev/null @@ -1,9 +0,0 @@ -# Blackwell GPUs - -We will release the pre-built wheels soon. Before that, please try to compile from source or check the blackwell docker images from [lmsysorg/sglang](https://hub.docker.com/r/lmsysorg/sglang/tags). - -## B200 with x86 CPUs -TODO - -## GB200/GB300 with ARM CPUs -TODO diff --git a/python/pyproject.toml b/python/pyproject.toml index ab2adc9e5..fb095f2a0 100755 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -91,9 +91,14 @@ tracing = [ "opentelemetry-sdk", ] all = ["sglang[test]", "sglang[decord]"] +all_aarch64 = ["sglang[test]"] +dev = ["sglang[test]", "sglang[decord]"] + + +# The following will be deprecated in 2 weeks blackwell = ["sglang[test]", "sglang[decord]"] blackwell_aarch64 = ["sglang[test]"] -dev = ["sglang[test]", "sglang[decord]"] + [project.urls] "Homepage" = "https://github.com/sgl-project/sglang" diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh index e7f9f0c42..61ea15565 100755 --- a/scripts/ci/ci_install_dependency.sh +++ b/scripts/ci/ci_install_dependency.sh @@ -3,21 +3,16 @@ set -euxo pipefail IS_BLACKWELL=${IS_BLACKWELL:-0} - -if [ "$IS_BLACKWELL" = "1" ]; then - CU_VERSION="cu129" -else - CU_VERSION="cu126" -fi - -# Clear torch compilation cache -python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)' +CU_VERSION="cu128" # Kill existing processes SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" bash "${SCRIPT_DIR}/../killall_sglang.sh" echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-}" +# Clear torch compilation cache +python3 -c 'import os, shutil, tempfile, getpass; cache_dir = os.environ.get("TORCHINDUCTOR_CACHE_DIR") or os.path.join(tempfile.gettempdir(), "torchinductor_" + getpass.getuser()); shutil.rmtree(cache_dir, ignore_errors=True)' + # Install apt packages apt install -y git libnuma-dev @@ -29,7 +24,7 @@ if [ "$IS_BLACKWELL" = "1" ]; then PIP_INSTALL_SUFFIX="--break-system-packages" # Clean up existing installations - $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm $PIP_INSTALL_SUFFIX || true + $PIP_CMD uninstall -y flashinfer_python sgl-kernel sglang vllm torch $PIP_INSTALL_SUFFIX || true else # In normal cases, we use uv, which is much faster than pip. pip install --upgrade pip @@ -40,7 +35,7 @@ else PIP_INSTALL_SUFFIX="--index-strategy unsafe-best-match" # Clean up existing installations - $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm || true + $PIP_CMD uninstall flashinfer_python sgl-kernel sglang vllm torch || true fi # Install the main package @@ -49,26 +44,16 @@ $PIP_CMD install -e "python[dev]" --extra-index-url https://download.pytorch.org # Install router for pd-disagg test SGLANG_ROUTER_BUILD_NO_RUST=1 $PIP_CMD install -e "sgl-router" $PIP_INSTALL_SUFFIX +# Install sgl-kernel SGL_KERNEL_VERSION_FROM_KERNEL=$(grep -Po '(?<=^version = ")[^"]*' sgl-kernel/pyproject.toml) SGL_KERNEL_VERSION_FROM_SRT=$(grep -Po -m1 '(?<=sgl-kernel==)[0-9A-Za-z\.\-]+' python/pyproject.toml) echo "SGL_KERNEL_VERSION_FROM_KERNEL=${SGL_KERNEL_VERSION_FROM_KERNEL} SGL_KERNEL_VERSION_FROM_SRT=${SGL_KERNEL_VERSION_FROM_SRT}" -if [ "$IS_BLACKWELL" = "1" ]; then - SGL_KERNEL_CUDA_VERSION=cu128 -else - SGL_KERNEL_CUDA_VERSION=cu124 -fi - if [ "${CUSTOM_BUILD_SGL_KERNEL:-}" = "true" ]; then ls -alh sgl-kernel/dist - WHEEL_FILE=$(ls sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl 2>/dev/null || true) - if [ -f "$WHEEL_FILE" ]; then - $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX - else - $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX - fi + $PIP_CMD install sgl-kernel/dist/sgl_kernel-${SGL_KERNEL_VERSION_FROM_KERNEL}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX else - $PIP_CMD install https://github.com/sgl-project/whl/releases/download/v${SGL_KERNEL_VERSION_FROM_SRT}/sgl_kernel-${SGL_KERNEL_VERSION_FROM_SRT}+${SGL_KERNEL_CUDA_VERSION}-cp310-abi3-manylinux2014_x86_64.whl --force-reinstall $PIP_INSTALL_SUFFIX + $PIP_CMD install sgl-kernel==${SGL_KERNEL_VERSION_FROM_SRT} --force-reinstall $PIP_INSTALL_SUFFIX fi # Show current packages @@ -86,14 +71,6 @@ if [ "$IS_BLACKWELL" != "1" ]; then $PIP_CMD install xformers --index-url https://download.pytorch.org/whl/${CU_VERSION} --no-deps $PIP_INSTALL_SUFFIX fi -# Install FlashMLA for attention backend tests -# $PIP_CMD install git+https://github.com/deepseek-ai/FlashMLA.git $PIP_INSTALL_SUFFIX - # Show current packages $PIP_CMD list - - -if [ -n "${HF_TOKEN:-}" ]; then - $PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX - hf auth login --token $HF_TOKEN -fi +python3 -c "import torch; print(torch.version.cuda)" diff --git a/sgl-kernel/README.md b/sgl-kernel/README.md index f86d5851f..cd3c0288a 100644 --- a/sgl-kernel/README.md +++ b/sgl-kernel/README.md @@ -25,7 +25,6 @@ make build ``` Note: - The `sgl-kernel` is rapidly evolving. If you experience a compilation failure, try using `make rebuild`. ### Build with [ccache](https://github.com/ccache/ccache) diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 5904e4ab1..9aaad9482 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -88,7 +88,7 @@ suites = { TestFile("test_metrics.py", 32), TestFile("test_metrics_utils.py", 1), TestFile("test_mla.py", 167), - TestFile("test_mla_deepseek_v3.py", 1420), + TestFile("test_mla_deepseek_v3.py", 500), TestFile("test_mla_int8_deepseek_v3.py", 429), TestFile("test_mla_flashinfer.py", 302), TestFile("test_mla_fp8.py", 93), @@ -130,7 +130,7 @@ suites = { TestFile("lora/test_lora_tp.py", 116), TestFile("rl/test_update_weights_from_distributed.py", 103), TestFile("test_data_parallelism.py", 73), - TestFile("test_dp_attention.py", 277), + TestFile("test_dp_attention.py", 594), TestFile("test_load_weights_from_remote_instance.py", 72), TestFile("test_patch_torch.py", 19), TestFile("test_release_memory_occupation.py", 257), @@ -138,17 +138,16 @@ suites = { TestFile("hicache/test_hicache_storage_3fs_backend.py", 200), ], "per-commit-4-gpu": [ - TestFile("test_gpt_oss_4gpu.py", 600), - TestFile("test_local_attn.py", 250), - TestFile("test_pp_single_node.py", 372), - TestFile("models/test_qwen3_next_models.py", 200), - TestFile("models/test_falcon_h1_models.py", 200), + TestFile("test_gpt_oss_4gpu.py", 300), + TestFile("test_local_attn.py", 411), + TestFile("test_pp_single_node.py", 481), + TestFile("models/test_qwen3_next_models.py", 291), TestFile("test_multi_instance_release_memory_occupation.py", 64), ], "per-commit-8-gpu": [ TestFile("hicache/test_hicache_storage_mooncake_backend.py", 400), TestFile("lora/test_lora_llama4.py", 400), - TestFile("test_disaggregation.py", 600), + TestFile("test_disaggregation.py", 499), TestFile("test_disaggregation_dp_attention.py", 155), TestFile("test_disaggregation_different_tp.py", 600), TestFile("test_disaggregation_pp.py", 140),