diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 4f31d34bb..81d6571b4 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -693,6 +693,87 @@ jobs:
           cd test/srt
           python3 run_suite.py --suite per-commit-4-gpu-b200 --auto-partition-id 0 --auto-partition-size 1 --timeout-per-file 3600
 
+  unit-test-disaggregation-2-gpu:
+    needs: [check-changes, sgl-kernel-build-wheels]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+    runs-on: 2-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Download artifacts
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda12.9
+
+      - name: Install dependencies
+        run: |
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite per-commit-2-gpu-disaggregation
+
+  unit-test-disaggregation-4-gpu:
+    needs: [check-changes, unit-test-disaggregation-2-gpu, sgl-kernel-build-wheels]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+    runs-on: 4-gpu-runner
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Download artifacts
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda12.9
+
+      - name: Install dependencies
+        run: |
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite per-commit-4-gpu-disaggregation
+
+  unit-test-disaggregation-8-gpu:
+    needs: [check-changes, unit-test-disaggregation-2-gpu, sgl-kernel-build-wheels]
+    if: always() && !failure() && !cancelled() &&
+      ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
+    runs-on: 8-gpu-h200
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Download artifacts
+        if: needs.check-changes.outputs.sgl_kernel == 'true'
+        uses: actions/download-artifact@v4
+        with:
+          path: sgl-kernel/dist/
+          merge-multiple: true
+          pattern: wheel-python3.10-cuda12.9
+
+      - name: Install dependencies
+        run: |
+          CUSTOM_BUILD_SGL_KERNEL=${{needs.check-changes.outputs.sgl_kernel}} bash scripts/ci/ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 20
+        run: |
+          cd test/srt
+          python3 run_suite.py --suite per-commit-8-gpu-disaggregation
+
   pr-test-finish:
     needs: [
       check-changes,
@@ -707,6 +788,7 @@ jobs:
       accuracy-test-1-gpu, accuracy-test-2-gpu,
       unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
       unit-test-backend-4-gpu-b200,
+      unit-test-disaggregation-2-gpu, unit-test-disaggregation-4-gpu, unit-test-disaggregation-8-gpu,
     ]
     if: always()
     runs-on: ubuntu-latest
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 4c5a0779f..998e6b0bd 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -138,7 +138,6 @@ suites = {
         TestFile("lora/test_lora_tp.py", 116),
         TestFile("rl/test_update_weights_from_distributed.py", 103),
         TestFile("test_data_parallelism.py", 73),
-        TestFile("test_disaggregation.py", 499),
         TestFile("test_dp_attention.py", 594),
         TestFile("test_load_weights_from_remote_instance.py", 72),
         TestFile("test_patch_torch.py", 19),
@@ -157,9 +156,6 @@ suites = {
     ],
     "per-commit-8-gpu": [
         TestFile("lora/test_lora_llama4.py", 400),
-        TestFile("test_disaggregation_dp_attention.py", 155),
-        TestFile("test_disaggregation_different_tp.py", 600),
-        TestFile("test_disaggregation_pp.py", 140),
         TestFile("test_deepseek_v3_basic.py", 275),
         TestFile("test_deepseek_v3_mtp.py", 275),
     ],
@@ -173,6 +169,16 @@ suites = {
     "per-commit-8-gpu-deepep": [
         TestFile("ep/test_deepep_large.py", 338),
     ],
+    "per-commit-2-gpu-disaggregation": [
+        TestFile("test_disaggregation_basic.py", 400),
+    ],
+    "per-commit-4-gpu-disaggregation": [
+        TestFile("test_disaggregation_dp_attention.py", 155),
+    ],
+    "per-commit-8-gpu-disaggregation": [
+        TestFile("test_disaggregation_different_tp.py", 600),
+        TestFile("test_disaggregation_pp.py", 140),
+    ],
     "per-commit-8-gpu-h20": [
         TestFile("quant/test_w4a8_deepseek_v3.py", 371),
     ],
diff --git a/test/srt/test_disaggregation.py b/test/srt/test_disaggregation_basic.py
similarity index 100%
rename from test/srt/test_disaggregation.py
rename to test/srt/test_disaggregation_basic.py