diff --git a/.github/workflows/pr-test-amd.yml b/.github/workflows/pr-test-amd.yml index c6a00aeb6..afbcda40a 100644 --- a/.github/workflows/pr-test-amd.yml +++ b/.github/workflows/pr-test-amd.yml @@ -223,7 +223,7 @@ jobs: fail-fast: false matrix: runner: [linux-mi300-gpu-1, linux-mi325-gpu-1] - part: [0, 1] + part: [0, 1, 2, 3, 4, 5] runs-on: ${{matrix.runner}} steps: - name: Checkout code @@ -240,7 +240,7 @@ jobs: - name: Run test timeout-minutes: 40 run: | - bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 + bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 6 unit-test-backend-2-gpu-amd: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && @@ -266,6 +266,30 @@ jobs: run: | bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd + unit-test-backend-4-gpu-amd: + if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && + github.event.pull_request.draft == false + strategy: + matrix: + runner: [linux-mi300-gpu-4] + runs-on: ${{matrix.runner}} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Start CI container + run: bash scripts/amd_ci_start_container.sh + env: + GITHUB_WORKSPACE: ${{ github.workspace }} + + - name: Install dependencies + run: bash scripts/amd_ci_install_dependency.sh + + - name: Run test + timeout-minutes: 40 + run: | + bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-4-gpu-amd + unit-test-backend-8-gpu-amd: if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') && github.event.pull_request.draft == false diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py index 9cadecd42..f427be31d 100644 --- a/test/srt/run_suite.py +++ b/test/srt/run_suite.py @@ -104,6 +104,29 @@ suites = { TestFile("test_block_int8.py", 22), TestFile("test_create_kvindices.py", 2), TestFile("test_chunked_prefill.py", 313), + TestFile("test_embedding_openai_server.py", 141), + TestFile("test_eval_fp8_accuracy.py", 303), + TestFile("test_function_call_parser.py", 10), + TestFile("test_input_embeddings.py", 38), + TestFile("test_large_max_new_tokens.py", 41), + TestFile("test_metrics.py", 32), + TestFile("test_no_chunked_prefill.py", 108), + TestFile("test_no_overlap_scheduler.py", 234), + TestFile("test_penalty.py", 41), + TestFile("test_page_size.py", 60), + TestFile("test_pytorch_sampling_backend.py", 66), + TestFile("test_radix_attention.py", 105), + TestFile("test_reasoning_content.py", 89), + TestFile("test_enable_thinking.py", 70), + TestFile("test_request_length_validation.py", 31), + TestFile("test_retract_decode.py", 54), + TestFile("test_server_args.py", 1), + TestFile("test_skip_tokenizer_init.py", 117), + TestFile("test_torch_native_attention_backend.py", 123), + TestFile("test_triton_attention_backend.py", 150), + TestFile("test_update_weights_from_disk.py", 114), + TestFile("test_vertex_endpoint.py", 31), + TestFile("test_vision_chunked_prefill.py", 175), ], "per-commit-2-gpu": [ TestFile("models/lora/test_lora_tp.py", 116), @@ -116,13 +139,20 @@ suites = { TestFile("test_verl_engine_2_gpu.py", 64), ], "per-commit-2-gpu-amd": [ + TestFile("models/lora/test_lora_tp.py", 116), + TestFile("test_data_parallelism.py", 73), TestFile("test_mla_tp.py", 170), + TestFile("test_patch_torch.py", 19), + TestFile("test_update_weights_from_distributed.py", 103), ], "per-commit-4-gpu": [ TestFile("test_local_attn.py", 250), TestFile("test_pp_single_node.py", 150), TestFile("test_verl_engine_4_gpu.py", 64), ], + "per-commit-4-gpu-amd": [ + TestFile("test_pp_single_node.py", 150), + ], "per-commit-8-gpu": [ # Disabled deepep tests temporarily because it takes too much time. # TODO: re-enable them after reducing the test time with compilation cache and smaller models.