Enable more unit tests for AMD CI. (#6983)

2025-06-08 19:41:55 -07:00
parent 18efb5e8e0
commit 2c18642502
2 changed files with 56 additions and 2 deletions
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -223,7 +223,7 @@ jobs:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
-        part: [0, 1]
+        part: [0, 1, 2, 3, 4, 5]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
@@ -240,7 +240,7 @@ jobs:
      - name: Run test
        timeout-minutes: 40
        run: |
-          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
+          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 6

  unit-test-backend-2-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -266,6 +266,30 @@ jobs:
        run: |
          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd

+  unit-test-backend-4-gpu-amd:  # runs the per-commit-4-gpu-amd suite from run_suite.py
+    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
+      github.event.pull_request.draft == false  # condition requires the PR not to be a draft
+    strategy:
+      matrix:
+        runner: [linux-mi300-gpu-4]  # single-entry matrix; consumed by runs-on below
+    runs-on: ${{matrix.runner}}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Start CI container
+        run: bash scripts/amd_ci_start_container.sh
+        env:
+          GITHUB_WORKSPACE: ${{ github.workspace }}  # set for this step only
+
+      - name: Install dependencies
+        run: bash scripts/amd_ci_install_dependency.sh
+
+      - name: Run test
+        timeout-minutes: 40  # same limit as the partitioned AMD "Run test" step in this workflow
+        run: |
+          bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-4-gpu-amd
+
  unit-test-backend-8-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
      github.event.pull_request.draft == false
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -104,6 +104,29 @@ suites = {
        TestFile("test_block_int8.py", 22),
        TestFile("test_create_kvindices.py", 2),
        TestFile("test_chunked_prefill.py", 313),
+        TestFile("test_embedding_openai_server.py", 141),
+        TestFile("test_eval_fp8_accuracy.py", 303),
+        TestFile("test_function_call_parser.py", 10),
+        TestFile("test_input_embeddings.py", 38),
+        TestFile("test_large_max_new_tokens.py", 41),
+        TestFile("test_metrics.py", 32),
+        TestFile("test_no_chunked_prefill.py", 108),
+        TestFile("test_no_overlap_scheduler.py", 234),
+        TestFile("test_penalty.py", 41),
+        TestFile("test_page_size.py", 60),
+        TestFile("test_pytorch_sampling_backend.py", 66),
+        TestFile("test_radix_attention.py", 105),
+        TestFile("test_reasoning_content.py", 89),
+        TestFile("test_enable_thinking.py", 70),
+        TestFile("test_request_length_validation.py", 31),
+        TestFile("test_retract_decode.py", 54),
+        TestFile("test_server_args.py", 1),
+        TestFile("test_skip_tokenizer_init.py", 117),
+        TestFile("test_torch_native_attention_backend.py", 123),
+        TestFile("test_triton_attention_backend.py", 150),
+        TestFile("test_update_weights_from_disk.py", 114),
+        TestFile("test_vertex_endpoint.py", 31),
+        TestFile("test_vision_chunked_prefill.py", 175),
    ],
    "per-commit-2-gpu": [
        TestFile("models/lora/test_lora_tp.py", 116),
@@ -116,13 +139,20 @@ suites = {
        TestFile("test_verl_engine_2_gpu.py", 64),
    ],
    "per-commit-2-gpu-amd": [
+        TestFile("models/lora/test_lora_tp.py", 116),
+        TestFile("test_data_parallelism.py", 73),
        TestFile("test_mla_tp.py", 170),
+        TestFile("test_patch_torch.py", 19),
+        TestFile("test_update_weights_from_distributed.py", 103),
    ],
    "per-commit-4-gpu": [
        TestFile("test_local_attn.py", 250),
        TestFile("test_pp_single_node.py", 150),
        TestFile("test_verl_engine_4_gpu.py", 64),
    ],
+    "per-commit-4-gpu-amd": [
+        TestFile("test_pp_single_node.py", 150),
+    ],
    "per-commit-8-gpu": [
        # Disabled deepep tests temporarily because it takes too much time.
        # TODO: re-enable them after reducing the test time with compilation cache and smaller models.