Enable more unit tests for AMD CI. (#6983)
This commit is contained in:
28
.github/workflows/pr-test-amd.yml
vendored
28
.github/workflows/pr-test-amd.yml
vendored
@@ -223,7 +223,7 @@ jobs:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||
part: [0, 1]
|
||||
part: [0, 1, 2, 3, 4, 5]
|
||||
runs-on: ${{matrix.runner}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
@@ -240,7 +240,7 @@ jobs:
|
||||
- name: Run test
|
||||
timeout-minutes: 40
|
||||
run: |
|
||||
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
|
||||
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 6
|
||||
|
||||
unit-test-backend-2-gpu-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
@@ -266,6 +266,30 @@ jobs:
|
||||
run: |
|
||||
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-2-gpu-amd
|
||||
|
||||
unit-test-backend-4-gpu-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
strategy:
|
||||
matrix:
|
||||
runner: [linux-mi300-gpu-4]
|
||||
runs-on: ${{matrix.runner}}
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Start CI container
|
||||
run: bash scripts/amd_ci_start_container.sh
|
||||
env:
|
||||
GITHUB_WORKSPACE: ${{ github.workspace }}
|
||||
|
||||
- name: Install dependencies
|
||||
run: bash scripts/amd_ci_install_dependency.sh
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 40
|
||||
run: |
|
||||
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-4-gpu-amd
|
||||
|
||||
unit-test-backend-8-gpu-amd:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
|
||||
@@ -104,6 +104,29 @@ suites = {
|
||||
TestFile("test_block_int8.py", 22),
|
||||
TestFile("test_create_kvindices.py", 2),
|
||||
TestFile("test_chunked_prefill.py", 313),
|
||||
TestFile("test_embedding_openai_server.py", 141),
|
||||
TestFile("test_eval_fp8_accuracy.py", 303),
|
||||
TestFile("test_function_call_parser.py", 10),
|
||||
TestFile("test_input_embeddings.py", 38),
|
||||
TestFile("test_large_max_new_tokens.py", 41),
|
||||
TestFile("test_metrics.py", 32),
|
||||
TestFile("test_no_chunked_prefill.py", 108),
|
||||
TestFile("test_no_overlap_scheduler.py", 234),
|
||||
TestFile("test_penalty.py", 41),
|
||||
TestFile("test_page_size.py", 60),
|
||||
TestFile("test_pytorch_sampling_backend.py", 66),
|
||||
TestFile("test_radix_attention.py", 105),
|
||||
TestFile("test_reasoning_content.py", 89),
|
||||
TestFile("test_enable_thinking.py", 70),
|
||||
TestFile("test_request_length_validation.py", 31),
|
||||
TestFile("test_retract_decode.py", 54),
|
||||
TestFile("test_server_args.py", 1),
|
||||
TestFile("test_skip_tokenizer_init.py", 117),
|
||||
TestFile("test_torch_native_attention_backend.py", 123),
|
||||
TestFile("test_triton_attention_backend.py", 150),
|
||||
TestFile("test_update_weights_from_disk.py", 114),
|
||||
TestFile("test_vertex_endpoint.py", 31),
|
||||
TestFile("test_vision_chunked_prefill.py", 175),
|
||||
],
|
||||
"per-commit-2-gpu": [
|
||||
TestFile("models/lora/test_lora_tp.py", 116),
|
||||
@@ -116,13 +139,20 @@ suites = {
|
||||
TestFile("test_verl_engine_2_gpu.py", 64),
|
||||
],
|
||||
"per-commit-2-gpu-amd": [
|
||||
TestFile("models/lora/test_lora_tp.py", 116),
|
||||
TestFile("test_data_parallelism.py", 73),
|
||||
TestFile("test_mla_tp.py", 170),
|
||||
TestFile("test_patch_torch.py", 19),
|
||||
TestFile("test_update_weights_from_distributed.py", 103),
|
||||
],
|
||||
"per-commit-4-gpu": [
|
||||
TestFile("test_local_attn.py", 250),
|
||||
TestFile("test_pp_single_node.py", 150),
|
||||
TestFile("test_verl_engine_4_gpu.py", 64),
|
||||
],
|
||||
"per-commit-4-gpu-amd": [
|
||||
TestFile("test_pp_single_node.py", 150),
|
||||
],
|
||||
"per-commit-8-gpu": [
|
||||
# Disabled deepep tests temporarily because it takes too much time.
|
||||
# TODO: re-enable them after reducing the test time with compilation cache and smaller models.
|
||||
|
||||
Reference in New Issue
Block a user