[CI] Auto partition for test cases (#6379)
### What this PR does / why we need it?
This patch adds an auto-partition feature for tests. For example, before
this PR we ran the e2e single-card test suite for 2h40min; with auto
partitioning, test cases are automatically allocated into the required n
partitions based on their test durations (greedy strategy) and run in parallel.
The advantage is that the overall test duration becomes roughly 1/n of the
original.
### Does this PR introduce _any_ user-facing change?
Before:
e2e single card test spend 2h40min
After:
e2e single card test spend 1h13min
### How was this patch tested?
```shell
python .github/workflows/scripts/run_suite.py --auto-partition-size 2 --auto-partition-id 0
args=Namespace(timeout_per_file=2000, suite='e2e-singlecard', auto_partition_id=0, auto_partition_size=2, continue_on_error=False, enable_retry=False, max_attempts=2, retry_wait_seconds=60, retry_timeout_increase=600)
+----------------+--------------------+
| Suite | Partition |
|----------------+--------------------|
| e2e-singlecard | 1/2 (0-based id=0) |
+----------------+--------------------+
✅ Enabled 13 test(s) (est total 4020.0s):
- tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py (est_time=1800)
- tests/e2e/singlecard/test_aclgraph_accuracy.py (est_time=480)
- tests/e2e/singlecard/test_guided_decoding.py (est_time=354)
- tests/e2e/singlecard/test_batch_invariant.py (est_time=320)
- tests/e2e/singlecard/pooling/test_embedding.py (est_time=270)
- tests/e2e/singlecard/test_quantization.py (est_time=200)
- tests/e2e/singlecard/test_llama32_lora.py (est_time=162)
- tests/e2e/singlecard/test_cpu_offloading.py (est_time=132)
- tests/e2e/singlecard/pooling/test_classification.py (est_time=120)
- tests/e2e/singlecard/test_camem.py (est_time=77)
- tests/e2e/singlecard/compile/test_norm_quant_fusion.py (est_time=70)
- tests/e2e/singlecard/test_auto_fit_max_mode_len.py (est_time=25)
- tests/e2e/singlecard/test_profile_execute_duration.py (est_time=10)
(base) wangli@Mac-mini vllm-ascend % python .github/workflows/scripts/run_suite.py --auto-partition-size 2 --auto-partition-id 1
args=Namespace(timeout_per_file=2000, suite='e2e-singlecard', auto_partition_id=1, auto_partition_size=2, continue_on_error=False, enable_retry=False, max_attempts=2, retry_wait_seconds=60, retry_timeout_increase=600)
+----------------+--------------------+
| Suite | Partition |
|----------------+--------------------|
| e2e-singlecard | 2/2 (0-based id=1) |
+----------------+--------------------+
✅ Enabled 13 test(s) (est total 4025.0s):
- tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py (est_time=1500)
- tests/e2e/singlecard/pooling/test_scoring.py (est_time=500)
- tests/e2e/singlecard/test_aclgraph_batch_invariant.py (est_time=410)
- tests/e2e/singlecard/test_vlm.py (est_time=354)
- tests/e2e/singlecard/test_models.py (est_time=300)
- tests/e2e/singlecard/test_multistream_overlap_shared_expert.py (est_time=200)
- tests/e2e/singlecard/test_sampler.py (est_time=200)
- tests/e2e/singlecard/test_async_scheduling.py (est_time=150)
- tests/e2e/singlecard/test_aclgraph_mem.py (est_time=130)
- tests/e2e/singlecard/test_ilama_lora.py (est_time=95)
- tests/e2e/singlecard/test_completion_with_prompt_embeds.py (est_time=76)
- tests/e2e/singlecard/test_qwen3_multi_loras.py (est_time=65)
- tests/e2e/singlecard/test_xlite.py (est_time=45)
```
- vLLM version: v0.14.1
- vLLM main:
dc917cceb8
---------
Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
101
.github/workflows/scripts/ci_utils.py
vendored
Normal file
101
.github/workflows/scripts/ci_utils.py
vendored
Normal file
@@ -0,0 +1,101 @@
|
||||
import logging
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
|
||||
# Configure logger to output to stdout
# Message-only format (no timestamps/levels) keeps CI log lines clean,
# since the CI runner already timestamps each line.
logging.basicConfig(level=logging.INFO, format="%(message)s")
# Module-level logger shared by all helpers in this file.
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Colors:
    """ANSI terminal escape codes used to colorize CI console output.

    Usage: wrap text as f"{Colors.FAIL}message{Colors.ENDC}"; ENDC resets
    all attributes back to the terminal default.
    """
    HEADER = "\033[95m"     # bright magenta — section headers
    OKBLUE = "\033[94m"     # bright blue
    OKCYAN = "\033[96m"     # bright cyan
    OKGREEN = "\033[92m"    # bright green — success messages
    WARNING = "\033[93m"    # bright yellow
    FAIL = "\033[91m"       # bright red — failure messages
    ENDC = "\033[0m"        # reset all attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
|
||||
|
||||
|
||||
@dataclass
class TestFile:
    """A single pytest file to run, plus scheduling metadata."""
    # Path of the test file, relative to the repository root.
    name: str
    # Estimated runtime in seconds; presumably consumed by the greedy
    # auto-partitioner described in the PR — confirm against run_suite.py.
    # Defaults to 60 when no measured duration is available.
    estimated_time: float = 60
    # When True the file should be excluded from execution.
    is_skipped: bool = False
|
||||
|
||||
|
||||
def run_e2e_files(
    files: list["TestFile"],
    continue_on_error: bool = False,
):
    """
    Run a list of test files sequentially with pytest.

    Args:
        files: List of TestFile objects to run, in order. Entries flagged
            ``is_skipped`` are not executed (previously the flag was
            silently ignored and skipped files ran anyway).
        continue_on_error: If True, continue running remaining tests even if one fails.
            If False, stop at first failure (default behavior for PR tests).

    Returns:
        0 if every executed file passed, -1 otherwise (callers use the
        result as a process exit status).
    """
    tic = time.perf_counter()
    success = True
    passed_tests = []
    failed_tests = []

    # Honor TestFile.is_skipped so disabled tests never run.
    runnable = [f for f in files if not f.is_skipped]
    total = len(runnable)

    # enumerate from 1 so the progress banner reads "(1/13) .. (13/13)"
    # instead of the off-by-one "(0/13) .. (12/13)".
    for i, file in enumerate(runnable, 1):
        filename, estimated_time = file.name, file.estimated_time

        full_path = os.path.join(os.getcwd(), filename)
        logger.info(f".\n.\n{Colors.HEADER}Begin ({i}/{total}):{Colors.ENDC}\npytest -sv {full_path}\n.\n.\n")
        file_tic = time.perf_counter()

        # subprocess.run waits for completion; stdout/stderr are inherited
        # so pytest output streams straight into the CI log.
        ret_code = subprocess.run(
            ["pytest", "-sv", "--durations=0", "--color=yes", full_path],
            env=os.environ,
            check=False,
        ).returncode

        elapsed = time.perf_counter() - file_tic

        logger.info(
            f".\n.\n{Colors.HEADER}End ({i}/{total}):{Colors.ENDC}\n"
            f"{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n"
        )

        if ret_code == 0:
            passed_tests.append(filename)
        else:
            # Name the failing file (the message previously printed a
            # "(unknown)" placeholder instead of the filename).
            logger.info(f"\n{Colors.FAIL}✗ FAILED: {filename} returned exit code {ret_code}{Colors.ENDC}\n")
            failed_tests.append((filename, f"exit code {ret_code}"))
            success = False
            if not continue_on_error:
                break

    elapsed_total = time.perf_counter() - tic

    if success:
        logger.info(f"{Colors.OKGREEN}Success. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")
    else:
        logger.info(f"{Colors.FAIL}Fail. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")

    _log_summary(passed_tests, failed_tests, total)

    return 0 if success else -1


def _log_summary(passed_tests, failed_tests, total):
    """Log the pass/fail summary table for a completed run."""
    logger.info(f"\n{'=' * 60}")
    logger.info(f"Test Summary: {Colors.OKGREEN}{len(passed_tests)}/{total} passed{Colors.ENDC}")
    logger.info(f"{'=' * 60}")
    if passed_tests:
        logger.info(f"{Colors.OKGREEN}✓ PASSED:{Colors.ENDC}")
        for test in passed_tests:
            logger.info(f" {test}")
    if failed_tests:
        logger.info(f"\n{Colors.FAIL}✗ FAILED:{Colors.ENDC}")
        for test, reason in failed_tests:
            logger.info(f" {test} ({reason})")
    logger.info(f"{'=' * 60}\n")
|
||||
Reference in New Issue
Block a user