[CI] Enable auto upgrade e2e estimated time for auto-partition suites (#6840)

### What this PR does / why we need it?
This patch adds a schedule-triggered workflow that automatically updates the
e2e estimated times, for better load balancing across the auto-partitioned
suites:
1. The workflow runs the full e2e test suite to measure the duration of each
test.
2. The script `update_estimated_time.py` updates
[config.json](https://github.com/vllm-project/vllm-ascend/blob/main/.github/workflows/scripts/config.yaml)
with the latest durations (a sketch of this step follows the screenshot
below).
3. The workflow then automatically opens a pull request with the changes to
`config.json`.
<img width="2484" height="764" alt="image"
src="https://github.com/user-attachments/assets/02f3459c-bb3b-4f8e-9966-8bb2e5c1bbea"
/>
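
For illustration, here is a minimal sketch of what step 2 could look like. The
actual `update_estimated_time.py` in this PR may differ; the input file name
`durations.json`, the function name, and the config layout are assumptions,
not taken from the diff:

```python
# Hypothetical sketch of the estimated-time update (step 2 above).
# Assumptions: durations.json maps test file names to measured seconds,
# and config.json holds a "test_files" list with "name"/"estimated_time".
import json
from pathlib import Path


def update_estimated_times(durations_path: str, config_path: str) -> None:
    durations = json.loads(Path(durations_path).read_text())
    config = json.loads(Path(config_path).read_text())
    for entry in config.get("test_files", []):
        measured = durations.get(entry["name"])
        if measured is not None:
            # Round up so the partitioner slightly over-provisions each suite.
            entry["estimated_time"] = int(measured) + 1
    Path(config_path).write_text(json.dumps(config, indent=2) + "\n")
```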


### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main: 83b47f67b1

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
Commit: d431d7d526 (parent c7fd7a25f7)
Author: Li Wang
Date: 2026-03-04 10:38:34 +08:00
Committed by: GitHub
5 changed files with 575 additions and 262 deletions


@@ -1,24 +1,13 @@
-import logging
 import os
 import subprocess
 import time
 from dataclasses import dataclass


-# Configure logger to output to stdout
-logging.basicConfig(level=logging.INFO, format="%(message)s")
-logger = logging.getLogger(__name__)
-class Colors:
+class _Color:
     HEADER = "\033[95m"
-    OKBLUE = "\033[94m"
-    OKCYAN = "\033[96m"
-    OKGREEN = "\033[92m"
-    WARNING = "\033[93m"
-    FAIL = "\033[91m"
-    ENDC = "\033[0m"
-    BOLD = "\033[1m"
-    UNDERLINE = "\033[4m"
+    GREEN = "\033[92m"
+    RED = "\033[91m"
+    RESET = "\033[0m"


 @dataclass
@@ -28,74 +17,77 @@ class TestFile:
     is_skipped: bool = False


-def run_e2e_files(
+@dataclass
+class TestRecord:
+    name: str
+    passed: bool
+    elapsed: float
+    estimated: float
+
+    def to_dict(self) -> dict:
+        return {
+            "name": self.name,
+            "passed": self.passed,
+            "elapsed": self.elapsed,
+            "estimated": self.estimated,
+        }
+
+
+def run_tests(
     files: list[TestFile],
     continue_on_error: bool = False,
-):
+) -> tuple[int, list[TestRecord]]:
     """
-    Run a list of test files.
+    Run each TestFile with pytest and collect timing results.

     Args:
-        files: List of TestFile objects to run
-        continue_on_error: If True, continue running remaining tests even if one fails.
-                           If False, stop at first failure (default behavior for PR tests).
+        files: Tests to run (skipped entries should already be filtered out).
+        continue_on_error: If True, keep running after a failure.
+        report_path: If provided, write a Markdown timing report here.
+
+    Returns:
+        (exit_code, records) — exit_code is 0 on full success, -1 otherwise.
     """
-    tic = time.perf_counter()
-    success = True
-    passed_tests = []
-    failed_tests = []
+    records: list[TestRecord] = []
+    all_passed = True
+    total_start = time.perf_counter()

-    for i, file in enumerate(files):
-        filename, estimated_time = file.name, file.estimated_time
-        full_path = os.path.join(os.getcwd(), filename)
-        logger.info(f".\n.\n{Colors.HEADER}Begin ({i}/{len(files)}):{Colors.ENDC}\npytest -sv {full_path}\n.\n.\n")
-        file_tic = time.perf_counter()
-        process = subprocess.Popen(
-            ["pytest", "-sv", "--durations=0", "--color=yes", full_path],
-            stdout=None,
-            stderr=None,
-            env=os.environ,
-        )
-        process.wait()
-        elapsed = time.perf_counter() - file_tic
-        ret_code = process.returncode
-        logger.info(
-            f".\n.\n{Colors.HEADER}End ({i}/{len(files)}):{Colors.ENDC}\n{filename=}, \
-{elapsed=:.0f}, {estimated_time=}\n.\n.\n"
+    for i, test in enumerate(files):
+        print(f"\n{'.' * 60}", flush=True)
+        print(
+            f"{_Color.HEADER}[{i + 1}/{len(files)}] START {test.name}{_Color.RESET}",
+            flush=True,
         )
-        if ret_code == 0:
-            passed_tests.append(filename)
-        else:
-            logger.info(f"\n{Colors.FAIL}✗ FAILED: {filename} returned exit code {ret_code}{Colors.ENDC}\n")
-            failed_tests.append((filename, f"exit code {ret_code}"))
-            success = False
+        start = time.perf_counter()
+        result = subprocess.run(["pytest", "-sv", "--durations=0", "--color=yes", test.name])
+        elapsed = time.perf_counter() - start
+        passed = result.returncode == 0
+        records.append(TestRecord(name=test.name, passed=passed, elapsed=elapsed, estimated=test.estimated_time))
+        color = _Color.GREEN if passed else _Color.RED
+        status = "PASSED" if passed else f"FAILED (exit code {result.returncode})"
+        print(
+            f"{color}[{i + 1}/{len(files)}] {status} {test.name} ({elapsed:.0f}s){_Color.RESET}",
+            flush=True,
+        )
+        if not passed:
+            all_passed = False
             if not continue_on_error:
                 break

-    elapsed_total = time.perf_counter() - tic
+    total_elapsed = time.perf_counter() - total_start
+    passed_count = sum(1 for r in records if r.passed)

-    if success:
-        logger.info(f"{Colors.OKGREEN}Success. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")
-    else:
-        logger.info(f"{Colors.FAIL}Fail. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")
+    print(f"\n{'=' * 60}")
+    color = _Color.GREEN if all_passed else _Color.RED
+    print(f"{color}Summary: {passed_count}/{len(files)} passed ({total_elapsed:.2f}s total){_Color.RESET}")
+    print("=" * 60)
+    for r in records:
+        icon = f"{_Color.GREEN}✓{_Color.RESET}" if r.passed else f"{_Color.RED}✗{_Color.RESET}"
+        print(f"  {icon} {r.name} ({r.elapsed:.0f}s)")
+    print(flush=True)

-    # Print summary
-    logger.info(f"\n{'=' * 60}")
-    logger.info(f"Test Summary: {Colors.OKGREEN}{len(passed_tests)}/{len(files)} passed{Colors.ENDC}")
-    logger.info(f"{'=' * 60}")
-    if passed_tests:
-        logger.info(f"{Colors.OKGREEN}✓ PASSED:{Colors.ENDC}")
-        for test in passed_tests:
-            logger.info(f"    {test}")
-    if failed_tests:
-        logger.info(f"\n{Colors.FAIL}✗ FAILED:{Colors.ENDC}")
-        for test, reason in failed_tests:
-            logger.info(f"    {test} ({reason})")
-    logger.info(f"{'=' * 60}\n")
-    return 0 if success else -1
+    return (0 if all_passed else -1), records
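
As a usage sketch (not part of the diff), the records returned by `run_tests`
could be dumped via `TestRecord.to_dict` to produce the per-test duration data
the scheduled workflow feeds into the update script. The output file name
`durations.json` and the `TestFile` entry below are assumptions, as is the
idea of appending this to the same script:

```python
# Usage sketch: run the suite and dump per-test timings for the scheduled
# estimated-time update. Assumes it lives in the same module as TestFile,
# TestRecord, and run_tests; "durations.json" is a hypothetical file name.
import json
import sys

if __name__ == "__main__":
    files = [
        TestFile(name="tests/e2e/test_example.py", estimated_time=300),  # hypothetical entry
    ]
    # continue_on_error=True so one failure does not hide the other timings.
    exit_code, records = run_tests(files, continue_on_error=True)
    with open("durations.json", "w") as f:
        json.dump([r.to_dict() for r in records], f, indent=2)
    sys.exit(0 if exit_code == 0 else 1)
```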