From e35f304419825fbf92acf772bf8d7bbe4e43d167 Mon Sep 17 00:00:00 2001 From: Li Wang Date: Thu, 29 Jan 2026 20:28:10 +0800 Subject: [PATCH] [CI] Auto partition for test cases (#6379) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? This patch add auto-partition feat for tests, for example, before this pr, we are running e2e single card test for 2h40min, after the auto partition, test case is automatically allocated into the required n parts based on its test duration (greedy strategy) and run in parallel. The advantage of doing this is that our overall test duration will become 1/n of the original. ### Does this PR introduce _any_ user-facing change? Before: e2e single card test spend 2h40min After: e2e single card test spend 1h13min ### How was this patch tested? ```shell python .github/workflows/scripts/run_suite.py --auto-partition-size 2 --auto-partition-id 0 args=Namespace(timeout_per_file=2000, suite='e2e-singlecard', auto_partition_id=0, auto_partition_size=2, continue_on_error=False, enable_retry=False, max_attempts=2, retry_wait_seconds=60, retry_timeout_increase=600) +----------------+--------------------+ | Suite | Partition | |----------------+--------------------| | e2e-singlecard | 1/2 (0-based id=0) | +----------------+--------------------+ ✅ Enabled 13 test(s) (est total 4020.0s): - tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py (est_time=1800) - tests/e2e/singlecard/test_aclgraph_accuracy.py (est_time=480) - tests/e2e/singlecard/test_guided_decoding.py (est_time=354) - tests/e2e/singlecard/test_batch_invariant.py (est_time=320) - tests/e2e/singlecard/pooling/test_embedding.py (est_time=270) - tests/e2e/singlecard/test_quantization.py (est_time=200) - tests/e2e/singlecard/test_llama32_lora.py (est_time=162) - tests/e2e/singlecard/test_cpu_offloading.py (est_time=132) - tests/e2e/singlecard/pooling/test_classification.py (est_time=120) - 
tests/e2e/singlecard/test_camem.py (est_time=77) - tests/e2e/singlecard/compile/test_norm_quant_fusion.py (est_time=70) - tests/e2e/singlecard/test_auto_fit_max_mode_len.py (est_time=25) - tests/e2e/singlecard/test_profile_execute_duration.py (est_time=10) (base) wangli@Mac-mini vllm-ascend % python .github/workflows/scripts/run_suite.py --auto-partition-size 2 --auto-partition-id 1 args=Namespace(timeout_per_file=2000, suite='e2e-singlecard', auto_partition_id=1, auto_partition_size=2, continue_on_error=False, enable_retry=False, max_attempts=2, retry_wait_seconds=60, retry_timeout_increase=600) +----------------+--------------------+ | Suite | Partition | |----------------+--------------------| | e2e-singlecard | 2/2 (0-based id=1) | +----------------+--------------------+ ✅ Enabled 13 test(s) (est total 4025.0s): - tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py (est_time=1500) - tests/e2e/singlecard/pooling/test_scoring.py (est_time=500) - tests/e2e/singlecard/test_aclgraph_batch_invariant.py (est_time=410) - tests/e2e/singlecard/test_vlm.py (est_time=354) - tests/e2e/singlecard/test_models.py (est_time=300) - tests/e2e/singlecard/test_multistream_overlap_shared_expert.py (est_time=200) - tests/e2e/singlecard/test_sampler.py (est_time=200) - tests/e2e/singlecard/test_async_scheduling.py (est_time=150) - tests/e2e/singlecard/test_aclgraph_mem.py (est_time=130) - tests/e2e/singlecard/test_ilama_lora.py (est_time=95) - tests/e2e/singlecard/test_completion_with_prompt_embeds.py (est_time=76) - tests/e2e/singlecard/test_qwen3_multi_loras.py (est_time=65) - tests/e2e/singlecard/test_xlite.py (est_time=45) ``` - vLLM version: v0.14.1 - vLLM main: https://github.com/vllm-project/vllm/commit/dc917cceb877dfd13f98c538c4c96158047d98bd --------- Signed-off-by: wangli --- .github/workflows/READMD.md | 83 ++++++ .github/workflows/_e2e_test.yaml | 398 +++++++++++++------------ .github/workflows/pr_test_full.yaml | 2 +- .github/workflows/scripts/ci_utils.py | 101 
+++++++ .github/workflows/scripts/config.yaml | 152 ++++++++++ .github/workflows/scripts/run_suite.py | 244 +++++++++++++++ .pre-commit-config.yaml | 1 + 7 files changed, 797 insertions(+), 184 deletions(-) create mode 100644 .github/workflows/READMD.md create mode 100644 .github/workflows/scripts/ci_utils.py create mode 100644 .github/workflows/scripts/config.yaml create mode 100644 .github/workflows/scripts/run_suite.py diff --git a/.github/workflows/READMD.md b/.github/workflows/READMD.md new file mode 100644 index 00000000..f5212953 --- /dev/null +++ b/.github/workflows/READMD.md @@ -0,0 +1,83 @@ +# E2E Test Workflow Guide + +This document provides a guide on how to manage and extend the E2E test suite for `vllm-ascend`. It covers how to add new test cases and understand the automatic partitioning mechanism. + +## 1. Adding a New Test Case + +All E2E test cases are defined and managed in the `.github/workflows/scripts/config.yaml` file. + +### Steps + +1. **Prepare the Test Script**: Ensure your test script (`.py` file) is placed in the appropriate location under the `tests/e2e/` directory (e.g., `tests/e2e/singlecard/` or `tests/e2e/multicard/`). + +2. **Modify `config.yaml`**: + Open `.github/workflows/scripts/config.yaml` and locate the corresponding test suite (e.g., `e2e-singlecard` or `e2e-multicard-2-cards`). + +3. **Add Configuration Entry**: + Add a new entry under the corresponding list. Each entry contains the following fields: + * `name`: The relative path to the test file. If you only need to run a specific test function within the file, use `::` as a separator, e.g., `path/to/test.py::test_func`. + * `estimated_time`: The estimated time (in seconds) required to run the test. **This field is crucial** as it is used for automatic load balancing (partitioning). + * `is_skipped` (Optional): If set to `true`, the test will be skipped. 
+ +### Example + +Suppose you want to add a new test named `tests/e2e/singlecard/test_new_feature.py` with an estimated runtime of 120 seconds: + +```yaml +e2e-singlecard: + # ... other existing tests ... + - name: tests/e2e/singlecard/test_new_feature.py + estimated_time: 120 +``` + +To add a specific test function: + +```yaml + - name: tests/e2e/singlecard/test_new_feature.py::test_specific_case + estimated_time: 60 +``` + +## 2. Automatic Partitioning Mechanism + +To speed up CI execution, we support splitting large test suites into multiple parallel Jobs (partitions). The partitioning logic is primarily implemented in the `auto_partition` function in `.github/workflows/scripts/run_suite.py`. + +### Principle + +The partitioning algorithm uses a Greedy Approach to achieve load balancing, aiming to make the total estimated runtime of each partition as equal as possible. + +1. **Read Configuration**: The script reads all non-skipped test cases and their `estimated_time` from `config.yaml`. +2. **Sort**: Test cases are sorted by `estimated_time` in descending order. +3. **Assign**: Iterating through the sorted test cases, each case is assigned to the partition (Bucket) with the current minimum total time. + +### How to Modify Partitioning Logic + +If you need to adjust the partitioning strategy, please modify the `.github/workflows/scripts/run_suite.py` file. + +* **Algorithm Location**: `auto_partition` function. +* **Input Parameters**: + * `files`: List of test files (including `estimated_time`). + * `rank`: Index of the current partition (0 to size-1). + * `size`: Total number of partitions. 
+* **Invocation**: + CI workflows (e.g., `.github/workflows/_e2e_test.yaml`) call the script via command-line arguments: + ```bash + python3 .github/workflows/scripts/run_suite.py --suite <suite_name> --auto-partition-id <partition_id> --auto-partition-size <partition_size> + ``` + +### Notes + +* **Accurate Estimated Time**: To achieve the best load balancing, please provide an accurate `estimated_time` in `config.yaml`. If a new test is very time-consuming but the estimated time is set too low, it may cause a specific partition to timeout. +* **Number of Partitions**: The number of partitions (`auto-partition-size`) is typically defined in the `strategy.matrix` of the GitHub Actions workflow definition file (e.g., `_e2e_test.yaml`). + +## 3. Running Tests Locally + +You can use the `run_suite.py` script to run test suites locally: + +```bash +# Run the full e2e-singlecard suite +python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard + +# Simulate partitioned execution (e.g., partition 0 of 2) +python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id 0 --auto-partition-size 2 +``` diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index a0c81b2e..8c6bc1f0 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -20,9 +20,13 @@ on: type: boolean jobs: - e2e: - name: singlecard + e2e-light: + name: singlecard-light + if: ${{ inputs.type == 'light' }} runs-on: linux-aarch64-a2b3-1 + strategy: + matrix: + part: [0] container: image: ${{ inputs.image }} env: @@ -30,6 +34,8 @@ jobs: VLLM_USE_MODELSCOPE: True HF_HUB_OFFLINE: 1 steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 - name: Check npu and CANN info run: | npu-smi info @@ -43,9 +49,6 @@ jobs: apt-get update -y apt install git -y - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v6 - - name: Install system dependencies run: | apt-get -y install `cat packages.txt` @@ -78,67 +81,26 @@ jobs: env: 
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 VLLM_WORKER_MULTIPROC_METHOD: spawn - if: ${{ inputs.type == 'light' }} run: | - pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py::test_piecewise_res_consistency - pytest -sv --durations=0 tests/e2e/singlecard/test_quantization.py::test_qwen3_w8a8_quant + python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 - - name: Run e2e test - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 - if: ${{ inputs.type == 'full' }} - run: | - # We found that if running aclgraph tests in batch, it will cause AclmdlRICaptureBegin error. So we run - # the test separately. - # basic - pytest -sv --durations=0 tests/e2e/singlecard/test_auto_fit_max_mode_len.py - pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_accuracy.py - pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_batch_invariant.py - pytest -sv --durations=0 tests/e2e/singlecard/test_aclgraph_mem.py - pytest -sv --durations=0 tests/e2e/singlecard/test_async_scheduling.py - pytest -sv --durations=0 tests/e2e/singlecard/test_batch_invariant.py - pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py - pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py - pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py - pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py - pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py - pytest -sv --durations=0 tests/e2e/singlecard/test_llama32_lora.py - pytest -sv --durations=0 tests/e2e/singlecard/test_qwen3_multi_loras.py - pytest -sv --durations=0 tests/e2e/singlecard/test_models.py - pytest -sv --durations=0 tests/e2e/singlecard/test_multistream_overlap_shared_expert.py - pytest -sv --durations=0 tests/e2e/singlecard/test_profile_execute_duration.py - pytest -sv --durations=0 
tests/e2e/singlecard/test_quantization.py - pytest -sv --durations=0 tests/e2e/singlecard/test_sampler.py - pytest -sv --durations=0 tests/e2e/singlecard/test_vlm.py - pytest -sv --durations=0 tests/e2e/singlecard/test_xlite.py - - # compile - pytest -sv --durations=0 tests/e2e/singlecard/compile/test_norm_quant_fusion.py - - # model_runner_v2 - # pytest -sv --durations=0 tests/e2e/singlecard/model_runner_v2/test_basic.py - - # pooling - pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_classification.py - pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_embedding.py - pytest -sv --durations=0 tests/e2e/singlecard/pooling/test_scoring.py - - # spec_decode - pytest -sv --durations=0 tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py - pytest -sv --durations=0 tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py - - e2e-2-cards: - name: multicard-2 - runs-on: linux-aarch64-a3-2 + e2e-full: + name: singlecard-full + if: ${{ inputs.type == 'full' }} + runs-on: linux-aarch64-a2b3-1 + strategy: + matrix: + part: [0, 1] container: - image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 + image: ${{ inputs.image }} env: VLLM_LOGGING_LEVEL: ERROR VLLM_USE_MODELSCOPE: True - HCCL_BUFFSIZE: 1024 HF_HUB_OFFLINE: 1 steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + - name: Check npu and CANN info run: | npu-smi info @@ -152,8 +114,202 @@ jobs: apt-get update -y apt install git -y + - name: Install system dependencies + run: | + apt-get -y install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev clang-15 + + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v6 + with: + repository: vllm-project/vllm + ref: ${{ inputs.vllm }} + path: ./vllm-empty + fetch-depth: 1 + + - name: Install 
vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + - name: Run e2e test + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256 + run: | + python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2 + + e2e-2-cards-light: + name: multicard-2-light + if: ${{ inputs.type == 'light' }} + runs-on: linux-aarch64-a3-2 + strategy: + matrix: + part: [0] + container: + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + HCCL_BUFFSIZE: 1024 + HF_HUB_OFFLINE: 1 + steps: - name: Checkout vllm-project/vllm-ascend repo uses: actions/checkout@v6 + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y + + - name: Install system dependencies + run: | + apt-get -y install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev clang-15 + + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v6 + with: + repository: vllm-project/vllm + ref: ${{ inputs.vllm }} + 
path: ./vllm-empty + fetch-depth: 1 + + - name: Install vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + - name: Run vllm-project/vllm-ascend test (light) + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + + e2e-2-cards-full: + name: multicard-2-full + if: ${{ inputs.type == 'full' }} + runs-on: linux-aarch64-a3-2 + strategy: + matrix: + part: [0] + container: + image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.0-a3-ubuntu22.04-py3.11 + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + HCCL_BUFFSIZE: 1024 + HF_HUB_OFFLINE: 1 + steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y + + - name: Install system dependencies + run: | + apt-get -y install `cat packages.txt` + apt-get -y install gcc g++ cmake libnuma-dev clang-15 + + update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 + update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 + + - name: Checkout vllm-project/vllm repo + uses: actions/checkout@v6 + with: + repository: 
vllm-project/vllm + ref: ${{ inputs.vllm }} + path: ./vllm-empty + fetch-depth: 1 + + - name: Install vllm-project/vllm from source + working-directory: ./vllm-empty + run: | + VLLM_TARGET_DEVICE=empty pip install -e . + + - name: Install vllm-project/vllm-ascend + env: + PIP_EXTRA_INDEX_URL: https://mirrors.huaweicloud.com/ascend/repos/pypi + run: | + pip install -r requirements-dev.txt + pip install -v -e . + - name: Run vllm-project/vllm-ascend test (full) + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 + + - name: Run vllm-project/vllm-ascend test (non triton) + if: ${{ inputs.type == 'full' && matrix.part == 0 }} + env: + VLLM_WORKER_MULTIPROC_METHOD: spawn + run: | + python3 -m pip uninstall -y triton-ascend + pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py + + e2e-4-cards-full: + name: multicard-4-full + if: ${{ inputs.type == 'full' }} + runs-on: linux-aarch64-a3-4 + strategy: + matrix: + part: [0] + container: + image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11 + env: + VLLM_LOGGING_LEVEL: ERROR + VLLM_USE_MODELSCOPE: True + HF_HUB_OFFLINE: 1 + steps: + - name: Checkout vllm-project/vllm-ascend repo + uses: actions/checkout@v6 + - name: Check npu and CANN info + run: | + npu-smi info + cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info + + - name: Config mirrors + run: | + sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list + pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple + pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local + apt-get update -y + apt install git -y - name: Install system dependencies run: | @@ -183,135 +339,11 @@ jobs: pip install -r 
requirements-dev.txt pip install -v -e . - - name: Run vllm-project/vllm-ascend test (light) - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - if: ${{ inputs.type == 'light' }} - run: | - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek3_2_w8a8_pruning_mtp_tp2_ep - - - name: Run vllm-project/vllm-ascend test (full) - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - if: ${{ inputs.type == 'full' }} - run: | - # this test fail with triton. Fix me. - # pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_performance.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_data_parallel.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_expert_parallel.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_external_launcher.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_full_graph_mode.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py - - # To avoid oom, we need to run the test in a single process. 
- pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_w4a8_dynamic_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_sp_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2 - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek3_2_w8a8_pruning_mtp_tp2_ep - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_w4a4_distributed_tp2 - - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_offline_weight_load.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_pipeline_parallel.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_prefix_caching.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_quantization.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_qwen3_moe.py - # This test is broken, fix me - #pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_shared_expert_dp.py - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_single_request_aclgraph.py - - - name: Run vllm-project/vllm-ascend test (non triton) - if: ${{ inputs.type == 'full' }} - env: - VLLM_WORKER_MULTIPROC_METHOD: spawn - run: | - python3 
-m pip uninstall -y triton-ascend - pytest -sv --durations=0 tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py - - e2e-4-cards: - name: multicard-4 - needs: [e2e-2-cards] - if: ${{ needs.e2e-2-cards.result == 'success' && inputs.type == 'full' }} - runs-on: linux-aarch64-a3-4 - container: - image: m.daocloud.io/quay.io/ascend/cann:8.5.0-a3-ubuntu22.04-py3.11 - env: - VLLM_LOGGING_LEVEL: ERROR - VLLM_USE_MODELSCOPE: True - HF_HUB_OFFLINE: 1 - steps: - - name: Check npu and CANN info - run: | - npu-smi info - cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info - - - name: Config mirrors - run: | - sed -Ei 's@(ports|archive).ubuntu.com@cache-service.nginx-pypi-cache.svc.cluster.local:8081@g' /etc/apt/sources.list - pip config set global.index-url http://cache-service.nginx-pypi-cache.svc.cluster.local/pypi/simple - pip config set global.trusted-host cache-service.nginx-pypi-cache.svc.cluster.local - apt-get update -y - apt install git wget curl -y - git config --global url."https://gh-proxy.test.osinfra.cn/https://github.com/".insteadOf https://github.com/ - - - name: Checkout vllm-project/vllm-ascend repo - uses: actions/checkout@v6 - with: - path: ./vllm-ascend - - - name: Install system dependencies - run: | - apt-get -y install `cat packages.txt` - apt-get -y install gcc g++ cmake libnuma-dev clang-15 - - update-alternatives --install /usr/bin/clang clang /usr/bin/clang-15 20 - update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-15 20 - - - name: Checkout vllm-project/vllm repo - uses: actions/checkout@v6 - with: - repository: vllm-project/vllm - ref: ${{ inputs.vllm }} - path: ./vllm-empty - - - name: Install vllm-project/vllm from source - working-directory: ./vllm-empty - run: | - VLLM_TARGET_DEVICE=empty pip install -e . 
- - - name: Install vllm-project/vllm-ascend - working-directory: ./vllm-ascend - run: | - export PIP_EXTRA_INDEX_URL=https://mirrors.huaweicloud.com/ascend/repos/pypi - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/Ascend/ascend-toolkit/latest/x86_64-linux/devlib - pip install -r requirements-dev.txt - pip install -v -e . - - name: Run vllm-project/vllm-ascend test for V1 Engine - working-directory: ./vllm-ascend env: VLLM_WORKER_MULTIPROC_METHOD: spawn run: | - pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_data_parallel_tp2.py - pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_kimi_k2.py - pytest -sv --durations=0 tests/e2e/multicard/4-cards/test_qwen3_next.py - - # recover once aclgraph stream bug fixed. - # long_sequence - # pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py - # pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_basic.py - # pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill.py - # pytest -sv --durations=0 tests/e2e/multicard/4-cards/long_sequence/test_mtp.py - - # # spec_decode - # pytest -sv --durations=0 tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py + python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1 e2e_310p: name: 310p singlecard diff --git a/.github/workflows/pr_test_full.yaml b/.github/workflows/pr_test_full.yaml index 5937d221..740d2cb1 100644 --- a/.github/workflows/pr_test_full.yaml +++ b/.github/workflows/pr_test_full.yaml @@ -77,7 +77,7 @@ jobs: matrix: vllm_version: [dc917cceb877dfd13f98c538c4c96158047d98bd, v0.14.1] needs: [changes] - if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} + if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }} uses: ./.github/workflows/_e2e_test.yaml with: vllm: ${{ matrix.vllm_version }} diff --git 
"""Shared helpers for the CI suite runner: test-file records and a pytest driver."""

import logging
import os
import subprocess
import sys
import time
from dataclasses import dataclass
from typing import Optional

# Send runner messages to stdout. logging.basicConfig() defaults to stderr,
# which contradicted the stated intent of logging to stdout.
logging.basicConfig(level=logging.INFO, format="%(message)s", stream=sys.stdout)
logger = logging.getLogger(__name__)


class Colors:
    """ANSI escape sequences used to colorize the runner's log output."""

    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"
    ENDC = "\033[0m"  # resets all attributes
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"


@dataclass
class TestFile:
    """One entry of a test suite."""

    # Path handed to pytest, relative to the current working directory; may
    # carry a ``::test_func`` suffix to select a single test function.
    name: str
    # Estimated runtime in seconds; only echoed in the log by run_e2e_files.
    estimated_time: float = 60
    # Marks the entry as skipped. run_e2e_files itself does not consult this
    # flag, so callers are expected to filter skipped entries beforehand.
    is_skipped: bool = False


def run_e2e_files(
    files: list[TestFile],
    continue_on_error: bool = False,
    timeout_per_file: Optional[float] = None,
):
    """
    Run a list of test files, one ``pytest`` subprocess per entry.

    Args:
        files: List of TestFile objects to run, in order.
        continue_on_error: If True, continue running remaining tests even if one fails.
            If False (the default), stop at the first failure.
        timeout_per_file: Optional wall-clock limit in seconds for each pytest
            invocation. When exceeded, the pytest process is killed and the
            entry is recorded as failed. ``None`` (the default) waits forever.
            Only the pytest process itself is killed; processes it spawned are
            not explicitly terminated here.

    Returns:
        0 if every executed entry exited with code 0, otherwise -1.
    """
    total = len(files)
    tic = time.perf_counter()
    passed_tests: list[str] = []
    failed_tests: list[tuple[str, str]] = []

    # Count from 1 so the banner reads "1/N" .. "N/N" instead of "0/N" .. "N-1/N".
    for index, file in enumerate(files, start=1):
        filename, estimated_time = file.name, file.estimated_time

        full_path = os.path.join(os.getcwd(), filename)
        logger.info(
            f".\n.\n{Colors.HEADER}Begin ({index}/{total}):{Colors.ENDC}\n"
            f"pytest -sv {full_path}\n.\n.\n"
        )
        file_tic = time.perf_counter()

        # stdout/stderr are left as None so the child writes straight to the
        # parent's streams and its output appears live in the CI log.
        process = subprocess.Popen(
            ["pytest", "-sv", "--durations=0", "--color=yes", full_path],
            stdout=None,
            stderr=None,
            env=os.environ,
        )
        try:
            ret_code = process.wait(timeout=timeout_per_file)
            outcome = f"exit code {ret_code}"
        except subprocess.TimeoutExpired:
            # Kill the hung run and reap it so no zombie process is left behind.
            process.kill()
            process.wait()
            ret_code = None
            outcome = f"timed out after {timeout_per_file}s"

        elapsed = time.perf_counter() - file_tic

        # Adjacent f-strings instead of a backslash continuation: the latter
        # embedded the source indentation into the emitted message.
        logger.info(
            f".\n.\n{Colors.HEADER}End ({index}/{total}):{Colors.ENDC}\n"
            f"{filename=}, {elapsed=:.0f}, {estimated_time=}\n.\n.\n"
        )

        if ret_code == 0:
            passed_tests.append(filename)
            continue

        if ret_code is None:
            logger.info(f"\n{Colors.FAIL}✗ FAILED: {filename} {outcome}{Colors.ENDC}\n")
        else:
            logger.info(f"\n{Colors.FAIL}✗ FAILED: {filename} returned exit code {ret_code}{Colors.ENDC}\n")
        failed_tests.append((filename, outcome))
        if not continue_on_error:
            break

    elapsed_total = time.perf_counter() - tic
    success = not failed_tests

    if success:
        logger.info(f"{Colors.OKGREEN}Success. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")
    else:
        logger.info(f"{Colors.FAIL}Fail. Time elapsed: {elapsed_total:.2f}s{Colors.ENDC}")

    # Print summary
    logger.info(f"\n{'=' * 60}")
    logger.info(f"Test Summary: {Colors.OKGREEN}{len(passed_tests)}/{total} passed{Colors.ENDC}")
    logger.info(f"{'=' * 60}")
    if passed_tests:
        logger.info(f"{Colors.OKGREEN}✓ PASSED:{Colors.ENDC}")
        for test in passed_tests:
            logger.info(f"  {test}")
    if failed_tests:
        logger.info(f"\n{Colors.FAIL}✗ FAILED:{Colors.ENDC}")
        for test, reason in failed_tests:
            logger.info(f"  {test} ({reason})")
    logger.info(f"{'=' * 60}\n")

    return 0 if success else -1
tests/e2e/singlecard/test_profile_execute_duration.py + estimated_time: 10 + - name: tests/e2e/singlecard/test_quantization.py + estimated_time: 200 + - name: tests/e2e/singlecard/test_sampler.py + estimated_time: 200 + - name: tests/e2e/singlecard/test_vlm.py + estimated_time: 354 + - name: tests/e2e/singlecard/test_xlite.py + estimated_time: 45 + - name: tests/e2e/singlecard/compile/test_norm_quant_fusion.py + estimated_time: 70 + - name: tests/e2e/singlecard/pooling/test_classification.py + estimated_time: 120 + - name: tests/e2e/singlecard/pooling/test_embedding.py + estimated_time: 270 + - name: tests/e2e/singlecard/pooling/test_scoring.py + estimated_time: 500 + - name: tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py + estimated_time: 1500 + - name: tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py + estimated_time: 1800 + - name: tests/e2e/singlecard/model_runner_v2/test_basic.py + estimated_time: 80 + is_skipped: true + +e2e-singlecard-light: + - name: tests/e2e/singlecard/test_aclgraph_accuracy.py::test_piecewise_res_consistency + estimated_time: 220 + - name: tests/e2e/singlecard/test_quantization.py::test_qwen3_w8a8_quant + estimated_time: 90 + +e2e-2card-light: + - name: tests/e2e/multicard/2-cards/test_qwen3_moe.py::test_qwen3_moe_distributed_mp_tp2_ep + estimated_time: 220 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek3_2_w8a8_pruning_mtp_tp2_ep + estimated_time: 90 + +e2e-multicard-2-cards: + # TODO: recover skipped tests + - name: tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py + estimated_time: 0 + is_skipped: true + - name: tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py + estimated_time: 0 + is_skipped: true + - name: tests/e2e/multicard/2-cards/test_offline_weight_load.py + estimated_time: 0 + is_skipped: true + - name: tests/e2e/multicard/2-cards/test_shared_expert_dp.py + estimated_time: 0 + is_skipped: true + - name: 
tests/e2e/multicard/2-cards/test_qwen3_performance.py + estimated_time: 180 + - name: tests/e2e/multicard/2-cards/test_data_parallel.py + estimated_time: 380 + - name: tests/e2e/multicard/2-cards/test_expert_parallel.py + estimated_time: 170 + - name: tests/e2e/multicard/2-cards/test_external_launcher.py + estimated_time: 300 + - name: tests/e2e/multicard/2-cards/test_full_graph_mode.py + estimated_time: 400 + - name: tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py + estimated_time: 60 + # Run the test in a separate step to avoid oom + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_multistream_moe_tp2 + estimated_time: 100 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_w4a8_dynamic_tp2 + estimated_time: 80 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_sp_tp2 + estimated_time: 132 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_w4a8_accuracy_tp2 + estimated_time: 132 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_moe_fc2_tp2 + estimated_time: 140 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek_v2_lite_fc1_tp2 + estimated_time: 82 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_fc1_tp2 + estimated_time: 73 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_dense_prefetch_mlp_weight_tp2 + estimated_time: 71 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_deepseek3_2_w8a8_pruning_mtp_tp2_ep + estimated_time: 111 + - name: tests/e2e/multicard/2-cards/test_offline_inference_distributed.py::test_qwen3_w4a4_distributed_tp2 + estimated_time: 180 + - name: tests/e2e/multicard/2-cards/test_pipeline_parallel.py + estimated_time: 270 + - name: tests/e2e/multicard/2-cards/test_prefix_caching.py + 
def load_suites_from_config(config_path: str = "config.yaml") -> dict[str, list[TestFile]]:
    """Load the test-suite definitions from a YAML config file.

    The file is expected to be a mapping of suite name to a list of entries,
    each entry being a mapping with a ``name`` key and optional
    ``estimated_time`` (default 60) and ``is_skipped`` (default False) keys.

    Args:
        config_path: Path of the config file, resolved relative to the
            directory containing this script.

    Returns:
        A dict mapping each suite name to its list of ``TestFile`` objects,
        in the order they appear in the config.

    Raises:
        ValueError: If an entry has no ``name``.
    """
    # Resolve relative to this script so the result does not depend on the cwd.
    abs_config_path = Path(__file__).parent / config_path

    # Explicit encoding: the platform default is not guaranteed to be UTF-8.
    with open(abs_config_path, encoding="utf-8") as f:
        # An empty YAML document parses to None; treat it as "no suites".
        suites_data = yaml.safe_load(f) or {}

    suites: dict[str, list[TestFile]] = {}
    for suite_name, test_files in suites_data.items():
        entries = []
        # A suite key without any entries also parses to None.
        for file_data in test_files or []:
            name = file_data.get("name")
            if not name:
                # Fail here with a clear message instead of creating a
                # TestFile without a name that breaks later consumers.
                raise ValueError(f"Suite '{suite_name}' in {abs_config_path} has an entry without a 'name': {file_data!r}")
            estimated_time = file_data.get("estimated_time", 60)
            is_skipped = file_data.get("is_skipped", False)
            entries.append(TestFile(name, estimated_time, is_skipped))
        suites[suite_name] = entries

    return suites
def auto_partition(files, rank, size):
    """
    Split the non-skipped files into ``size`` partitions with approximately
    equal sums of estimated times and return the partition for ``rank``.

    Files are processed in descending order of ``estimated_time`` (ties keep
    their original relative order) and each one is greedily assigned to the
    partition whose current total is smallest (lowest index wins on ties).

    When ``size`` exceeds the number of non-skipped files, the first
    partitions receive one file each and the remaining ones are empty, so
    every file is still assigned to exactly one partition.

    Args:
        files (list): File objects with ``estimated_time`` and ``is_skipped`` attributes
        rank (int): Index of the partition to return (0 to size-1)
        size (int): Number of partitions

    Returns:
        list: File objects in the specified rank's partition; an empty list
        when there is nothing to run or ``size`` is not positive

    Raises:
        IndexError: If ``rank`` is outside ``0 .. size-1``
    """
    # Skipped files never take part in the partitioning
    active = [f for f in files if not f.is_skipped]

    if not active or size <= 0:
        return []

    # Reject out-of-range ranks explicitly; a negative rank would otherwise
    # silently select a partition counted from the end.
    if not 0 <= rank < size:
        raise IndexError(f"partition rank {rank} is out of range for {size} partition(s)")

    # sorted() is stable, so equal weights keep their original order
    order = sorted(range(len(active)), key=lambda i: -active[i].estimated_time)

    partitions = [[] for _ in range(size)]
    sums = [0.0] * size

    # Greedy approach: assign each file to the partition with the smallest current sum
    for idx in order:
        target = sums.index(min(sums))
        partitions[target].append(idx)
        sums[target] += active[idx].estimated_time

    return [active[i] for i in partitions[rank]]
list[str]: + covered_dirs = set() + for file_path in all_suite_files: + # e.g. tests/e2e/singlecard/test_foo.py -> tests/e2e/singlecard + parent_dir = (project_root / file_path).parent if os.path.isfile(file_path) else (project_root / file_path) + if parent_dir.exists(): + # Store relative path to project root + try: + rel_dir = parent_dir.relative_to(project_root) + + # Check if this directory is already covered by a parent directory + is_covered = False + for existing_dir in list(covered_dirs): + # If existing_dir is a parent of rel_dir, rel_dir is already covered + if existing_dir in rel_dir.parents or existing_dir == rel_dir: + is_covered = True + break + # If rel_dir is a parent of existing_dir, replace existing_dir with rel_dir + elif rel_dir in existing_dir.parents: + covered_dirs.remove(existing_dir) + # We continue checking other existing_dirs, but we know rel_dir should be added + # unless another parent covers it (which is handled by the first if block logic effectively + # but we need to be careful with modification during iteration, so we use list copy) + + if not is_covered: + covered_dirs.add(rel_dir) + + except ValueError: + pass + return covered_dirs + + +def _sanity_check_suites(suites: dict[str, list[TestFile]]): + """ + Check if all test files defined in the suites exist on disk. + """ + # 1. Collect all test files defined in all suites + all_suite_files = set() + for suite in suites.values(): + for test_file in suite: + # Handle ::test_case syntax + file_path = test_file.name.split("::")[0] + all_suite_files.add(file_path) + + # 2. Identify all directories covered by the suites + project_root = Path.cwd() + if not (project_root / "tests").exists(): + script_dir = Path(__file__).parent + # .github/workflows/scripts -> ../../../ -> root + project_root = script_dir.parents[2] + # For now, we only check dirs under [tests/e2e/singlecard, tests/e2e/multicard] + covered_dirs = _get_disk_covered_dirs(all_suite_files, project_root) + + # 3. 
def _sanity_check_suites(suites: dict[str, list[TestFile]]):
    """
    Check that the suites and the test files on disk agree with each other.

    Two conditions are verified:
      * every ``test_*.py`` file found (recursively) in a directory covered
        by the suites is listed in at least one suite, and
      * every file listed in a suite exists on disk.

    Raises:
        AssertionError: If either condition is violated. The error is raised
            explicitly so the check still runs under ``python -O``.
    """
    # 1. Collect all test files defined in all suites.
    #    "path::test_case" entries are reduced to their file path.
    all_suite_files = {test_file.name.split("::")[0] for suite in suites.values() for test_file in suite}

    # 2. Identify all directories covered by the suites
    project_root = Path.cwd()
    if not (project_root / "tests").exists():
        # .github/workflows/scripts -> ../../../ -> root
        project_root = Path(__file__).parent.parents[2]
    covered_dirs = _get_disk_covered_dirs(all_suite_files, project_root)

    # 3. Scan disk for all test_*.py files in these directories
    all_disk_files = set()
    for dir_path in covered_dirs:
        for py_file in (project_root / dir_path).rglob("test_*.py"):
            try:
                all_disk_files.add(str(py_file.relative_to(project_root)))
            except ValueError:
                # Not under project_root; nothing to compare against.
                pass

    # 4. Find files on disk but missing from ANY suite
    missing_files = sorted(all_disk_files - all_suite_files)
    if missing_files:
        missing_text = "\n".join(f'TestFile("{x}"),' for x in missing_files)
        raise AssertionError(
            f"Some test files found on disk in covered directories are not in ANY test suite.\n"
            f"Scanned directories: {sorted([str(d) for d in covered_dirs])}\n"
            f"Missing files:\n"
            f"{missing_text}\n"
            f"If this is intentional, please label them as 'is_skipped=True' and add them to the test suite."
        )

    # 5. Check that all files in suites exist on disk
    non_existent_files = sorted(all_suite_files - all_disk_files)
    if non_existent_files:
        non_existent_text = "\n".join(f'TestFile("{x}"),' for x in non_existent_files)
        raise AssertionError(
            f"Some test files in test suite do not exist on disk:\n"
            f"{non_existent_text}\n"
            f"Please check if the test files are correctly specified in the local repository."
        )
def main():
    """Parse the command line, select the test files and run them.

    The selected suite (or the concatenation of every suite for ``all``) is
    optionally reduced to one auto-balanced partition, a summary is printed
    before and after the run, and the process exits with the code returned
    by ``run_e2e_files``.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--suite",
        type=str,
        default=list(suites.keys())[0],
        choices=list(suites.keys()) + ["all"],
        help="The suite to run",
    )
    arg_parser.add_argument(
        "--auto-partition-id",
        type=int,
        help="Use auto load balancing. The part id.",
    )
    arg_parser.add_argument(
        "--auto-partition-size",
        type=int,
        help="Use auto load balancing. The number of parts.",
    )
    arg_parser.add_argument(
        "--continue-on-error",
        action="store_true",
        default=False,
        help="Continue running remaining tests even if one fails (useful for nightly tests)",
    )
    args = arg_parser.parse_args()
    print(f"{args=}")

    # Reject inconsistent partition arguments up front with a usage error
    # instead of failing later with a TypeError/IndexError.
    if args.auto_partition_size:
        if args.auto_partition_size < 0:
            arg_parser.error("--auto-partition-size must be a positive integer")
        if args.auto_partition_id is None or not 0 <= args.auto_partition_id < args.auto_partition_size:
            arg_parser.error(
                "--auto-partition-id is required with --auto-partition-size and must be in "
                f"[0, {args.auto_partition_size - 1}]"
            )

    _sanity_check_suites(suites)

    # "all" is an accepted choice but is not a key of `suites`.
    if args.suite == "all":
        files = [f for suite_files in suites.values() for f in suite_files]
    else:
        files = suites[args.suite]

    files_disabled = [f for f in files if f.is_skipped]

    if args.auto_partition_size:
        files = auto_partition(files, args.auto_partition_id, args.auto_partition_size)
        partition_info = (
            f"{args.auto_partition_id + 1}/{args.auto_partition_size} (0-based id={args.auto_partition_id})"
        )
    else:
        partition_info = "full"

    # Print test info at beginning (similar to test/run_suite.py pretty_print_tests)
    headers = ["Suite", "Partition"]
    rows = [[args.suite, partition_info]]
    msg = tabulate.tabulate(rows, headers=headers, tablefmt="psql") + "\n"

    total_est_time = sum(f.estimated_time for f in files)
    msg += f"✅ Enabled {len(files)} test(s) (est total {total_est_time:.1f}s):\n"
    for f in files:
        msg += f" - {f.name} (est_time={f.estimated_time})\n"
    msg += f"\n❌ Disabled {len(files_disabled)} test(s)(Please consider to recover them):\n"
    for f in files_disabled:
        msg += f" - {f.name} (est_time={f.estimated_time})\n"

    print(msg, flush=True)

    exit_code = run_e2e_files(
        files,
        continue_on_error=args.continue_on_error,
    )

    # Print tests again at the end for visibility
    msg = "\n" + tabulate.tabulate(rows, headers=headers, tablefmt="psql") + "\n"
    msg += f"✅ Executed {len(files)} test(s) (est total {total_est_time:.1f}s):\n"
    for f in files:
        msg += f" - {f.name} (est_time={f.estimated_time})\n"
    print(msg, flush=True)

    # SystemExit works even when the `site` module's exit() helper is absent.
    raise SystemExit(exit_code)


if __name__ == "__main__":
    main()
actionlint + exclude: '.*\.github/workflows/scripts/.*\.ya?ml$' - repo: local hooks: - id: png-lint