diff --git a/.github/workflows/READMD.md b/.github/workflows/READMD.md index f5212953..20918947 100644 --- a/.github/workflows/READMD.md +++ b/.github/workflows/READMD.md @@ -47,8 +47,10 @@ To speed up CI execution, we support splitting large test suites into multiple p The partitioning algorithm uses a Greedy Approach to achieve load balancing, aiming to make the total estimated runtime of each partition as equal as possible. 1. **Read Configuration**: The script reads all non-skipped test cases and their `estimated_time` from `config.yaml`. -2. **Sort**: Test cases are sorted by `estimated_time` in descending order. +2. **Sort (Balanced Assignment)**: Test cases are sorted by `estimated_time` in descending order. This ensures that the heaviest tasks are distributed first to achieve optimal load balancing across partitions. 3. **Assign**: Iterating through the sorted test cases, each case is assigned to the partition (Bucket) with the current minimum total time. +4. **Re-sort (Fast Feedback)**: Within each partition, tests are re-sorted by `estimated_time` in ascending order. This allows the CI to cover as many test cases as possible in the early stages. + > TIP: If you need to prioritize a new test case, you can temporarily set its `estimated_time` to 0 to ensure it runs first within its partition, then update it to the actual value later. 
### How to Modify Partitioning Logic diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index 8c6bc1f0..a6d16183 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -25,6 +25,7 @@ jobs: if: ${{ inputs.type == 'light' }} runs-on: linux-aarch64-a2b3-1 strategy: + fail-fast: false matrix: part: [0] container: @@ -89,6 +90,7 @@ jobs: if: ${{ inputs.type == 'full' }} runs-on: linux-aarch64-a2b3-1 strategy: + fail-fast: false matrix: part: [0, 1] container: @@ -153,6 +155,7 @@ jobs: if: ${{ inputs.type == 'light' }} runs-on: linux-aarch64-a3-2 strategy: + fail-fast: false matrix: part: [0] container: @@ -216,6 +219,7 @@ jobs: if: ${{ inputs.type == 'full' }} runs-on: linux-aarch64-a3-2 strategy: + fail-fast: false matrix: part: [0] container: @@ -287,6 +291,7 @@ jobs: if: ${{ inputs.type == 'full' }} runs-on: linux-aarch64-a3-4 strategy: + fail-fast: false matrix: part: [0] container: diff --git a/.github/workflows/scripts/run_suite.py b/.github/workflows/scripts/run_suite.py index ae90d8f5..bbd4d6dc 100644 --- a/.github/workflows/scripts/run_suite.py +++ b/.github/workflows/scripts/run_suite.py @@ -74,6 +74,7 @@ def auto_partition(files, rank, size): # Return the files corresponding to the indices in the specified rank's partition indices = partitions[rank] + indices.sort(key=lambda i: files[i].estimated_time) return [files[i] for i in indices] @@ -189,7 +190,7 @@ def main(): arg_parser.add_argument( "--continue-on-error", action="store_true", - default=False, + default=True, help="Continue running remaining tests even if one fails (useful for nightly tests)", ) args = arg_parser.parse_args()