[CI] Enable auto upgrade e2e estimated time for auto-partition suites (#6840)
### What this PR does / why we need it? This patch adds a schedule-triggered workflow that automatically upgrades the e2e estimated times for better load balancing. 1. The workflow runs the full e2e test suite to get the duration of each test. 2. The script `update_estimated_time.py` upgrades the [config.json](https://github.com/vllm-project/vllm-ascend/blob/main/.github/workflows/scripts/config.yaml) according to the latest timings. 3. The workflow automatically submits a pull request that includes the changes to `config.json`. <img width="2484" height="764" alt="image" src="https://github.com/user-attachments/assets/02f3459c-bb3b-4f8e-9966-8bb2e5c1bbea" /> ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main:83b47f67b1- ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.15.0 - vLLM main:83b47f67b1--------- Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
123
.github/workflows/_e2e_test.yaml
vendored
123
.github/workflows/_e2e_test.yaml
vendored
@@ -15,6 +15,10 @@ on:
|
||||
contains_310:
|
||||
required: true
|
||||
type: boolean
|
||||
continue_on_error:
|
||||
required: false
|
||||
type: boolean
|
||||
default: false
|
||||
|
||||
jobs:
|
||||
e2e-light:
|
||||
@@ -80,7 +84,29 @@ jobs:
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
run: |
|
||||
python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
||||
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-singlecard-light \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1 \
|
||||
--auto-upgrade-estimated-times \
|
||||
--continue-on-error
|
||||
else
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-singlecard-light \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1
|
||||
fi
|
||||
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
with:
|
||||
name: timing-data-singlecard-light-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
if-no-files-found: warn
|
||||
retention-days: 5
|
||||
|
||||
e2e-full:
|
||||
name: singlecard-full
|
||||
@@ -146,7 +172,28 @@ jobs:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
PYTORCH_NPU_ALLOC_CONF: max_split_size_mb:256
|
||||
run: |
|
||||
python3 .github/workflows/scripts/run_suite.py --suite e2e-singlecard --auto-partition-id ${{ matrix.part }} --auto-partition-size 2
|
||||
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-singlecard \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 2 \
|
||||
--auto-upgrade-estimated-times \
|
||||
--continue-on-error
|
||||
else
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-singlecard \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 2
|
||||
fi
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
with:
|
||||
name: timing-data-singlecard-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
if-no-files-found: warn
|
||||
retention-days: 5
|
||||
|
||||
e2e-2-cards-light:
|
||||
name: multicard-2-light
|
||||
@@ -210,7 +257,29 @@ jobs:
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
run: |
|
||||
python3 .github/workflows/scripts/run_suite.py --suite e2e-2card-light --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
||||
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-2card-light \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1 \
|
||||
--auto-upgrade-estimated-times \
|
||||
--continue-on-error
|
||||
else
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-2card-light \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1
|
||||
fi
|
||||
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
with:
|
||||
name: timing-data-2card-light-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
if-no-files-found: warn
|
||||
retention-days: 5
|
||||
|
||||
e2e-2-cards-full:
|
||||
name: multicard-2-full
|
||||
@@ -274,7 +343,29 @@ jobs:
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
run: |
|
||||
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-2-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
||||
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-multicard-2-cards \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1 \
|
||||
--auto-upgrade-estimated-times \
|
||||
--continue-on-error
|
||||
else
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-multicard-2-cards \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1
|
||||
fi
|
||||
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
with:
|
||||
name: timing-data-2card-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
if-no-files-found: warn
|
||||
retention-days: 5
|
||||
|
||||
- name: Run vllm-project/vllm-ascend test (non triton)
|
||||
if: ${{ inputs.type == 'full' && matrix.part == 0 }}
|
||||
@@ -346,7 +437,29 @@ jobs:
|
||||
env:
|
||||
VLLM_WORKER_MULTIPROC_METHOD: spawn
|
||||
run: |
|
||||
python3 .github/workflows/scripts/run_suite.py --suite e2e-multicard-4-cards --auto-partition-id ${{ matrix.part }} --auto-partition-size 1
|
||||
if [ "${{ inputs.continue_on_error }}" = "true" ]; then
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-multicard-4-cards \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1 \
|
||||
--auto-upgrade-estimated-times \
|
||||
--continue-on-error
|
||||
else
|
||||
python3 .github/workflows/scripts/run_suite.py \
|
||||
--suite e2e-multicard-4-cards \
|
||||
--auto-partition-id "${{ matrix.part }}" \
|
||||
--auto-partition-size 1
|
||||
fi
|
||||
|
||||
|
||||
- name: Upload timing data
|
||||
uses: actions/upload-artifact@v4
|
||||
if: ${{ inputs.continue_on_error == true }}
|
||||
with:
|
||||
name: timing-data-4card-full-part${{ matrix.part }}
|
||||
path: test_timing_data.json
|
||||
if-no-files-found: warn
|
||||
retention-days: 5
|
||||
|
||||
e2e_310p:
|
||||
name: 310p singlecard
|
||||
|
||||
Reference in New Issue
Block a user