[Version] Drop 0.16.0 support (#7153)

### What this PR does / why we need it?
Drop 0.16.0 support in main
- Fix the eagle proposer breakage introduced by
https://github.com/vllm-project/vllm/pull/34552. The main change is to use
the draft attention group to initialize the attention metadata builder.
- Fix the "`ModelRunner` has no attribute `cudagraph_capture_sizes`"
error, which is a bug in vLLM v0.17.0 that was fixed by a later PR,
https://github.com/vllm-project/vllm/pull/30515.

- vLLM version: v0.16.0
- vLLM main:
4034c3d32e
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
Mengqing Cao
2026-03-13 16:14:15 +08:00
committed by GitHub
parent 7ed9e9de69
commit 986cd45397
20 changed files with 255 additions and 268 deletions

View File

@@ -32,7 +32,7 @@ on:
description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
vllm_version:
required: false
default: "v0.16.0"
default: "v0.17.0"
type: string
description: vllm version to use
vllm_ascend_remote_url:

View File

@@ -39,7 +39,7 @@ on:
vllm_version:
required: false
type: string
default: "v0.16.0"
default: "v0.17.0"
is_pr_test:
required: true
type: boolean

View File

@@ -75,7 +75,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.16.0]
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.e2e_tracker == true }}
uses: ./.github/workflows/_e2e_test.yaml

View File

@@ -90,7 +90,7 @@ jobs:
if: ${{ needs.lint.result == 'success' && (needs.changes.outputs.e2e_tracker == 'true' || needs.changes.outputs.ut_tracker == 'true') }}
strategy:
matrix:
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.16.0]
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
uses: ./.github/workflows/_unit_test.yaml
with:
vllm: ${{ matrix.vllm_version }}
@@ -102,7 +102,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.16.0]
vllm_version: [4034c3d32e30d01639459edd3ab486f56993876d, v0.17.0]
# Note (yikun): If CI resources are limited, we can split this job into two chained jobs
needs: [lint, changes]
# Only trigger the e2e test after lint has passed and the pull request's change is e2e-related.

View File

@@ -276,7 +276,7 @@ jobs:
- Qwen3-Omni-30B-A3B-Instruct
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
with:
vllm: v0.16.0
vllm: v0.17.0
runner: ${{ matrix.test_config.os }}
model_list: ${{ toJson(matrix.test_config.model_list) }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.5.1-910b-ubuntu22.04-py3.11'

View File

@@ -51,7 +51,7 @@ jobs:
strategy:
matrix:
include:
- vllm_branch: v0.16.0
- vllm_branch: v0.17.0
vllm_ascend_branch: main
max-parallel: 1
container: