upgrade vLLM to main (#4608)

1. fix https://github.com/vllm-project/vllm/pull/28542
The model structure modifications that affect us are:
     - Qwen2.5-VL (some patches still remain)
     - Qwen2-VL
     - Qwen2
     - DeepSeek series
     - Qwen-moe series
2. fix https://github.com/vllm-project/vllm/pull/29121
   the output token type has now changed from np to `list[list[int]]`

3. fix https://github.com/vllm-project/vllm/pull/29262
    the `xformers` backend for multimodal has now been deprecated
4. fix https://github.com/vllm-project/vllm/pull/29342

5. fix https://github.com/vllm-project/vllm/pull/28579
6. fix https://github.com/vllm-project/vllm/pull/28718
7. fix https://github.com/vllm-project/vllm/issues/28665
8. fix https://github.com/vllm-project/vllm/pull/26847
vLLM introduced the `optimization-level` option; some default configs have
changed, and the `--enforce-eager` parameter has been deprecated
9. fix https://github.com/vllm-project/vllm/pull/29223 it returns a tuple
for sampler.
10. fix https://github.com/vllm-project/vllm/pull/29471 we'll remove the
related patch to avoid this kind of error.

Co-authored-by: hfadzxy <starmoon_zhang@163.com>
Co-authored-by: wangli <wangli858794774@gmail.com>


- vLLM version: v0.11.2

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: wangli <wangli858794774@gmail.com>
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
Co-authored-by: wangli <wangli858794774@gmail.com>
Co-authored-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
wangxiyuan
2025-12-02 22:10:52 +08:00
committed by GitHub
parent 4588cdac02
commit 7f2673ea2d
60 changed files with 383 additions and 374 deletions

View File

@@ -32,7 +32,7 @@ on:
description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
vllm_version:
required: false
default: "v0.11.2"
default: "86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24"
type: string
description: vllm version to use
vllm_ascend_remote_url:

View File

@@ -36,7 +36,7 @@ jobs:
- name: Get vLLM version
run: |
VLLM_COMMIT=v0.11.2
VLLM_COMMIT=86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
- name: Checkout repository

View File

@@ -51,7 +51,7 @@ jobs:
strategy:
matrix:
include:
- vllm_branch: v0.11.2
- vllm_branch: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
vllm_ascend_branch: main
max-parallel: 1
container:

View File

@@ -86,7 +86,7 @@ jobs:
tests: tests/e2e/nightly/ops
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
vllm: v0.11.2
vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
runner: ${{ matrix.test_config.os }}
tests: ${{ matrix.test_config.tests }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'
@@ -134,7 +134,7 @@ jobs:
- Qwen3-Next-80B-A3B-Instruct
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
with:
vllm: v0.11.2
vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
runner: ${{ matrix.test_config.os }}
model_list: ${{ toJson(matrix.test_config.model_list) }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11'

View File

@@ -139,7 +139,7 @@ jobs:
tests: tests/e2e/nightly/models/test_glm4_5.py
uses: ./.github/workflows/_e2e_nightly_single_node.yaml
with:
vllm: v0.11.2
vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
runner: ${{ matrix.test_config.os }}
image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
tests: ${{ matrix.test_config.tests }}

View File

@@ -69,7 +69,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [v0.11.2]
vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml

View File

@@ -42,7 +42,7 @@ jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: v0.11.2
vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
changes:
runs-on: ubuntu-latest
outputs:
@@ -84,7 +84,7 @@ jobs:
SOC_VERSION: ascend910b1
strategy:
matrix:
vllm_version: [v0.11.2]
vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
steps:
- name: Install packages
run: |
@@ -142,7 +142,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [v0.11.2]
vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
# Note (yikun): If CI resource are limited we can split job into two chain jobs
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related with pull request.

View File

@@ -72,7 +72,7 @@ jobs:
- DeepSeek-V2-Lite
uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
with:
vllm: v0.11.2
vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
runner: ${{ matrix.runner }}
image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
model_list: ${{ toJson(matrix.model_list) }}