upgrade vLLM to main (#4608)
1. Fix https://github.com/vllm-project/vllm/pull/28542. The model structures we adapt are:
   - Qwen2.5-VL (some patches still remain)
   - Qwen2-VL
   - Qwen2
   - DeepSeek series
   - Qwen-MoE series
2. Fix https://github.com/vllm-project/vllm/pull/29121. The output token type has changed from a NumPy array to `list[list[int]]` (see the first compatibility sketch below).
3. Fix https://github.com/vllm-project/vllm/pull/29262. The `xformers` backend for multimodal models has been deprecated.
4. Fix https://github.com/vllm-project/vllm/pull/29342.
5. Fix https://github.com/vllm-project/vllm/pull/28579.
6. Fix https://github.com/vllm-project/vllm/pull/28718.
7. Fix https://github.com/vllm-project/vllm/issues/28665.
8. Fix https://github.com/vllm-project/vllm/pull/26847. vLLM introduced the `optimization-level` option, some default configuration values changed, and the `--enforce-eager` parameter has been deprecated (see the second sketch below).
9. Fix https://github.com/vllm-project/vllm/pull/29223. The sampler now returns a tuple (see the third sketch below).
10. Fix https://github.com/vllm-project/vllm/pull/29471. We remove the related patch to avoid this kind of error.

Co-authored-by: hfadzxy <starmoon_zhang@163.com>
Co-authored-by: wangli <wangli858794774@gmail.com>

- vLLM version: v0.11.2

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: wangli <wangli858794774@gmail.com>
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
Co-authored-by: wangli <wangli858794774@gmail.com>
Co-authored-by: hfadzxy <starmoon_zhang@163.com>
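For item 2, a minimal sketch of the adaptation, assuming downstream code that previously consumed the output tokens as a NumPy array; the helper name `normalize_output_tokens` is hypothetical and not part of vLLM:

```python
import numpy as np

def normalize_output_tokens(output_tokens) -> list[list[int]]:
    """Return output tokens as list[list[int]] regardless of which
    layout produced them (old: np.ndarray, new: nested lists)."""
    if isinstance(output_tokens, np.ndarray):
        # Pre-#29121 behaviour: a 2-D array of token ids, one row per request.
        return output_tokens.tolist()
    # Post-#29121 behaviour: already a list of per-request token lists.
    return [list(seq) for seq in output_tokens]
```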
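For item 8, a hedged sketch of how caller code can keep working across the deprecation, using the offline `LLM` entry point; `build_llm` is a hypothetical helper, and the exact replacement knob on vLLM main (the new optimization level) may be spelled differently than the commit message suggests:

```python
import inspect
from vllm import LLM

def build_llm(model: str, eager: bool = False) -> LLM:
    # Only pass enforce_eager when the installed vLLM still accepts it;
    # newer mains fold eager-vs-compiled execution into an optimization
    # level (vllm-project/vllm#26847) and deprecate this parameter.
    kwargs = {"model": model}
    if "enforce_eager" in inspect.signature(LLM.__init__).parameters:
        kwargs["enforce_eager"] = eager
    return LLM(**kwargs)
```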
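For item 9, a minimal sketch of the defensive unpacking pattern at a call site that previously used the sampler's single return value; the commit message does not spell out what the extra tuple elements carry, so they are ignored here:

```python
def unpack_sampler_result(result):
    """Accept both the old single-value and the new tuple return from
    the sampler (vllm-project/vllm#29223) and yield the sampler output."""
    if isinstance(result, tuple):
        # New behaviour: the sampler output comes first, followed by
        # additional values that this sketch does not interpret.
        return result[0]
    # Old behaviour: the sampler output is returned directly.
    return result
```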
@@ -32,7 +32,7 @@ on:
         description: how many pods will be pulled up via lws.yaml, indicates number of nodes we need
       vllm_version:
         required: false
-        default: "v0.11.2"
+        default: "86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24"
         type: string
         description: vllm version to use
       vllm_ascend_remote_url:

.github/workflows/format_pr_body.yaml (2 changed lines)
@@ -36,7 +36,7 @@ jobs:

       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=v0.11.2
+          VLLM_COMMIT=86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV

       - name: Checkout repository

.github/workflows/nightly_benchmarks.yaml (2 changed lines)
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.11.2
+          - vllm_branch: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
             vllm_ascend_branch: main
       max-parallel: 1
     container:

@@ -86,7 +86,7 @@ jobs:
         tests: tests/e2e/nightly/ops
     uses: ./.github/workflows/_e2e_nightly_single_node.yaml
     with:
-      vllm: v0.11.2
+      vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
       runner: ${{ matrix.test_config.os }}
       tests: ${{ matrix.test_config.tests }}
       image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a2'

@@ -134,7 +134,7 @@ jobs:
           - Qwen3-Next-80B-A3B-Instruct
     uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
     with:
-      vllm: v0.11.2
+      vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
       runner: ${{ matrix.test_config.os }}
       model_list: ${{ toJson(matrix.test_config.model_list) }}
       image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11'

@@ -139,7 +139,7 @@ jobs:
         tests: tests/e2e/nightly/models/test_glm4_5.py
     uses: ./.github/workflows/_e2e_nightly_single_node.yaml
     with:
-      vllm: v0.11.2
+      vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
       runner: ${{ matrix.test_config.os }}
       image: 'swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/vllm-ascend:nightly-a3'
       tests: ${{ matrix.test_config.tests }}

@@ -69,7 +69,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [v0.11.2]
+        vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml

@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: v0.11.2
+      vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
   changes:
     runs-on: ubuntu-latest
     outputs:

@@ -84,7 +84,7 @@ jobs:
       SOC_VERSION: ascend910b1
     strategy:
       matrix:
-        vllm_version: [v0.11.2]
+        vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
     steps:
       - name: Install packages
         run: |

@@ -142,7 +142,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [v0.11.2]
+        vllm_version: [86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.

@@ -72,7 +72,7 @@ jobs:
           - DeepSeek-V2-Lite
     uses: ./.github/workflows/_e2e_nightly_single_node_models.yaml
     with:
-      vllm: v0.11.2
+      vllm: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
       runner: ${{ matrix.runner }}
       image: swr.cn-southwest-2.myhuaweicloud.com/base_image/ascend-ci/cann:8.3.rc2-910b-ubuntu22.04-py3.11
       model_list: ${{ toJson(matrix.model_list) }}