From 0747a6e68caedeb4eb9fb2f52947f8cc7f68dfa9 Mon Sep 17 00:00:00 2001 From: Yikun Jiang Date: Sun, 14 Sep 2025 06:57:59 +0800 Subject: [PATCH] Bump vLLM version to v0.10.2 (#2914) ### What this PR does / why we need it? Bump vLLM version to v0.10.2 ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? CI passed - vLLM version: v0.10.2rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/15b8fef453b373b84406207a947005a4d9d68acc Signed-off-by: Yikun Jiang --- .github/workflows/accuracy_test.yaml | 2 +- .github/workflows/nightly_benchmarks.yaml | 2 +- .github/workflows/vllm_ascend_dist.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 6 +++--- .github/workflows/vllm_ascend_test_310p.yaml | 2 +- .github/workflows/vllm_ascend_test_full.yaml | 4 ++-- Dockerfile | 2 +- Dockerfile.310p | 2 +- Dockerfile.310p.openEuler | 2 +- Dockerfile.a3 | 2 +- Dockerfile.a3.openEuler | 2 +- Dockerfile.openEuler | 2 +- docs/source/conf.py | 2 +- 13 files changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml index 396b507..c2d4250 100644 --- a/.github/workflows/accuracy_test.yaml +++ b/.github/workflows/accuracy_test.yaml @@ -112,7 +112,7 @@ jobs: uses: actions/checkout@v4 with: repository: vllm-project/vllm - ref: v0.10.2rc3 + ref: v0.10.2 path: ./vllm-empty - name: Install vllm-project/vllm from source diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index ae5edb3..0b2e84c 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -51,7 +51,7 @@ jobs: strategy: matrix: include: - - vllm_branch: v0.10.2rc3 + - vllm_branch: v0.10.2 vllm_ascend_branch: main vllm_use_v1: 1 max-parallel: 1 diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml index a3498d0..b0b49a9 100644 --- a/.github/workflows/vllm_ascend_dist.yaml +++ b/.github/workflows/vllm_ascend_dist.yaml @@ -43,7 +43,7 @@ jobs: strategy: matrix: os: [linux-aarch64-a3-8] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: vLLM Ascend test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index c0295b0..1a342d2 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -82,7 +82,7 @@ jobs: VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] steps: - name: Install packages run: | @@ -138,7 +138,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-1] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: singlecard e2e test - light runs-on: ${{ matrix.os }} container: @@ -203,7 +203,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-2] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: multicard e2e test - light runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml index 99c8df3..c6afb6b 100644 --- a/.github/workflows/vllm_ascend_test_310p.yaml +++ b/.github/workflows/vllm_ascend_test_310p.yaml @@ -53,7 +53,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-310p-1, linux-aarch64-310p-4] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: 310p e2e test runs-on: ${{ matrix.os }} container: diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index 1717ce5..877d948 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -72,7 +72,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-1] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: singlecard e2e test - full runs-on: ${{ matrix.os }} container: @@ -156,7 +156,7 @@ jobs: max-parallel: 2 matrix: os: [linux-aarch64-a2-2] - vllm_version: [v0.10.2rc3] + vllm_version: [v0.10.2] name: multicard e2e test - full runs-on: ${{ matrix.os }} container: diff --git a/Dockerfile b/Dockerfile index fb93643..a0d2b5b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p b/Dockerfile.310p index 7544616..402ae08 100644 --- a/Dockerfile.310p +++ b/Dockerfile.310p @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler index e3a7c3c..68db234 100644 --- a/Dockerfile.310p.openEuler +++ b/Dockerfile.310p.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.a3 b/Dockerfile.a3 index 351e78f..8da3af0 100644 --- a/Dockerfile.a3 +++ b/Dockerfile.a3 @@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL} # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \ diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler index 46cbf40..61c4507 100644 --- a/Dockerfile.a3.openEuler +++ b/Dockerfile.a3.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler index 75af138..c78490c 100644 --- a/Dockerfile.openEuler +++ b/Dockerfile.openEuler @@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/ # Install vLLM ARG VLLM_REPO=https://github.com/vllm-project/vllm.git -ARG VLLM_TAG=v0.10.2rc3 +ARG VLLM_TAG=v0.10.2 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it. diff --git a/docs/source/conf.py b/docs/source/conf.py index ee52b4a..7b432bd 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -77,7 +77,7 @@ myst_substitutions = { # CANN image tag 'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11", # vllm version in ci - 'ci_vllm_version': 'v0.10.2rc3', + 'ci_vllm_version': 'v0.10.2', } # Add any paths that contain templates here, relative to this directory.