From 0747a6e68caedeb4eb9fb2f52947f8cc7f68dfa9 Mon Sep 17 00:00:00 2001
From: Yikun Jiang <yikunkero@gmail.com>
Date: Sun, 14 Sep 2025 06:57:59 +0800
Subject: [PATCH] Bump vLLM version to v0.10.2 (#2914)

### What this PR does / why we need it?
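Bump the pinned vLLM version from v0.10.2rc3 to the v0.10.2 release in the
CI workflows, the Dockerfiles, and the docs configuration (`ci_vllm_version`).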

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed
- vLLM version: v0.10.2rc3
- vLLM main:
https://github.com/vllm-project/vllm/commit/15b8fef453b373b84406207a947005a4d9d68acc

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
---
 .github/workflows/accuracy_test.yaml         | 2 +-
 .github/workflows/nightly_benchmarks.yaml    | 2 +-
 .github/workflows/vllm_ascend_dist.yaml      | 2 +-
 .github/workflows/vllm_ascend_test.yaml      | 6 +++---
 .github/workflows/vllm_ascend_test_310p.yaml | 2 +-
 .github/workflows/vllm_ascend_test_full.yaml | 4 ++--
 Dockerfile                                   | 2 +-
 Dockerfile.310p                              | 2 +-
 Dockerfile.310p.openEuler                    | 2 +-
 Dockerfile.a3                                | 2 +-
 Dockerfile.a3.openEuler                      | 2 +-
 Dockerfile.openEuler                         | 2 +-
 docs/source/conf.py                          | 2 +-
 13 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
index 396b507..c2d4250 100644
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          ref: v0.10.2rc3
+          ref: v0.10.2
           path: ./vllm-empty
 
       - name: Install vllm-project/vllm from source
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index ae5edb3..0b2e84c 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.10.2rc3
+          - vllm_branch: v0.10.2
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1
diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml
index a3498d0..b0b49a9 100644
--- a/.github/workflows/vllm_ascend_dist.yaml
+++ b/.github/workflows/vllm_ascend_dist.yaml
@@ -43,7 +43,7 @@ jobs:
     strategy:
       matrix:
         os: [linux-aarch64-a3-8]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index c0295b0..1a342d2 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -82,7 +82,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
@@ -203,7 +203,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml
index 99c8df3..c6afb6b 100644
--- a/.github/workflows/vllm_ascend_test_310p.yaml
+++ b/.github/workflows/vllm_ascend_test_310p.yaml
@@ -53,7 +53,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: 310p e2e test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index 1717ce5..877d948 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -72,7 +72,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: singlecard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
@@ -156,7 +156,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [v0.10.2rc3]
+        vllm_version: [v0.10.2]
     name: multicard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
diff --git a/Dockerfile b/Dockerfile
index fb93643..a0d2b5b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index 7544616..402ae08 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index e3a7c3c..68db234 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 351e78f..8da3af0 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index 46cbf40..61c4507 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 75af138..c78490c 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc3
+ARG VLLM_TAG=v0.10.2
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index ee52b4a..7b432bd 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -77,7 +77,7 @@ myst_substitutions = {
     # CANN image tag
     'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11",
     # vllm version in ci
-    'ci_vllm_version': 'v0.10.2rc3',
+    'ci_vllm_version': 'v0.10.2',
 }
 
 # Add any paths that contain templates here, relative to this directory.