From f97a64ba7fb377464ad3cb16815711486a2c929f Mon Sep 17 00:00:00 2001
From: Yikun Jiang <yikunkero@gmail.com>
Date: Sat, 13 Sep 2025 19:15:48 +0800
Subject: [PATCH] Bump vLLM version to v0.10.2rc3 (#2911)

### What this PR does / why we need it?
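Bump the pinned vLLM version from v0.10.2rc2 to v0.10.2rc3 in the CI
workflows, Dockerfiles, and docs configuration. Upstream changes:
https://github.com/vllm-project/vllm/compare/v0.10.2rc2...v0.10.2rc3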

### Does this PR introduce _any_ user-facing change?
No

### How was this patch tested?
CI passed

- vLLM version: v0.10.2rc2
- vLLM main:
https://github.com/vllm-project/vllm/commit/15b8fef453b373b84406207a947005a4d9d68acc

Signed-off-by: Yikun Jiang <yikunkero@gmail.com>
---
 .github/workflows/accuracy_test.yaml         | 2 +-
 .github/workflows/nightly_benchmarks.yaml    | 2 +-
 .github/workflows/vllm_ascend_dist.yaml      | 2 +-
 .github/workflows/vllm_ascend_test.yaml      | 6 +++---
 .github/workflows/vllm_ascend_test_310p.yaml | 2 +-
 .github/workflows/vllm_ascend_test_full.yaml | 4 ++--
 Dockerfile                                   | 2 +-
 Dockerfile.310p                              | 2 +-
 Dockerfile.310p.openEuler                    | 2 +-
 Dockerfile.a3                                | 2 +-
 Dockerfile.a3.openEuler                      | 2 +-
 Dockerfile.openEuler                         | 2 +-
 docs/source/conf.py                          | 2 +-
 13 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/accuracy_test.yaml b/.github/workflows/accuracy_test.yaml
index 490a9af..396b507 100644
--- a/.github/workflows/accuracy_test.yaml
+++ b/.github/workflows/accuracy_test.yaml
@@ -112,7 +112,7 @@ jobs:
         uses: actions/checkout@v4
         with:
           repository: vllm-project/vllm
-          ref: v0.10.2rc2
+          ref: v0.10.2rc3
           path: ./vllm-empty
 
       - name: Install vllm-project/vllm from source
diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index af35a35..ae5edb3 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -51,7 +51,7 @@ jobs:
     strategy:
       matrix:
         include:
-          - vllm_branch: v0.10.2rc2
+          - vllm_branch: v0.10.2rc3
             vllm_ascend_branch: main
             vllm_use_v1: 1
       max-parallel: 1
diff --git a/.github/workflows/vllm_ascend_dist.yaml b/.github/workflows/vllm_ascend_dist.yaml
index b3c91e7..a3498d0 100644
--- a/.github/workflows/vllm_ascend_dist.yaml
+++ b/.github/workflows/vllm_ascend_dist.yaml
@@ -43,7 +43,7 @@ jobs:
     strategy:
       matrix:
         os: [linux-aarch64-a3-8]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: vLLM Ascend test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 1e8414a..c0295b0 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -82,7 +82,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: singlecard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
@@ -203,7 +203,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: multicard e2e test - light
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_310p.yaml b/.github/workflows/vllm_ascend_test_310p.yaml
index cbd472e..99c8df3 100644
--- a/.github/workflows/vllm_ascend_test_310p.yaml
+++ b/.github/workflows/vllm_ascend_test_310p.yaml
@@ -53,7 +53,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-310p-1, linux-aarch64-310p-4]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: 310p e2e test
     runs-on: ${{ matrix.os }}
     container:
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index 5694530..1717ce5 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -72,7 +72,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-1]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: singlecard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
@@ -156,7 +156,7 @@ jobs:
       max-parallel: 2
       matrix:
         os: [linux-aarch64-a2-2]
-        vllm_version: [v0.10.2rc2]
+        vllm_version: [v0.10.2rc3]
     name: multicard e2e test - full
     runs-on: ${{ matrix.os }}
     container:
diff --git a/Dockerfile b/Dockerfile
index ff2bcb3..fb93643 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p b/Dockerfile.310p
index 366bf33..7544616 100644
--- a/Dockerfile.310p
+++ b/Dockerfile.310p
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.310p.openEuler b/Dockerfile.310p.openEuler
index 78b1951..e3a7c3c 100644
--- a/Dockerfile.310p.openEuler
+++ b/Dockerfile.310p.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.a3 b/Dockerfile.a3
index 40bf51b..351e78f 100644
--- a/Dockerfile.a3
+++ b/Dockerfile.a3
@@ -37,7 +37,7 @@ RUN pip config set global.index-url ${PIP_INDEX_URL}
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
 RUN VLLM_TARGET_DEVICE="empty" python3 -m pip install -v -e /vllm-workspace/vllm/ --extra-index https://download.pytorch.org/whl/cpu/ && \
diff --git a/Dockerfile.a3.openEuler b/Dockerfile.a3.openEuler
index f462522..46cbf40 100644
--- a/Dockerfile.a3.openEuler
+++ b/Dockerfile.a3.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/Dockerfile.openEuler b/Dockerfile.openEuler
index 2c0ee86..75af138 100644
--- a/Dockerfile.openEuler
+++ b/Dockerfile.openEuler
@@ -34,7 +34,7 @@ COPY . /vllm-workspace/vllm-ascend/
 
 # Install vLLM
 ARG VLLM_REPO=https://github.com/vllm-project/vllm.git
-ARG VLLM_TAG=v0.10.2rc2
+ARG VLLM_TAG=v0.10.2rc3
 
 RUN git clone --depth 1 $VLLM_REPO --branch $VLLM_TAG /vllm-workspace/vllm
 # In x86, triton will be installed by vllm. But in Ascend, triton doesn't work correctly. we need to uninstall it.
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 98eaa46..ee52b4a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -77,7 +77,7 @@ myst_substitutions = {
     # CANN image tag
     'cann_image_tag': "8.2.rc1-910b-ubuntu22.04-py3.11",
     # vllm version in ci
-    'ci_vllm_version': 'v0.10.2rc2',
+    'ci_vllm_version': 'v0.10.2rc3',
 }
 
 # Add any paths that contain templates here, relative to this directory.