From ac1c2cd9ac6f7ed6ba7505065959393717e9b903 Mon Sep 17 00:00:00 2001
From: wangxiyuan <wangxiyuan1007@gmail.com>
Date: Thu, 25 Sep 2025 14:20:10 +0800
Subject: [PATCH] [CI] Upgrade vllm version - 0925 (#3167)

Upgrade vLLM to the newest commit.

1. Remove the unused function `get_state_cls`; it has already been removed
from vLLM.
https://github.com/vllm-project/vllm/commit/e6750d0b18e07631bb2ea7f256f7dd444d4936fa
2. Fix the unit tests broken by
https://github.com/vllm-project/vllm/commit/6160ba4151084c78164a0f472ce4da04067f9705


- vLLM version: v0.10.2
- vLLM main:
https://github.com/vllm-project/vllm/commit/b1068903fdca26cf6b4a1a51a32c3365ce3ac636

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
---
 .github/workflows/format_pr_body.yaml            | 2 +-
 .github/workflows/vllm_ascend_test.yaml          | 6 +++---
 .github/workflows/vllm_ascend_test_full.yaml     | 2 +-
 tests/ut/attention/test_attention_v1.py          | 7 +------
 tests/ut/ops/test_fused_ops.py                   | 2 ++
 tests/ut/torchair/ops/test_torchair_fused_moe.py | 2 ++
 vllm_ascend/attention/attention_v1.py            | 5 -----
 7 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml
index b053c51..7fc23ee 100644
--- a/.github/workflows/format_pr_body.yaml
+++ b/.github/workflows/format_pr_body.yaml
@@ -36,7 +36,7 @@ jobs:
 
       - name: Get vLLM version
         run: |
-          VLLM_COMMIT=b1068903fdca26cf6b4a1a51a32c3365ce3ac636
+          VLLM_COMMIT=52d0cb845866869d587fc013a7c59e60a86ebcf2
           echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
 
       - name: Checkout repository
diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml
index 49052b5..d1e1af5 100644
--- a/.github/workflows/vllm_ascend_test.yaml
+++ b/.github/workflows/vllm_ascend_test.yaml
@@ -42,7 +42,7 @@ jobs:
   lint:
     uses: ./.github/workflows/pre-commit.yml
     with:
-      vllm: b1068903fdca26cf6b4a1a51a32c3365ce3ac636
+      vllm: 52d0cb845866869d587fc013a7c59e60a86ebcf2
 
   changes:
     runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
         VLLM_USE_MODELSCOPE: True
     strategy:
       matrix:
-        vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
+        vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
     steps:
       - name: Install packages
         run: |
@@ -138,7 +138,7 @@ jobs:
     name: e2e-light
     strategy:
       matrix:
-        vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
+        vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
     # Note (yikun): If CI resource are limited we can split job into two chain jobs
     needs: [lint, changes]
     # only trigger e2e test after lint passed and the change is e2e related with pull request.
diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml
index d5144cc..1d628dd 100644
--- a/.github/workflows/vllm_ascend_test_full.yaml
+++ b/.github/workflows/vllm_ascend_test_full.yaml
@@ -68,7 +68,7 @@ jobs:
     name: e2e-full
     strategy:
       matrix:
-        vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
+        vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
     needs: [changes]
     if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
     uses: ./.github/workflows/_e2e_test.yaml
diff --git a/tests/ut/attention/test_attention_v1.py b/tests/ut/attention/test_attention_v1.py
index b286f81..d553637 100644
--- a/tests/ut/attention/test_attention_v1.py
+++ b/tests/ut/attention/test_attention_v1.py
@@ -7,8 +7,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
                                                 AscendAttentionBackendImpl,
                                                 AscendAttentionMetadataBuilder,
                                                 AscendAttentionState,
-                                                AscendMetadata,
-                                                CommonAttentionState)
+                                                AscendMetadata)
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
 
 
@@ -25,10 +24,6 @@ class TestAscendAttentionBackend(TestBase):
         self.assertEqual(AscendAttentionBackend.get_metadata_cls(),
                          AscendMetadata)
 
-    def test_get_state_cls(self):
-        self.assertEqual(AscendAttentionBackend.get_state_cls(),
-                         CommonAttentionState)
-
     def test_get_builder_cls(self):
         self.assertEqual(AscendAttentionBackend.get_builder_cls(),
                          AscendAttentionMetadataBuilder)
diff --git a/tests/ut/ops/test_fused_ops.py b/tests/ut/ops/test_fused_ops.py
index a91fe5b..19c6c96 100644
--- a/tests/ut/ops/test_fused_ops.py
+++ b/tests/ut/ops/test_fused_ops.py
@@ -201,6 +201,8 @@ def default_moe_config():
 def moe_method(mock_dist_env):
     moe = MagicMock()
     moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
+    moe.moe_parallel_config.use_ep = False
+    moe.moe_parallel_config.dp_size = 1
     return AscendUnquantizedFusedMoEMethod(moe)
 
 
diff --git a/tests/ut/torchair/ops/test_torchair_fused_moe.py b/tests/ut/torchair/ops/test_torchair_fused_moe.py
index 155ee78..a550a67 100644
--- a/tests/ut/torchair/ops/test_torchair_fused_moe.py
+++ b/tests/ut/torchair/ops/test_torchair_fused_moe.py
@@ -153,6 +153,8 @@ def default_moe_config():
 def moe_method(mock_dist_env):
     moe = MagicMock()
     moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
+    moe.moe_parallel_config.use_ep = False
+    moe.moe_parallel_config.dp_size = 1
     return TorchairAscendUnquantizedFusedMoEMethod(moe)
 
 
diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py
index 963a947..d289bb4 100644
--- a/vllm_ascend/attention/attention_v1.py
+++ b/vllm_ascend/attention/attention_v1.py
@@ -24,7 +24,6 @@ import torch.nn as nn
 import torch_npu
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionLayer, AttentionType)
-from vllm.attention.backends.utils import CommonAttentionState
 from vllm.config import VllmConfig
 from vllm.forward_context import ForwardContext, get_forward_context
 from vllm.utils import cdiv, direct_register_custom_op
@@ -56,10 +55,6 @@ class AscendAttentionBackend(AttentionBackend):
     def get_metadata_cls() -> Type["AscendMetadata"]:
         return AscendMetadata
 
-    @staticmethod
-    def get_state_cls() -> Type["CommonAttentionState"]:
-        return CommonAttentionState
-
     @staticmethod
     def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]:
         return AscendAttentionMetadataBuilder