[CI] Upgrade vllm version - 0925 (#3167)

Upgrade vLLM to newest commit.

1. Remove the unused function get_state_cls; it has already been removed
from vLLM.
e6750d0b18
2. Fix unit tests broken by
6160ba4151


- vLLM version: v0.10.2
- vLLM main:
b1068903fd

---------

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-09-25 14:20:10 +08:00
committed by GitHub
parent 33c118c80e
commit ac1c2cd9ac
7 changed files with 10 additions and 16 deletions

View File

@@ -36,7 +36,7 @@ jobs:
- name: Get vLLM version
run: |
VLLM_COMMIT=b1068903fdca26cf6b4a1a51a32c3365ce3ac636
VLLM_COMMIT=52d0cb845866869d587fc013a7c59e60a86ebcf2
echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV
- name: Checkout repository

View File

@@ -42,7 +42,7 @@ jobs:
lint:
uses: ./.github/workflows/pre-commit.yml
with:
vllm: b1068903fdca26cf6b4a1a51a32c3365ce3ac636
vllm: 52d0cb845866869d587fc013a7c59e60a86ebcf2
changes:
runs-on: ubuntu-latest
@@ -83,7 +83,7 @@ jobs:
VLLM_USE_MODELSCOPE: True
strategy:
matrix:
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
steps:
- name: Install packages
run: |
@@ -138,7 +138,7 @@ jobs:
name: e2e-light
strategy:
matrix:
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
# Note (yikun): If CI resource are limited we can split job into two chain jobs
needs: [lint, changes]
# only trigger e2e test after lint passed and the change is e2e related with pull request.

View File

@@ -68,7 +68,7 @@ jobs:
name: e2e-full
strategy:
matrix:
vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2]
vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2]
needs: [changes]
if: ${{ needs.changes.outputs.e2e_tracker == 'true' }}
uses: ./.github/workflows/_e2e_test.yaml

View File

@@ -7,8 +7,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
AscendAttentionBackendImpl,
AscendAttentionMetadataBuilder,
AscendAttentionState,
AscendMetadata,
CommonAttentionState)
AscendMetadata)
from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
@@ -25,10 +24,6 @@ class TestAscendAttentionBackend(TestBase):
self.assertEqual(AscendAttentionBackend.get_metadata_cls(),
AscendMetadata)
def test_get_state_cls(self):
self.assertEqual(AscendAttentionBackend.get_state_cls(),
CommonAttentionState)
def test_get_builder_cls(self):
self.assertEqual(AscendAttentionBackend.get_builder_cls(),
AscendAttentionMetadataBuilder)

View File

@@ -201,6 +201,8 @@ def default_moe_config():
def moe_method(mock_dist_env):
moe = MagicMock()
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
moe.moe_parallel_config.use_ep = False
moe.moe_parallel_config.dp_size = 1
return AscendUnquantizedFusedMoEMethod(moe)

View File

@@ -153,6 +153,8 @@ def default_moe_config():
def moe_method(mock_dist_env):
moe = MagicMock()
moe.moe_parallel_config.return_value = MagicMock(ep_size=4)
moe.moe_parallel_config.use_ep = False
moe.moe_parallel_config.dp_size = 1
return TorchairAscendUnquantizedFusedMoEMethod(moe)

View File

@@ -24,7 +24,6 @@ import torch.nn as nn
import torch_npu
from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
AttentionLayer, AttentionType)
from vllm.attention.backends.utils import CommonAttentionState
from vllm.config import VllmConfig
from vllm.forward_context import ForwardContext, get_forward_context
from vllm.utils import cdiv, direct_register_custom_op
@@ -56,10 +55,6 @@ class AscendAttentionBackend(AttentionBackend):
def get_metadata_cls() -> Type["AscendMetadata"]:
return AscendMetadata
@staticmethod
def get_state_cls() -> Type["CommonAttentionState"]:
return CommonAttentionState
@staticmethod
def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]:
return AscendAttentionMetadataBuilder