From ac1c2cd9ac6f7ed6ba7505065959393717e9b903 Mon Sep 17 00:00:00 2001 From: wangxiyuan Date: Thu, 25 Sep 2025 14:20:10 +0800 Subject: [PATCH] [CI] Upgrade vllm version - 0925 (#3167) Upgrade vLLM to newest commit. 1. Remove the useless func get_state_cls, it has been removed from vLLM already. https://github.com/vllm-project/vllm/commit/e6750d0b18e07631bb2ea7f256f7dd444d4936fa 2. Fix ut broken by https://github.com/vllm-project/vllm/commit/6160ba4151084c78164a0f472ce4da04067f9705 - vLLM version: v0.10.2 - vLLM main: https://github.com/vllm-project/vllm/commit/b1068903fdca26cf6b4a1a51a32c3365ce3ac636 --------- Signed-off-by: wangxiyuan --- .github/workflows/format_pr_body.yaml | 2 +- .github/workflows/vllm_ascend_test.yaml | 6 +++--- .github/workflows/vllm_ascend_test_full.yaml | 2 +- tests/ut/attention/test_attention_v1.py | 7 +------ tests/ut/ops/test_fused_ops.py | 2 ++ tests/ut/torchair/ops/test_torchair_fused_moe.py | 2 ++ vllm_ascend/attention/attention_v1.py | 5 ----- 7 files changed, 10 insertions(+), 16 deletions(-) diff --git a/.github/workflows/format_pr_body.yaml b/.github/workflows/format_pr_body.yaml index b053c51..7fc23ee 100644 --- a/.github/workflows/format_pr_body.yaml +++ b/.github/workflows/format_pr_body.yaml @@ -36,7 +36,7 @@ jobs: - name: Get vLLM version run: | - VLLM_COMMIT=b1068903fdca26cf6b4a1a51a32c3365ce3ac636 + VLLM_COMMIT=52d0cb845866869d587fc013a7c59e60a86ebcf2 echo "VLLM_COMMIT=https://github.com/vllm-project/vllm/commit/$VLLM_COMMIT" >> $GITHUB_ENV - name: Checkout repository diff --git a/.github/workflows/vllm_ascend_test.yaml b/.github/workflows/vllm_ascend_test.yaml index 49052b5..d1e1af5 100644 --- a/.github/workflows/vllm_ascend_test.yaml +++ b/.github/workflows/vllm_ascend_test.yaml @@ -42,7 +42,7 @@ jobs: lint: uses: ./.github/workflows/pre-commit.yml with: - vllm: b1068903fdca26cf6b4a1a51a32c3365ce3ac636 + vllm: 52d0cb845866869d587fc013a7c59e60a86ebcf2 changes: runs-on: ubuntu-latest @@ -83,7 +83,7 @@ jobs: VLLM_USE_MODELSCOPE: True strategy: matrix: - vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2] + vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2] steps: - name: Install packages run: | @@ -138,7 +138,7 @@ jobs: name: e2e-light strategy: matrix: - vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2] + vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2] # Note (yikun): If CI resource are limited we can split job into two chain jobs needs: [lint, changes] # only trigger e2e test after lint passed and the change is e2e related with pull request. diff --git a/.github/workflows/vllm_ascend_test_full.yaml b/.github/workflows/vllm_ascend_test_full.yaml index d5144cc..1d628dd 100644 --- a/.github/workflows/vllm_ascend_test_full.yaml +++ b/.github/workflows/vllm_ascend_test_full.yaml @@ -68,7 +68,7 @@ jobs: name: e2e-full strategy: matrix: - vllm_version: [b1068903fdca26cf6b4a1a51a32c3365ce3ac636, v0.10.2] + vllm_version: [52d0cb845866869d587fc013a7c59e60a86ebcf2, v0.10.2] needs: [changes] if: ${{ needs.changes.outputs.e2e_tracker == 'true' }} uses: ./.github/workflows/_e2e_test.yaml diff --git a/tests/ut/attention/test_attention_v1.py b/tests/ut/attention/test_attention_v1.py index b286f81..d553637 100644 --- a/tests/ut/attention/test_attention_v1.py +++ b/tests/ut/attention/test_attention_v1.py @@ -7,8 +7,7 @@ from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend, AscendAttentionBackendImpl, AscendAttentionMetadataBuilder, AscendAttentionState, - AscendMetadata, - CommonAttentionState) + AscendMetadata) from vllm_ascend.attention.utils import AscendCommonAttentionMetadata @@ -25,10 +24,6 @@ class TestAscendAttentionBackend(TestBase): self.assertEqual(AscendAttentionBackend.get_metadata_cls(), AscendMetadata) - def test_get_state_cls(self): - self.assertEqual(AscendAttentionBackend.get_state_cls(), - CommonAttentionState) - def test_get_builder_cls(self): self.assertEqual(AscendAttentionBackend.get_builder_cls(), AscendAttentionMetadataBuilder) diff --git a/tests/ut/ops/test_fused_ops.py b/tests/ut/ops/test_fused_ops.py index a91fe5b..19c6c96 100644 --- a/tests/ut/ops/test_fused_ops.py +++ b/tests/ut/ops/test_fused_ops.py @@ -201,6 +201,8 @@ def default_moe_config(): def moe_method(mock_dist_env): moe = MagicMock() moe.moe_parallel_config.return_value = MagicMock(ep_size=4) + moe.moe_parallel_config.use_ep = False + moe.moe_parallel_config.dp_size = 1 return AscendUnquantizedFusedMoEMethod(moe) diff --git a/tests/ut/torchair/ops/test_torchair_fused_moe.py b/tests/ut/torchair/ops/test_torchair_fused_moe.py index 155ee78..a550a67 100644 --- a/tests/ut/torchair/ops/test_torchair_fused_moe.py +++ b/tests/ut/torchair/ops/test_torchair_fused_moe.py @@ -153,6 +153,8 @@ def default_moe_config(): def moe_method(mock_dist_env): moe = MagicMock() moe.moe_parallel_config.return_value = MagicMock(ep_size=4) + moe.moe_parallel_config.use_ep = False + moe.moe_parallel_config.dp_size = 1 return TorchairAscendUnquantizedFusedMoEMethod(moe) diff --git a/vllm_ascend/attention/attention_v1.py b/vllm_ascend/attention/attention_v1.py index 963a947..d289bb4 100644 --- a/vllm_ascend/attention/attention_v1.py +++ b/vllm_ascend/attention/attention_v1.py @@ -24,7 +24,6 @@ import torch.nn as nn import torch_npu from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionLayer, AttentionType) -from vllm.attention.backends.utils import CommonAttentionState from vllm.config import VllmConfig from vllm.forward_context import ForwardContext, get_forward_context from vllm.utils import cdiv, direct_register_custom_op @@ -56,10 +55,6 @@ class AscendAttentionBackend(AttentionBackend): def get_metadata_cls() -> Type["AscendMetadata"]: return AscendMetadata - @staticmethod - def get_state_cls() -> Type["CommonAttentionState"]: - return CommonAttentionState - @staticmethod def get_builder_cls() -> type["AscendAttentionMetadataBuilder"]: return AscendAttentionMetadataBuilder