[Refactor] remove some metadata variables in attention_v1. (#5160)

RFC: https://github.com/vllm-project/vllm-ascend/issues/4629 Reason: The metadata data class contains an excessive number of variables. We will inherit the metadata of the community and simultaneously remove some variables that are no longer needed at present. Todo: 1. remove attn_state partly. - vLLM version: v0.12.0 - vLLM main: ad32e3e19c --------- Signed-off-by: weijinqian_v1 <weijinqian@huawei.com> Co-authored-by: weijinqian_v1 <weijinqian@huawei.com>
2025-12-19 14:57:09 +08:00
parent bc05a81bf2
commit 35ad11b637
9 changed files with 41 additions and 53 deletions
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -23,6 +23,7 @@ from vllm_ascend import envs
 from vllm_ascend.ascend_config import get_ascend_config
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
+                                         enable_cp,
                                         maybe_save_kv_layer_to_connector,
                                         split_decodes_and_prefills,
                                         trans_rope_weight, transdata,
@@ -57,8 +58,7 @@ class AscendMLABackend(AttentionBackend):

    @staticmethod
    def get_builder_cls():
-        prefill_config = get_current_vllm_config().parallel_config
-        if prefill_config.prefill_context_parallel_size > 1 or prefill_config.decode_context_parallel_size > 1:
+        if enable_cp():
            from vllm_ascend.attention.mla_cp import AscendMlaCPMetadataBuilder
            return AscendMlaCPMetadataBuilder
        return AscendMLAMetadataBuilder
@@ -70,8 +70,7 @@ class AscendMLABackend(AttentionBackend):

    @staticmethod
    def get_impl_cls() -> Type["MLAAttentionImpl"]:
-        prefill_config = get_current_vllm_config().parallel_config
-        if prefill_config.prefill_context_parallel_size > 1 or prefill_config.decode_context_parallel_size > 1:
+        if enable_cp():
            from vllm_ascend.attention.mla_cp import AscendMlaCPImpl
            return AscendMlaCPImpl
        return AscendMLAImpl