[Refactor] remove some metadata variables in attention_v1. (#5160)
RFC: https://github.com/vllm-project/vllm-ascend/issues/4629
Reason:
The metadata dataclass contains too many variables. We will inherit from
the community's metadata class and, at the same time, remove variables
that are no longer needed.
TODO:
1. Partially remove attn_state.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: weijinqian_v1 <weijinqian@huawei.com>
Co-authored-by: weijinqian_v1 <weijinqian@huawei.com>
This commit is contained in:
@@ -23,6 +23,7 @@ from vllm_ascend import envs
|
||||
from vllm_ascend.ascend_config import get_ascend_config
|
||||
from vllm_ascend.attention.attention_v1 import AscendAttentionState
|
||||
from vllm_ascend.attention.utils import (AscendCommonAttentionMetadata,
|
||||
enable_cp,
|
||||
maybe_save_kv_layer_to_connector,
|
||||
split_decodes_and_prefills,
|
||||
trans_rope_weight, transdata,
|
||||
@@ -57,8 +58,7 @@ class AscendMLABackend(AttentionBackend):
|
||||
|
||||
@staticmethod
|
||||
def get_builder_cls():
|
||||
prefill_config = get_current_vllm_config().parallel_config
|
||||
if prefill_config.prefill_context_parallel_size > 1 or prefill_config.decode_context_parallel_size > 1:
|
||||
if enable_cp():
|
||||
from vllm_ascend.attention.mla_cp import AscendMlaCPMetadataBuilder
|
||||
return AscendMlaCPMetadataBuilder
|
||||
return AscendMLAMetadataBuilder
|
||||
@@ -70,8 +70,7 @@ class AscendMLABackend(AttentionBackend):
|
||||
|
||||
@staticmethod
|
||||
def get_impl_cls() -> Type["MLAAttentionImpl"]:
|
||||
prefill_config = get_current_vllm_config().parallel_config
|
||||
if prefill_config.prefill_context_parallel_size > 1 or prefill_config.decode_context_parallel_size > 1:
|
||||
if enable_cp():
|
||||
from vllm_ascend.attention.mla_cp import AscendMlaCPImpl
|
||||
return AscendMlaCPImpl
|
||||
return AscendMLAImpl
|
||||
|
||||
Reference in New Issue
Block a user