[Misc][V0 Deprecation] Remove V0 Attention (#1835)

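### What this PR does / why we need it?

This PR is part of https://github.com/vllm-project/vllm-ascend/issues/1620. It removes the V0 attention code from `vllm_ascend/attention/attention.py` and defines `_ALLOWED_NUM_QUERIES_PER_KV` directly in `vllm_ascend/attention/mla_v1.py` instead of importing it from that module.

- vLLM version: v0.9.2
- vLLM main: 8dfb45ca33

Signed-off-by: shen-shanshan <467638484@qq.com>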
2025-07-18 14:10:13 +08:00
parent 33ef5dc813
commit d08ff304cd
2 changed files with 2 additions and 1229 deletions
--- a/vllm_ascend/attention/attention.py
+++ b/vllm_ascend/attention/attention.py
--- a/vllm_ascend/attention/mla_v1.py
+++ b/vllm_ascend/attention/mla_v1.py
@@ -15,7 +15,6 @@ from vllm.model_executor.layers.linear import (LinearBase,
 from vllm.utils import cdiv, round_down

 from vllm_ascend.ascend_config import get_ascend_config
-from vllm_ascend.attention.attention import _ALLOWED_NUM_QUERIES_PER_KV
 from vllm_ascend.attention.attention_v1 import AscendAttentionState
 from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
 from vllm_ascend.multistream.context import get_multistream_comm_context
@@ -27,6 +26,8 @@ from vllm_ascend.worker.npu_input_batch import InputBatch
 if TYPE_CHECKING:
    from vllm.v1.core.sched.output import SchedulerOutput

+_ALLOWED_NUM_QUERIES_PER_KV = [32, 64, 128]
+

@dataclass
 class CommonAttentionMetadata: