[Misc][V0 Deprecation] Remove V0 Attention (#1835)

### What this PR does / why we need it?
This PR is a part of
https://github.com/vllm-project/vllm-ascend/issues/1620.

- vLLM version: v0.9.2
- vLLM main:
8dfb45ca33

Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
Shanshan Shen
2025-07-18 14:10:13 +08:00
committed by GitHub
parent 33ef5dc813
commit d08ff304cd
2 changed files with 2 additions and 1229 deletions

File diff suppressed because it is too large. [Load Diff]

[View File]

@@ -15,7 +15,6 @@ from vllm.model_executor.layers.linear import (LinearBase,
from vllm.utils import cdiv, round_down
from vllm_ascend.ascend_config import get_ascend_config
from vllm_ascend.attention.attention import _ALLOWED_NUM_QUERIES_PER_KV
from vllm_ascend.attention.attention_v1 import AscendAttentionState
from vllm_ascend.multistream.base import MSAttentionMetadataSplitConfig
from vllm_ascend.multistream.context import get_multistream_comm_context
@@ -27,6 +26,8 @@ from vllm_ascend.worker.npu_input_batch import InputBatch
if TYPE_CHECKING:
from vllm.v1.core.sched.output import SchedulerOutput
_ALLOWED_NUM_QUERIES_PER_KV = [32, 64, 128]
@dataclass
class CommonAttentionMetadata: