[Misc][V0 Deprecation] Remove V0 related codes of test, example, platform (#1805)

### What this PR does / why we need it?
Remove V0 related codes of test, example, platform.

This PR is a part of
https://github.com/vllm-project/vllm-ascend/issues/1620.

- vLLM version: v0.9.2
- vLLM main:
235bfd5dfe

---------

Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
Shanshan Shen
2025-07-15 19:58:55 +08:00
committed by GitHub
parent a929699e98
commit f96100fad5
5 changed files with 10 additions and 460 deletions

View File

@@ -117,7 +117,7 @@ class NPUPlatform(Platform):
@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
if not envs.VLLM_USE_V1:
raise ValueError("vLLM Ascend does not support V0 engine")
raise ValueError("vLLM Ascend does not support V0 engine.")
# initialize ascend config from vllm additional_config
ascend_config = init_ascend_config(vllm_config)
@@ -208,16 +208,16 @@ class NPUPlatform(Platform):
@classmethod
def get_attn_backend_cls(cls, selected_backend, head_size, dtype,
kv_cache_dtype, block_size, use_v1, use_mla):
if use_v1 and use_mla:
return "vllm_ascend.attention.mla_v1.AscendMLABackend"
if not use_v1:
raise ValueError("vLLM Ascend does not support V0 engine.")
use_torchair = get_ascend_config().torchair_graph_config.enabled
if use_v1 and use_torchair:
return "vllm_ascend.attention.attention_v1_torchair.AscendAttentionTorchairBackend"
if use_v1:
return "vllm_ascend.attention.attention_v1.AscendAttentionBackend"
if use_mla:
return "vllm_ascend.attention.attention.AscendMLAAttentionBackend"
return "vllm_ascend.attention.attention.AscendAttentionBackend"
return "vllm_ascend.attention.mla_v1.AscendMLABackend"
elif use_torchair:
return "vllm_ascend.attention.attention_v1_torchair.AscendAttentionTorchairBackend"
else:
return "vllm_ascend.attention.attention_v1.AscendAttentionBackend"
@classmethod
def get_punica_wrapper(cls) -> str: