Drop 0.12.0 support (#5146)

We plan to release v0.13.0 soon, so there is no longer a need to keep the 0.12.0 compatibility code. Let's drop it.
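
For context, dropping 0.12.0 support mainly means deleting code paths that were gated on the installed vLLM version. Below is a minimal sketch of that gating pattern, assuming the `vllm_version_is` helper from `vllm_ascend.utils`; the branch bodies are placeholders, not code taken from this commit.

```python
# Sketch only: the helper is assumed to be vllm_ascend.utils.vllm_version_is,
# and the branch bodies are illustrative placeholders.
from vllm_ascend.utils import vllm_version_is

if vllm_version_is("0.12.0"):
    # 0.12.0-compatible code path: this is the kind of branch the commit deletes
    ...
else:
    # code path for vLLM main / the upcoming v0.13.0: this is what remains
    ...
```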

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Author: wangxiyuan
Date: 2025-12-20 09:38:53 +08:00
Committed by: GitHub
Parent: 243ab7d720
Commit: 758d81dcb1
21 changed files with 63 additions and 149 deletions


@@ -3,6 +3,7 @@ from unittest.mock import MagicMock, patch
 import pytest
 import torch
+from vllm.attention.selector import AttentionSelectorConfig
 from vllm.config.compilation import CompilationMode, CUDAGraphMode
 from vllm.platforms import PlatformEnum
@@ -484,28 +485,30 @@ class TestNPUPlatform(TestBase):
         self.assertEqual(vllm_config.compilation_config.custom_ops, [])

     def test_get_attn_backend_cls_use_v1_and_mla(self):
-        result = self.platform.get_attn_backend_cls(
-            selected_backend="ascend",
-            head_size=64,
-            dtype="float16",
-            kv_cache_dtype="float16",
-            block_size=64,
-            use_sparse=False,
+        attn_selector_config = AttentionSelectorConfig(
+            dtype=torch.float16,
+            head_size=0,
+            kv_cache_dtype=None,
+            block_size=128,
+            use_mla=True,
+            use_sparse=False,
+        )
+        result = self.platform.get_attn_backend_cls("ascend",
+                                                     attn_selector_config)
         self.assertEqual(result,
                          "vllm_ascend.attention.mla_v1.AscendMLABackend")

     def test_get_attn_backend_cls_use_v1_only(self):
-        result = self.platform.get_attn_backend_cls(
-            selected_backend="ascend",
-            head_size=64,
-            dtype="float16",
-            kv_cache_dtype="float16",
-            block_size=64,
-            use_sparse=False,
+        attn_selector_config = AttentionSelectorConfig(
+            dtype=torch.float16,
+            head_size=0,
+            kv_cache_dtype=None,
+            block_size=128,
+            use_mla=False,
+            use_sparse=False,
+        )
+        result = self.platform.get_attn_backend_cls("ascend",
+                                                     attn_selector_config)
         self.assertEqual(
             result,
             "vllm_ascend.attention.attention_v1.AscendAttentionBackend")