Drop vLLM 0.13.0 support (#6069)

### What this PR does / why we need it?
Drop vLLM 0.13.0 support and upgrade the supported vLLM version to 0.14.0.

- vLLM version: v0.13.0
- vLLM main: d68209402d
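
For context, the call sites deleted throughout this commit all go through the `vllm_version_is` helper in `vllm_ascend/utils.py`. The sketch below is a simplified, hedged illustration of that gating pattern and of the unconditional form that remains once 0.13.0 support is dropped; the real helper may be implemented differently (e.g. with version normalization), and the `head_size` values are taken from the test diff further down.

```python
# Simplified sketch; NOT the actual vllm_ascend/utils.py implementation.
# vllm_version_is() compares the installed vLLM release against a target string.
import vllm


def vllm_version_is(target: str) -> bool:
    """Return True when the installed vLLM release string equals `target`."""
    return vllm.__version__ == target


# Pattern removed by this PR: branch per supported vLLM release ...
if vllm_version_is("0.13.0"):
    head_size = 128
else:
    head_size = 64

# ... and the unconditional form kept now that only 0.14.0 is supported.
head_size = 64
```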

---------

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
Authored by zhangxinyuehfad on 2026-01-23 09:45:08 +08:00, committed by GitHub
parent 27a513b672
commit 819a4459ce
39 changed files with 86 additions and 272 deletions

View File

@@ -17,7 +17,6 @@ from vllm_ascend.attention.mla_v1 import (AscendMLABackend,
                                           AscendMLAPrefillMetadata,
                                           ChunkedContextMetadata)
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
-from vllm_ascend.utils import vllm_version_is
 
 
 class TestAscendMLABackend(TestBase):
@@ -477,10 +476,7 @@ class TestAscendMLAMetadataBuilderBuild(TestBase):
         self.mock_vllm_config.model_config = model_config
         self.kv_cache_spec = MagicMock()
         self.kv_cache_spec.num_layers = 32
-        if vllm_version_is('0.13.0'):
-            self.kv_cache_spec.head_size = 128
-        else:
-            self.kv_cache_spec.head_size = 64
+        self.kv_cache_spec.head_size = 64
         self.kv_cache_spec.num_heads = 32
 
     def tearDown(self):

View File

@@ -5,17 +5,11 @@ import pytest
 import torch
 from vllm.config.compilation import CompilationMode, CUDAGraphMode
 from vllm.platforms import PlatformEnum
+from vllm.v1.attention.selector import AttentionSelectorConfig  # type: ignore
 
 from tests.ut.base import TestBase
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType, vllm_version_is
-
-# isort: off
-if vllm_version_is("0.13.0"):
-    from vllm.attention.selector import AttentionSelectorConfig  # type: ignore
-else:
-    from vllm.v1.attention.selector import AttentionSelectorConfig  # type: ignore
-# isort: on
+from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType
 
 
 class TestNPUPlatform(TestBase):

View File

@@ -5,7 +5,6 @@ import torch
 from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig
 
 from tests.ut.base import TestBase
-from vllm_ascend.utils import vllm_version_is
 
 
 init_cached_hf_modules_path = "vllm.utils.import_utils.init_cached_hf_modules"
@@ -141,10 +140,7 @@ class TestNPUWorker(TestBase):
         )
 
         # Verify init_cached_hf_modules is called (trust_remote_code=True)
-        if vllm_version_is('0.13.0'):
-            mock_init_cached_hf_modules.assert_called_once()
-        else:
-            mock_init_cached_hf_modules.assert_not_called()
+        mock_init_cached_hf_modules.assert_not_called()
 
     @patch("vllm_ascend.utils.adapt_patch")
     @patch("vllm_ascend.ops")