Drop vLLM 0.13.0 support (#6069)
### What this PR does / why we need it?
Drop vLLM 0.13.0 support and upgrade to 0.14.0: remove the `vllm_version_is("0.13.0")` branches and version-conditional imports from the unit tests.
- vLLM version: v0.13.0
- vLLM main: d68209402d
---------
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
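
For context, this is the version-gating pattern whose call sites the hunks below delete. A minimal sketch, assuming the helper simply compares against the installed vLLM release; the real `vllm_ascend.utils.vllm_version_is` may differ in detail:

```python
# Minimal sketch of a vllm_version_is-style helper (assumption: the real
# vllm_ascend.utils.vllm_version_is may be implemented differently).
import vllm


def vllm_version_is(target: str) -> bool:
    """True when the installed vLLM release exactly matches `target`."""
    return vllm.__version__ == target


# With 0.13.0 support dropped, gated branches like this one collapse
# to their else arm throughout the test suite:
if vllm_version_is("0.13.0"):
    head_size = 128  # 0.13.0-only path, removed by this PR
else:
    head_size = 64   # the single remaining path
```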
```diff
@@ -17,7 +17,6 @@ from vllm_ascend.attention.mla_v1 import (AscendMLABackend,
                                            AscendMLAPrefillMetadata,
                                            ChunkedContextMetadata)
 from vllm_ascend.attention.utils import AscendCommonAttentionMetadata
-from vllm_ascend.utils import vllm_version_is
 
 
 class TestAscendMLABackend(TestBase):
```
```diff
@@ -477,10 +476,7 @@ class TestAscendMLAMetadataBuilderBuild(TestBase):
         self.mock_vllm_config.model_config = model_config
         self.kv_cache_spec = MagicMock()
         self.kv_cache_spec.num_layers = 32
-        if vllm_version_is('0.13.0'):
-            self.kv_cache_spec.head_size = 128
-        else:
-            self.kv_cache_spec.head_size = 64
+        self.kv_cache_spec.head_size = 64
         self.kv_cache_spec.num_heads = 32
 
     def tearDown(self):
```
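
A side note on the fixture above: the test fakes the KV-cache spec with a bare `MagicMock` and pins only the attributes the builder under test reads. A self-contained sketch of that style (attribute names copied from the hunk, not a real vLLM interface):

```python
# Standalone illustration of the MagicMock fixture style used above; the
# attribute names mirror the diff, not any real vLLM API.
from unittest.mock import MagicMock

kv_cache_spec = MagicMock()
kv_cache_spec.num_layers = 32
kv_cache_spec.head_size = 64  # one unconditional value now that 0.13.0 is gone
kv_cache_spec.num_heads = 32

# Pinned attributes read back as plain values, not fresh child mocks:
assert kv_cache_spec.head_size == 64
```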
```diff
@@ -5,17 +5,11 @@ import pytest
 import torch
 from vllm.config.compilation import CompilationMode, CUDAGraphMode
 from vllm.platforms import PlatformEnum
+from vllm.v1.attention.selector import AttentionSelectorConfig  # type: ignore
 
 from tests.ut.base import TestBase
 from vllm_ascend.platform import NPUPlatform
-from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType, vllm_version_is
-
-# isort: off
-if vllm_version_is("0.13.0"):
-    from vllm.attention.selector import AttentionSelectorConfig  # type: ignore
-else:
-    from vllm.v1.attention.selector import AttentionSelectorConfig  # type: ignore
-# isort: on
+from vllm_ascend.utils import ASCEND_QUANTIZATION_METHOD, COMPRESSED_TENSORS_METHOD, AscendDeviceType
 
 
 class TestNPUPlatform(TestBase):
```
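
The deleted `# isort: off` / `# isort: on` fence existed only to keep isort from reordering the conditional import; with a single import path left, both the fence and the shim go away. If a version shim were ever needed again, a feature-detection variant avoids pinning to exact version strings. A sketch under that assumption, with module paths taken from the hunk purely for illustration:

```python
# Sketch of a feature-detection import shim, as an alternative to exact
# version-string checks; module paths here are illustrative only.
import importlib.util

if importlib.util.find_spec("vllm.v1.attention.selector") is not None:
    from vllm.v1.attention.selector import AttentionSelectorConfig
else:
    from vllm.attention.selector import AttentionSelectorConfig  # type: ignore
```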
```diff
@@ -5,7 +5,6 @@ import torch
 from vllm.config import CacheConfig, ModelConfig, ParallelConfig, ProfilerConfig, VllmConfig
 
 from tests.ut.base import TestBase
-from vllm_ascend.utils import vllm_version_is
 
 init_cached_hf_modules_path = "vllm.utils.import_utils.init_cached_hf_modules"
 
```
```diff
@@ -141,10 +140,7 @@ class TestNPUWorker(TestBase):
         )
 
         # Verify init_cached_hf_modules is called (trust_remote_code=True)
-        if vllm_version_is('0.13.0'):
-            mock_init_cached_hf_modules.assert_called_once()
-        else:
-            mock_init_cached_hf_modules.assert_not_called()
+        mock_init_cached_hf_modules.assert_not_called()
 
     @patch("vllm_ascend.utils.adapt_patch")
     @patch("vllm_ascend.ops")
```
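
The assertion flip above is the behavioral core of the change: on vLLM 0.14.0 the worker no longer calls `init_cached_hf_modules` itself, so the test now expects the mock to stay untouched. A self-contained sketch of that mock-assertion pattern, where `worker_init` is a hypothetical stand-in for the code under test:

```python
# Sketch of the assert_not_called pattern; `worker_init` is hypothetical
# and models the 0.14.0 behavior where the hook is never invoked.
from unittest.mock import MagicMock


def worker_init(init_hook, trust_remote_code: bool) -> None:
    # vLLM >= 0.14.0: HF module caching is handled upstream, so the
    # worker never calls the hook, regardless of trust_remote_code.
    del init_hook, trust_remote_code


mock_hook = MagicMock()
worker_init(mock_hook, trust_remote_code=True)
mock_hook.assert_not_called()  # passes now; the old 0.13.0 expectation
                               # (assert_called_once) would raise here
```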