Drop vLLM 0.13.0 support (#6069)

### What this PR does / why we need it?
Drop support for vLLM 0.13.0 and upgrade to vLLM 0.14.0. The `vllm_version_is('0.13.0')` compatibility branches are removed.

- vLLM version: v0.13.0
- vLLM main: d68209402d

---------

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
zhangxinyuehfad
2026-01-23 09:45:08 +08:00
committed by GitHub
parent 27a513b672
commit 819a4459ce
39 changed files with 86 additions and 272 deletions

View File

@@ -43,14 +43,11 @@ from vllm.v1.request import RequestStatus
from vllm_ascend.ascend_config import get_ascend_config, init_ascend_config
from vllm_ascend.distributed.kv_transfer.utils.mooncake_transfer_engine import global_te
from vllm_ascend.distributed.kv_transfer.utils.utils import get_transfer_timeout_value
from vllm_ascend.utils import is_vl_model, vllm_version_is
from vllm_ascend.utils import is_vl_model
# isort: off
if TYPE_CHECKING:
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
else:
from vllm.attention.backends import AttentionMetadata # type: ignore
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.forward_context import ForwardContext
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.request import Request

View File

@@ -38,14 +38,11 @@ from vllm_ascend.distributed.kv_transfer.utils.mooncake_transfer_engine import \
global_te
from vllm_ascend.distributed.kv_transfer.utils.utils import (
align_memory, get_transfer_timeout_value, kv_alltoall_and_rearrange)
from vllm_ascend.utils import npu_stream_switch, vllm_version_is
from vllm_ascend.utils import npu_stream_switch
# isort: off
if TYPE_CHECKING:
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
else:
from vllm.attention.backends import AttentionMetadata # type: ignore
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.forward_context import ForwardContext
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.request import Request

View File

@@ -9,6 +9,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
from vllm.forward_context import ForwardContext
from vllm.logger import logger
from vllm.utils.network_utils import make_zmq_socket
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.core.sched.output import SchedulerOutput
from vllm.v1.kv_cache_interface import KVCacheConfig
@@ -19,14 +20,6 @@ from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.pool_scheduler imp
KVPoolScheduler, get_zmq_rpc_path_lookup)
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.pool_worker import \
KVPoolWorker
from vllm_ascend.utils import vllm_version_is
# isort: off
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
else:
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
# isort: on
class AscendStoreConnector(KVConnectorBase_V1):

View File

@@ -24,25 +24,14 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheSpec
from vllm_ascend.distributed.kv_transfer.kv_pool.cpu_offload.metadata import (
MetadataServer, MetadataServerProc, MLAConfig)
from vllm_ascend.utils import vllm_version_is
# isort: off
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionType # type: ignore
else:
from vllm.v1.attention.backend import AttentionType # type: ignore
if TYPE_CHECKING:
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import \
AttentionMetadata # type: ignore
else:
from vllm.v1.attention.backend import AttentionType #type: ignore
from vllm.v1.attention.backend import AttentionMetadata #type: ignore
from vllm.forward_context import ForwardContext
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
from vllm.v1.kv_cache_interface import KVCacheConfig
from vllm.v1.request import Request
# isort: on
@dataclass

View File

@@ -9,16 +9,12 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
from vllm.logger import init_logger
from vllm.v1.core.sched.output import SchedulerOutput
from vllm_ascend.utils import vllm_version_is
logger = init_logger(__name__)
# isort: off
if TYPE_CHECKING:
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
else:
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
KVConnectorPromMetrics, KVConnectorStats, PromMetric, PromMetricT)
from vllm.forward_context import ForwardContext