Drop vLLM 0.13.0 support (#6069)
### What this PR does / why we need it?
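Drop support for vLLM 0.13.0 and upgrade to 0.14.0. The `vllm_version_is('0.13.0')` import branches are removed from the affected modules, so `AttentionMetadata` and `AttentionType` are now imported unconditionally from `vllm.v1.attention.backend`.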
- vLLM version: v0.13.0
- vLLM main: d68209402d
---------
Signed-off-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
@@ -43,14 +43,11 @@ from vllm.v1.request import RequestStatus
|
||||
from vllm_ascend.ascend_config import get_ascend_config, init_ascend_config
|
||||
from vllm_ascend.distributed.kv_transfer.utils.mooncake_transfer_engine import global_te
|
||||
from vllm_ascend.distributed.kv_transfer.utils.utils import get_transfer_timeout_value
|
||||
from vllm_ascend.utils import is_vl_model, vllm_version_is
|
||||
from vllm_ascend.utils import is_vl_model
|
||||
|
||||
# isort: off
|
||||
if TYPE_CHECKING:
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
|
||||
else:
|
||||
from vllm.attention.backends import AttentionMetadata # type: ignore
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.request import Request
|
||||
|
||||
@@ -38,14 +38,11 @@ from vllm_ascend.distributed.kv_transfer.utils.mooncake_transfer_engine import \
|
||||
global_te
|
||||
from vllm_ascend.distributed.kv_transfer.utils.utils import (
|
||||
align_memory, get_transfer_timeout_value, kv_alltoall_and_rearrange)
|
||||
from vllm_ascend.utils import npu_stream_switch, vllm_version_is
|
||||
from vllm_ascend.utils import npu_stream_switch
|
||||
|
||||
# isort: off
|
||||
if TYPE_CHECKING:
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
|
||||
else:
|
||||
from vllm.attention.backends import AttentionMetadata # type: ignore
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.request import Request
|
||||
|
||||
@@ -9,6 +9,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.logger import logger
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.kv_cache_interface import KVCacheConfig
|
||||
@@ -19,14 +20,6 @@ from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.pool_scheduler imp
|
||||
KVPoolScheduler, get_zmq_rpc_path_lookup)
|
||||
from vllm_ascend.distributed.kv_transfer.kv_pool.ascend_store.pool_worker import \
|
||||
KVPoolWorker
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
# isort: off
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
|
||||
else:
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
# isort: on
|
||||
|
||||
|
||||
class AscendStoreConnector(KVConnectorBase_V1):
|
||||
|
||||
@@ -24,25 +24,14 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheSpec
|
||||
|
||||
from vllm_ascend.distributed.kv_transfer.kv_pool.cpu_offload.metadata import (
|
||||
MetadataServer, MetadataServerProc, MLAConfig)
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
# isort: off
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import AttentionType # type: ignore
|
||||
else:
|
||||
from vllm.v1.attention.backend import AttentionType # type: ignore
|
||||
|
||||
if TYPE_CHECKING:
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import \
|
||||
AttentionMetadata # type: ignore
|
||||
else:
|
||||
from vllm.v1.attention.backend import AttentionType #type: ignore
|
||||
from vllm.v1.attention.backend import AttentionMetadata #type: ignore
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.kv_cache_interface import KVCacheConfig
|
||||
from vllm.v1.request import Request
|
||||
# isort: on
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -9,16 +9,12 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
from vllm.logger import init_logger
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
logger = init_logger(__name__)
|
||||
|
||||
# isort: off
|
||||
if TYPE_CHECKING:
|
||||
if vllm_version_is('0.13.0'):
|
||||
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
|
||||
else:
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.metrics import (
|
||||
KVConnectorPromMetrics, KVConnectorStats, PromMetric, PromMetricT)
|
||||
from vllm.forward_context import ForwardContext
|
||||
|
||||
Reference in New Issue
Block a user