Drop 0.11.0 support (#4377)
There is a lot of hacky code for v0.11.0, which makes the codebase hard to
upgrade to newer vLLM versions. Since v0.11.0 will be superseded soon, let's
drop v0.11.0 support first. Then we'll upgrade to v0.11.2 soon.
- vLLM version: v0.11.0
- vLLM main:
2918c1b49c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -10,17 +10,12 @@ import vllm.envs as envs
|
||||
import zmq
|
||||
from vllm.config import KVTransferConfig, VllmConfig
|
||||
from vllm.utils import logger
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.utils.torch_utils import get_dtype_size
|
||||
from vllm.v1.kv_cache_interface import AttentionSpec
|
||||
|
||||
from vllm_ascend.distributed.cpu_offload_manager.cpu_kv_cache_manager import \
|
||||
CPUKVCacheManager
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_dtype_size, make_zmq_socket
|
||||
else:
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.utils.torch_utils import get_dtype_size
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -33,17 +33,13 @@ from vllm.v1.request import Request, RequestStatus
|
||||
import vllm_ascend.envs as envs_ascend
|
||||
from vllm_ascend.distributed.utils import get_transfer_timeout_value
|
||||
from vllm_ascend.utils import (AscendSocVersion, get_ascend_soc_version,
|
||||
prefill_context_parallel_enable,
|
||||
vllm_version_is)
|
||||
prefill_context_parallel_enable)
|
||||
|
||||
if prefill_context_parallel_enable():
|
||||
from vllm.distributed.parallel_state import \
|
||||
get_prefill_context_model_parallel_rank
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_ip
|
||||
else:
|
||||
from vllm.utils.network_utils import get_ip
|
||||
from vllm.utils.network_utils import get_ip
|
||||
|
||||
TORCH_DTYPE_TO_NPU_DTYPE = {
|
||||
torch.half: llm_datadist.DataType.DT_FLOAT16,
|
||||
|
||||
@@ -10,14 +10,7 @@ import torch
|
||||
from vllm.distributed.kv_transfer.kv_connector.v1.base import \
|
||||
KVConnectorMetadata
|
||||
from vllm.utils import logger
|
||||
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import cdiv
|
||||
else:
|
||||
from vllm.utils.math_utils import cdiv
|
||||
|
||||
from vllm.utils.math_utils import cdiv
|
||||
from vllm.v1.core.sched.output import NewRequestData
|
||||
|
||||
DEFAULT_GLOBAL_SEGMENT_SIZE = 3355443200 # 3.125 GiB
|
||||
|
||||
@@ -8,6 +8,7 @@ from typing import Generator, List, Optional, Union
|
||||
import torch
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.utils import logger
|
||||
from vllm.utils.torch_utils import get_kv_cache_torch_dtype
|
||||
|
||||
from vllm_ascend.distributed.mooncake.config_data import (
|
||||
ChunkedTokenDatabase, LasyerMultiBlockReqMeta, MooncakeConnectorMetadata,
|
||||
@@ -16,12 +17,6 @@ from vllm_ascend.distributed.mooncake.kv_transfer import (
|
||||
KVCacheStoreLayerRecvingThread, KVCacheStoreLayerSendingThread,
|
||||
KVCacheStoreRecvingThread, KVCacheStoreSendingThread, KVTransferThread)
|
||||
from vllm_ascend.distributed.mooncake.mooncake_store import Mooncakestore
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_kv_cache_torch_dtype
|
||||
else:
|
||||
from vllm.utils.torch_utils import get_kv_cache_torch_dtype
|
||||
|
||||
|
||||
class MooncakeEngine:
|
||||
|
||||
@@ -6,18 +6,13 @@ from mooncake.store import ReplicateConfig # type: ignore
|
||||
from vllm.config import ParallelConfig
|
||||
from vllm.distributed.parallel_state import get_tensor_model_parallel_rank
|
||||
from vllm.utils import logger
|
||||
from vllm.utils.network_utils import get_ip
|
||||
|
||||
from vllm_ascend.distributed.mooncake.config_data import MooncakeEngineKey
|
||||
from vllm_ascend.distributed.mooncake.transfer_engine import get_global_te
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
from .config_data import MooncakeStoreConfig
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_ip
|
||||
else:
|
||||
from vllm.utils.network_utils import get_ip
|
||||
|
||||
METADATA_BYTES_LEN = 24
|
||||
BASE_PORT = int(os.getenv("VLLM_BASE_PORT", "8790"))
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole)
|
||||
from vllm.forward_context import ForwardContext
|
||||
from vllm.utils import logger
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
from vllm.v1.core.kv_cache_manager import KVCacheBlocks
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
from vllm.v1.request import Request
|
||||
@@ -18,12 +19,6 @@ from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder
|
||||
from vllm_ascend.distributed.mooncake.config_data import (
|
||||
LoadSpec, MooncakeConnectorMetadata, ReqMeta, RequestTracker)
|
||||
from vllm_ascend.distributed.mooncake.mooncake_engine import MooncakeEngine
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import make_zmq_socket
|
||||
else:
|
||||
from vllm.utils.network_utils import make_zmq_socket
|
||||
|
||||
|
||||
class MooncakeConnectorV1(KVConnectorBase_V1):
|
||||
|
||||
@@ -37,7 +37,7 @@ import vllm_ascend.envs as envs_ascend
|
||||
from vllm_ascend.ascend_config import get_ascend_config, init_ascend_config
|
||||
from vllm_ascend.distributed.mooncake.transfer_engine import get_global_te
|
||||
from vllm_ascend.distributed.utils import get_transfer_timeout_value
|
||||
from vllm_ascend.utils import prefill_context_parallel_enable, vllm_version_is
|
||||
from vllm_ascend.utils import prefill_context_parallel_enable
|
||||
|
||||
# isort: off
|
||||
if prefill_context_parallel_enable():
|
||||
@@ -46,10 +46,7 @@ if prefill_context_parallel_enable():
|
||||
)
|
||||
# isort: on
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
else:
|
||||
from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.attention.backends.abstract import AttentionMetadata
|
||||
|
||||
@@ -28,6 +28,7 @@ from vllm.distributed.kv_transfer.kv_connector.v1.base import (
|
||||
from vllm.distributed.parallel_state import (get_tensor_model_parallel_rank,
|
||||
get_tp_group, get_world_group)
|
||||
from vllm.utils import logger
|
||||
from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
from vllm.v1.core.sched.output import SchedulerOutput
|
||||
|
||||
import vllm_ascend.envs as envs_ascend
|
||||
@@ -35,12 +36,6 @@ from vllm_ascend.ascend_config import get_ascend_config
|
||||
from vllm_ascend.distributed.utils import (align_memory,
|
||||
get_transfer_timeout_value,
|
||||
kv_alltoall_and_rearrange)
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.11.0"):
|
||||
from vllm.utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
else:
|
||||
from vllm.utils.network_utils import get_ip, make_zmq_path, make_zmq_socket
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from vllm.attention.backends.abstract import AttentionMetadata
|
||||
|
||||
Reference in New Issue
Block a user