Upgrade to vllm 0.17.0 corex v4.1 overlay
This commit is contained in:
@@ -14,7 +14,6 @@ import vllm.envs as envs
|
||||
from vllm.logger import init_logger
|
||||
from vllm.utils.network_utils import get_distributed_init_method, get_ip, get_open_port
|
||||
from vllm.v1.core.sched.output import GrammarOutput, SchedulerOutput
|
||||
from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType
|
||||
from vllm.v1.executor.abstract import Executor
|
||||
from vllm.v1.outputs import AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput
|
||||
from vllm.v1.serial_utils import run_method
|
||||
@@ -43,9 +42,11 @@ class UniProcExecutor(Executor):
|
||||
max_workers=1, thread_name_prefix="WorkerAsyncOutput"
|
||||
)
|
||||
|
||||
is_eep_new_worker = envs.VLLM_ELASTIC_EP_SCALE_UP_LAUNCH
|
||||
self.driver_worker.init_worker(all_kwargs=[kwargs])
|
||||
self.driver_worker.init_device()
|
||||
self.driver_worker.load_model()
|
||||
if not is_eep_new_worker:
|
||||
self.driver_worker.init_device()
|
||||
self.driver_worker.load_model()
|
||||
|
||||
def _distributed_args(self) -> tuple[str, int, int]:
|
||||
"""Return (distributed_init_method, rank, local_rank)."""
|
||||
@@ -122,16 +123,6 @@ class UniProcExecutor(Executor):
|
||||
# it's running.
|
||||
return
|
||||
|
||||
def reinitialize_distributed(
|
||||
self, reconfig_request: ReconfigureDistributedRequest
|
||||
) -> None:
|
||||
self.driver_worker.reinitialize_distributed(reconfig_request)
|
||||
if (
|
||||
reconfig_request.new_data_parallel_rank
|
||||
== ReconfigureRankType.SHUTDOWN_CURRENT_RANK
|
||||
):
|
||||
self.shutdown()
|
||||
|
||||
def shutdown(self) -> None:
|
||||
if worker := self.driver_worker:
|
||||
worker.shutdown()
|
||||
|
||||
Reference in New Issue
Block a user