[bugfix] Fix some bugs that may cause failures at runtime (#896)
### What this PR does / why we need it? Fixes the bug where the graph mode is the same for P and D, along with some other bugs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Verified by running the end-to-end tests. Signed-off-by: ningbenzhe1 <ningbenzhe@huawei.com>
This commit is contained in:
@@ -20,6 +20,7 @@
|
||||
import torch
|
||||
import vllm
|
||||
import vllm.distributed
|
||||
import vllm.envs as envs
|
||||
from torch.distributed import ProcessGroup
|
||||
from torch.distributed.distributed_c10d import (Backend, PrefixStore,
|
||||
_get_default_timeout,
|
||||
@@ -164,10 +165,9 @@ def parallel_config_get_dp_port(self) -> int:
|
||||
"""
|
||||
answer = self.data_parallel_master_port
|
||||
self.data_parallel_master_port += 1
|
||||
import os
|
||||
|
||||
# NOTE: Get port from envs directly when using torchrun
|
||||
port = int(os.environ.get("MASTER_PORT", answer)) # type: ignore
|
||||
port = envs.VLLM_DP_MASTER_PORT if envs.VLLM_DP_MASTER_PORT else answer
|
||||
return port
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user