[RL] Add --nccl-port to prevent port conflict (#7418)
This commit is contained in:
@@ -68,6 +68,7 @@ class ServerArgs:
|
|||||||
# Port for the HTTP server
|
# Port for the HTTP server
|
||||||
host: str = "127.0.0.1"
|
host: str = "127.0.0.1"
|
||||||
port: int = 30000
|
port: int = 30000
|
||||||
|
nccl_port: Optional[int] = None
|
||||||
|
|
||||||
# Memory and scheduling
|
# Memory and scheduling
|
||||||
mem_fraction_static: Optional[float] = None
|
mem_fraction_static: Optional[float] = None
|
||||||
@@ -599,6 +600,12 @@ class ServerArgs:
|
|||||||
default=ServerArgs.port,
|
default=ServerArgs.port,
|
||||||
help="The port of the HTTP server.",
|
help="The port of the HTTP server.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--nccl-port",
|
||||||
|
type=int,
|
||||||
|
default=ServerArgs.nccl_port,
|
||||||
|
help="The port for NCCL distributed environment setup. Defaults to a random port.",
|
||||||
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--tokenizer-mode",
|
"--tokenizer-mode",
|
||||||
type=str,
|
type=str,
|
||||||
@@ -1747,6 +1754,7 @@ class PortArgs:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
|
def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs":
|
||||||
|
if server_args.nccl_port is None:
|
||||||
port = server_args.port + random.randint(100, 1000)
|
port = server_args.port + random.randint(100, 1000)
|
||||||
while True:
|
while True:
|
||||||
if is_port_available(port):
|
if is_port_available(port):
|
||||||
@@ -1755,6 +1763,8 @@ class PortArgs:
|
|||||||
port += 42
|
port += 42
|
||||||
else:
|
else:
|
||||||
port -= 43
|
port -= 43
|
||||||
|
else:
|
||||||
|
port = server_args.nccl_port
|
||||||
|
|
||||||
if not server_args.enable_dp_attention:
|
if not server_args.enable_dp_attention:
|
||||||
# Normal case, use IPC within a single node
|
# Normal case, use IPC within a single node
|
||||||
|
|||||||
Reference in New Issue
Block a user