diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 2eabac1b8..756c17811 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -68,6 +68,7 @@ class ServerArgs: # Port for the HTTP server host: str = "127.0.0.1" port: int = 30000 + nccl_port: Optional[int] = None # Memory and scheduling mem_fraction_static: Optional[float] = None @@ -599,6 +600,12 @@ class ServerArgs: default=ServerArgs.port, help="The port of the HTTP server.", ) + parser.add_argument( + "--nccl-port", + type=int, + default=ServerArgs.nccl_port, + help="The port for NCCL distributed environment setup. Defaults to a random port.", + ) parser.add_argument( "--tokenizer-mode", type=str, @@ -1747,14 +1754,17 @@ class PortArgs: @staticmethod def init_new(server_args, dp_rank: Optional[int] = None) -> "PortArgs": - port = server_args.port + random.randint(100, 1000) - while True: - if is_port_available(port): - break - if port < 60000: - port += 42 - else: - port -= 43 + if server_args.nccl_port is None: + port = server_args.port + random.randint(100, 1000) + while True: + if is_port_available(port): + break + if port < 60000: + port += 42 + else: + port -= 43 + else: + port = server_args.nccl_port if not server_args.enable_dp_attention: # Normal case, use IPC within a single node