Better PD initialization (#5751)

This commit is contained in:
Liangsheng Yin
2025-05-07 01:12:57 +08:00
committed by GitHub
parent 6d4d3bc81d
commit a3e4e9bf9e
5 changed files with 142 additions and 26 deletions

View File

@@ -198,6 +198,7 @@ class ServerArgs:
# Disaggregation bootstrap port; defaults to 8998.
disaggregation_bootstrap_port: int = 8998
# Name of the disaggregation transfer backend; defaults to "mooncake".
disaggregation_transfer_backend: str = "mooncake"
# Optional device selection (presumably InfiniBand -- confirm). The CLI help
# visible in this file for what appears to be the matching option says that
# None triggers automatic device detection when the mooncake backend is enabled.
disaggregation_ib_device: Optional[str] = None
# URL of the PD disaggregation load balancer, or None when not configured.
# Per the --pdlb-url help text, when this is set the prefill/decode server
# registers with the load balancer.
pdlb_url: Optional[str] = None
def __post_init__(self):
# Expert parallelism
@@ -1254,6 +1255,12 @@ class ServerArgs:
"or multiple comma-separated devices (e.g., --disaggregation-ib-device mlx5_0,mlx5_1). "
"Default is None, which triggers automatic device detection when mooncake backend is enabled.",
)
# Register the optional --pdlb-url flag: a string value that defaults to None
# when the flag is omitted. Assuming `parser` is an argparse.ArgumentParser,
# the parsed value is stored under the destination name "pdlb_url".
parser.add_argument(
"--pdlb-url",
type=str,
default=None,
help="The URL of the PD disaggregation load balancer. If set, the prefill/decode server will register with the load balancer.",
)
@classmethod
def from_cli_args(cls, args: argparse.Namespace):