[feature] [sgl-router] Add a dp-aware routing strategy (#6869)

This commit is contained in:
Rui Chen
2025-07-30 20:58:48 +08:00
committed by GitHub
parent 55ecdc0a8e
commit a730ce8162
19 changed files with 726 additions and 16 deletions

View File

@@ -50,6 +50,8 @@ class RouterArgs:
eviction_interval: int = 60
max_tree_size: int = 2**24
max_payload_size: int = 256 * 1024 * 1024 # 256MB default for large batches
dp_aware: bool = False
api_key: Optional[str] = None
log_dir: Optional[str] = None
log_level: Optional[str] = None
# Service discovery configuration
@@ -197,6 +199,17 @@ class RouterArgs:
default=RouterArgs.max_payload_size,
help="Maximum payload size in bytes",
)
# Boolean switch for DP-aware scheduling: with `store_true` the parsed value
# is False unless the flag is present on the command line.
parser.add_argument(
f"--{prefix}dp-aware",
action="store_true",
help="Enable data parallelism aware schedule",
)
# Optional API key used for authorization with the worker; the parsed value
# is None when the flag is omitted.
parser.add_argument(
    f"--{prefix}api-key",
    type=str,
    default=None,
    help="The api key used for the authorization with the worker. Useful when the dp aware scheduling strategy is enabled.",
)
parser.add_argument(
f"--{prefix}log-dir",
type=str,
@@ -304,6 +317,8 @@ class RouterArgs:
eviction_interval=getattr(args, f"{prefix}eviction_interval"),
max_tree_size=getattr(args, f"{prefix}max_tree_size"),
max_payload_size=getattr(args, f"{prefix}max_payload_size"),
dp_aware=getattr(args, f"{prefix}dp_aware", False),
api_key=getattr(args, f"{prefix}api_key", None),
log_dir=getattr(args, f"{prefix}log_dir", None),
log_level=getattr(args, f"{prefix}log_level", None),
service_discovery=getattr(args, f"{prefix}service_discovery", False),
@@ -463,6 +478,8 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
eviction_interval_secs=router_args.eviction_interval,
max_tree_size=router_args.max_tree_size,
max_payload_size=router_args.max_payload_size,
dp_aware=router_args.dp_aware,
api_key=router_args.api_key,
log_dir=router_args.log_dir,
log_level=router_args.log_level,
service_discovery=router_args.service_discovery,

View File

@@ -31,6 +31,10 @@ class Router:
routing. Default: 60
max_payload_size: Maximum payload size in bytes. Default: 256MB
max_tree_size: Maximum size of the approximation tree for cache-aware routing. Default: 2^24
dp_aware: Enable data-parallelism-aware scheduling. Default: False
api_key: The api key used for the authorization with the worker.
Useful when the dp aware scheduling strategy is enabled.
Default: None
log_dir: Directory to store log files. If None, logs are only output to console. Default: None
log_level: Logging level. Options: 'debug', 'info', 'warning', 'error', 'critical'.
service_discovery: Enable Kubernetes service discovery. When enabled, the router will
@@ -73,6 +77,8 @@ class Router:
eviction_interval_secs: int = 60,
max_tree_size: int = 2**24,
max_payload_size: int = 256 * 1024 * 1024, # 256MB
dp_aware: bool = False,
api_key: Optional[str] = None,
log_dir: Optional[str] = None,
log_level: Optional[str] = None,
service_discovery: bool = False,
@@ -110,6 +116,8 @@ class Router:
eviction_interval_secs=eviction_interval_secs,
max_tree_size=max_tree_size,
max_payload_size=max_payload_size,
dp_aware=dp_aware,
api_key=api_key,
log_dir=log_dir,
log_level=log_level,
service_discovery=service_discovery,