[feature] [sgl-router] Add a dp-aware routing strategy (#6869)
This commit is contained in:
@@ -50,6 +50,8 @@ class RouterArgs:
|
||||
eviction_interval: int = 60
|
||||
max_tree_size: int = 2**24
|
||||
max_payload_size: int = 256 * 1024 * 1024 # 256MB default for large batches
|
||||
dp_aware: bool = False
|
||||
api_key: Optional[str] = None
|
||||
log_dir: Optional[str] = None
|
||||
log_level: Optional[str] = None
|
||||
# Service discovery configuration
|
||||
@@ -197,6 +199,17 @@ class RouterArgs:
|
||||
default=RouterArgs.max_payload_size,
|
||||
help="Maximum payload size in bytes",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}dp-aware",
|
||||
action="store_true",
|
||||
help="Enable data parallelism aware schedule",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}api-key",
|
||||
type=str,
|
||||
default=None,
|
||||
help="The api key used for the authorization with the worker. Useful when the dp aware scheduling strategy is enabled.",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}log-dir",
|
||||
type=str,
|
||||
@@ -304,6 +317,8 @@ class RouterArgs:
|
||||
eviction_interval=getattr(args, f"{prefix}eviction_interval"),
|
||||
max_tree_size=getattr(args, f"{prefix}max_tree_size"),
|
||||
max_payload_size=getattr(args, f"{prefix}max_payload_size"),
|
||||
dp_aware=getattr(args, f"{prefix}dp_aware", False),
|
||||
api_key=getattr(args, f"{prefix}api_key", None),
|
||||
log_dir=getattr(args, f"{prefix}log_dir", None),
|
||||
log_level=getattr(args, f"{prefix}log_level", None),
|
||||
service_discovery=getattr(args, f"{prefix}service_discovery", False),
|
||||
@@ -463,6 +478,8 @@ def launch_router(args: argparse.Namespace) -> Optional[Router]:
|
||||
eviction_interval_secs=router_args.eviction_interval,
|
||||
max_tree_size=router_args.max_tree_size,
|
||||
max_payload_size=router_args.max_payload_size,
|
||||
dp_aware=router_args.dp_aware,
|
||||
api_key=router_args.api_key,
|
||||
log_dir=router_args.log_dir,
|
||||
log_level=router_args.log_level,
|
||||
service_discovery=router_args.service_discovery,
|
||||
|
||||
@@ -31,6 +31,10 @@ class Router:
|
||||
routing. Default: 60
|
||||
max_payload_size: Maximum payload size in bytes. Default: 256MB
|
||||
max_tree_size: Maximum size of the approximation tree for cache-aware routing. Default: 2^24
|
||||
dp_aware: Enable data-parallelism-aware scheduling. Default: False
|
||||
api_key: The api key used for the authorization with the worker.
|
||||
Useful when the dp aware scheduling strategy is enabled.
|
||||
Default: None
|
||||
log_dir: Directory to store log files. If None, logs are only output to console. Default: None
|
||||
log_level: Logging level. Options: 'debug', 'info', 'warning', 'error', 'critical'.
|
||||
service_discovery: Enable Kubernetes service discovery. When enabled, the router will
|
||||
@@ -73,6 +77,8 @@ class Router:
|
||||
eviction_interval_secs: int = 60,
|
||||
max_tree_size: int = 2**24,
|
||||
max_payload_size: int = 256 * 1024 * 1024, # 256MB
|
||||
dp_aware: bool = False,
|
||||
api_key: Optional[str] = None,
|
||||
log_dir: Optional[str] = None,
|
||||
log_level: Optional[str] = None,
|
||||
service_discovery: bool = False,
|
||||
@@ -110,6 +116,8 @@ class Router:
|
||||
eviction_interval_secs=eviction_interval_secs,
|
||||
max_tree_size=max_tree_size,
|
||||
max_payload_size=max_payload_size,
|
||||
dp_aware=dp_aware,
|
||||
api_key=api_key,
|
||||
log_dir=log_dir,
|
||||
log_level=log_level,
|
||||
service_discovery=service_discovery,
|
||||
|
||||
Reference in New Issue
Block a user