[router] allow one router to support different model families and serving mode (#10244)
This commit is contained in:
@@ -46,6 +46,9 @@ class Router:
|
||||
max_payload_size: Maximum payload size in bytes. Default: 256MB
|
||||
max_tree_size: Maximum size of the approximation tree for cache-aware routing. Default: 2^24
|
||||
dp_aware: Enable data parallelism aware schedule. Default: False
|
||||
enable_igw: Enable IGW (Inference-Gateway) mode for multi-model support. When enabled,
|
||||
the router can manage multiple models simultaneously with per-model load balancing
|
||||
policies. Default: False
|
||||
api_key: The API key used for authorization with the worker.
|
||||
Useful when the dp aware scheduling strategy is enabled.
|
||||
Default: None
|
||||
|
||||
@@ -34,6 +34,7 @@ class RouterArgs:
|
||||
max_tree_size: int = 2**26
|
||||
max_payload_size: int = 512 * 1024 * 1024 # 512MB default for large batches
|
||||
dp_aware: bool = False
|
||||
enable_igw: bool = False # Enable IGW (Inference-Gateway) mode for multi-model support
|
||||
api_key: Optional[str] = None
|
||||
log_dir: Optional[str] = None
|
||||
log_level: Optional[str] = None
|
||||
@@ -227,6 +228,11 @@ class RouterArgs:
|
||||
action="store_true",
|
||||
help="Enable data parallelism aware schedule",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}enable-igw",
|
||||
action="store_true",
|
||||
help="Enable IGW (Inference-Gateway) mode for multi-model support",
|
||||
)
|
||||
parser.add_argument(
|
||||
f"--{prefix}api-key",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user