feature(eplb): add min-rebalancing-utilization-threshold for eplb (#8345)
Co-authored-by: yizhang2077 <1109276519@qq.com>
This commit is contained in:
@@ -274,6 +274,7 @@ class ServerArgs:
|
||||
eplb_algorithm: str = "auto"
|
||||
eplb_rebalance_num_iterations: int = 1000
|
||||
eplb_rebalance_layers_per_chunk: Optional[int] = None
|
||||
eplb_min_rebalancing_utilization_threshold: float = 1.0
|
||||
expert_distribution_recorder_mode: Optional[
|
||||
Literal["stat", "stat_approx", "per_pass", "per_token"]
|
||||
] = None
|
||||
@@ -1595,6 +1596,12 @@ class ServerArgs:
|
||||
default=ServerArgs.eplb_rebalance_layers_per_chunk,
|
||||
help="Number of layers to rebalance per forward pass.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eplb-min-rebalancing-utilization-threshold",
|
||||
type=float,
|
||||
default=ServerArgs.eplb_min_rebalancing_utilization_threshold,
|
||||
help="Minimum threshold for GPU average utilization to trigger EPLB rebalancing. Must be in the range [0.0, 1.0].",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--expert-distribution-recorder-mode",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user