Support layerwise rebalancing of experts (#6851)
This commit is contained in:
@@ -180,6 +180,7 @@ class ServerArgs:
|
||||
enable_eplb: bool = False
|
||||
eplb_algorithm: str = "auto"
|
||||
eplb_rebalance_num_iterations: int = 1000
|
||||
eplb_rebalance_layers_per_chunk: Optional[int] = None
|
||||
expert_distribution_recorder_mode: Optional[
|
||||
Literal["stat", "per_pass", "per_token"]
|
||||
] = None
|
||||
@@ -1367,6 +1368,12 @@ class ServerArgs:
|
||||
default=ServerArgs.eplb_rebalance_num_iterations,
|
||||
help="Number of iterations to automatically trigger a EPLB re-balance.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--eplb-rebalance-layers-per-chunk",
|
||||
type=int,
|
||||
default=ServerArgs.eplb_rebalance_layers_per_chunk,
|
||||
help="Number of layers to rebalance per forward pass.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--expert-distribution-recorder-mode",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user