Support layerwise rebalancing experts (#6851)

This commit is contained in:
fzyzcjy
2025-06-05 15:05:52 +08:00
committed by GitHub
parent 72a110f664
commit 0de5e7d40f
6 changed files with 115 additions and 38 deletions

View File

@@ -180,6 +180,7 @@ class ServerArgs:
# EPLB-related server options (fields of ServerArgs, per the hunk header).
# Off by default.
enable_eplb: bool = False
# Algorithm selector; defaults to "auto". Valid values are not visible in this hunk.
eplb_algorithm: str = "auto"
# Number of iterations after which an EPLB rebalance is triggered automatically
# (wording taken from the CLI help text for this option).
eplb_rebalance_num_iterations: int = 1000
# Number of layers to rebalance per forward pass (per the CLI help text).
# NOTE(review): the meaning of the default None is not shown here — presumably
# "no chunking"; confirm against the consumer of this field.
eplb_rebalance_layers_per_chunk: Optional[int] = None
# Recorder mode: one of "stat", "per_pass", "per_token", or None (the default).
expert_distribution_recorder_mode: Optional[
Literal["stat", "per_pass", "per_token"]
] = None
@@ -1367,6 +1368,12 @@ class ServerArgs:
default=ServerArgs.eplb_rebalance_num_iterations,
help="Number of iterations to automatically trigger a EPLB re-balance.",
)
# Register the --eplb-rebalance-layers-per-chunk flag: parsed as an int, with the
# default taken from the ServerArgs.eplb_rebalance_layers_per_chunk field so the
# CLI and the dataclass stay in sync.
parser.add_argument(
"--eplb-rebalance-layers-per-chunk",
type=int,
default=ServerArgs.eplb_rebalance_layers_per_chunk,
help="Number of layers to rebalance per forward pass.",
)
parser.add_argument(
"--expert-distribution-recorder-mode",
type=str,