Support dynamically rebalancing experts using EPLB (#6469)

This commit is contained in:
fzyzcjy
2025-05-22 14:13:21 +08:00
committed by GitHub
parent 9484eba4ad
commit 7a80f56513
6 changed files with 226 additions and 3 deletions

View File

@@ -173,6 +173,8 @@ class ServerArgs:
ep_num_redundant_experts: int = 0
ep_dispatch_algorithm: Optional[Literal["static", "dynamic"]] = None
init_expert_location: str = "trivial"
enable_eplb: bool = False
eplb_rebalance_num_iterations: int = 1000
expert_distribution_recorder_mode: Optional[
Literal["stat", "per_pass", "per_token"]
] = None
@@ -1293,6 +1295,17 @@ class ServerArgs:
default=ServerArgs.init_expert_location,
help="Initial location of EP experts.",
)
# Opt-in switch for EPLB expert rebalancing: with `store_true` the parsed
# value is False unless the flag is present on the command line.
parser.add_argument(
    "--enable-eplb",
    help="Enable EPLB algorithm",
    action="store_true",
)
# Integer option whose default is read from the ServerArgs field of the same
# name, so the CLI default and the dataclass default cannot drift apart.
# NOTE(review): presumably only consulted when --enable-eplb is set — confirm
# against the code that consumes this value.
parser.add_argument(
    "--eplb-rebalance-num-iterations",
    type=int,
    default=ServerArgs.eplb_rebalance_num_iterations,
    help="Number of iterations to automatically trigger an EPLB re-balance.",
)
parser.add_argument(
"--expert-distribution-recorder-mode",
type=str,