Support dispatching logical to physical experts (#6385)
This commit is contained in:
@@ -170,6 +170,7 @@ class ServerArgs:
|
||||
enable_ep_moe: bool = False
|
||||
enable_deepep_moe: bool = False
|
||||
deepep_mode: Optional[Literal["auto", "normal", "low_latency"]] = "auto"
|
||||
ep_dispatch_algorithm: Optional[Literal["static", "dynamic"]] = None
|
||||
init_expert_location: str = "trivial"
|
||||
expert_distribution_recorder_mode: Optional[
|
||||
Literal["stat", "per_pass", "per_token"]
|
||||
@@ -1271,6 +1272,12 @@ class ServerArgs:
|
||||
default="auto",
|
||||
help="Select the mode when enable DeepEP MoE, could be `normal`, `low_latency` or `auto`. Default is `auto`, which means `low_latency` for decode batch and `normal` for prefill batch.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--ep-dispatch-algorithm",
|
||||
type=str,
|
||||
default=ServerArgs.ep_dispatch_algorithm,
|
||||
help="The algorithm to choose ranks for redundant experts in expert parallel.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--init-expert-location",
|
||||
type=str,
|
||||
|
||||
Reference in New Issue
Block a user