Support dispatching logical to physical experts (#6385)

2025-05-20 13:13:55 +08:00
parent 69af3ec35f
commit e98afbe042
9 changed files with 184 additions and 5 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -170,6 +170,7 @@ class ServerArgs:
    enable_ep_moe: bool = False
    enable_deepep_moe: bool = False
    deepep_mode: Optional[Literal["auto", "normal", "low_latency"]] = "auto"
+    ep_dispatch_algorithm: Optional[Literal["static", "dynamic"]] = None
    init_expert_location: str = "trivial"
    expert_distribution_recorder_mode: Optional[
        Literal["stat", "per_pass", "per_token"]
@@ -1271,6 +1272,12 @@ class ServerArgs:
            default="auto",
            help="Select the mode when enable DeepEP MoE, could be `normal`, `low_latency` or `auto`. Default is `auto`, which means `low_latency` for decode batch and `normal` for prefill batch.",
        )
+        parser.add_argument(
+            "--ep-dispatch-algorithm",
+            choices=["static", "dynamic"],  # same values as the Literal on the ep_dispatch_algorithm field; argparse already parses values as str
+            default=ServerArgs.ep_dispatch_algorithm,
+            help="The algorithm to choose ranks for redundant experts in expert parallel.",
+        )
        parser.add_argument(
            "--init-expert-location",
            type=str,