Support redundant experts in expert parallel (#6461)

2025-05-21 17:05:53 +08:00
parent a071dc4084
commit ccfe5c009d
5 changed files with 18 additions and 5 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -170,6 +170,7 @@ class ServerArgs:
    enable_ep_moe: bool = False
    enable_deepep_moe: bool = False
    deepep_mode: Optional[Literal["auto", "normal", "low_latency"]] = "auto"
+    ep_num_redundant_experts: int = 0
    ep_dispatch_algorithm: Optional[Literal["static", "dynamic"]] = None
    init_expert_location: str = "trivial"
    expert_distribution_recorder_mode: Optional[
@@ -1273,6 +1274,12 @@ class ServerArgs:
            default="auto",
            help="Select the mode when enable DeepEP MoE, could be `normal`, `low_latency` or `auto`. Default is `auto`, which means `low_latency` for decode batch and `normal` for prefill batch.",
        )
+        parser.add_argument(
+            "--ep-num-redundant-experts",
+            type=int,
+            default=ServerArgs.ep_num_redundant_experts,
+            help="Allocate this number of redundant experts in expert parallel.",
+        )
        parser.add_argument(
            "--ep-dispatch-algorithm",
            type=str,