Support redundant experts in expert parallel (#6461)

2025-05-21 17:05:53 +08:00
parent a071dc4084
commit ccfe5c009d
5 changed files with 18 additions and 5 deletions
--- a/python/sglang/srt/managers/expert_location.py
+++ b/python/sglang/srt/managers/expert_location.py
@@ -163,8 +163,7 @@ class ExpertLocationMetadata:

        num_physical_experts = (
            model_config_for_expert_location.num_logical_experts
-            # TODO pr-chain: enable this later
-            # + server_args.ep_num_redundant_experts
+            + server_args.ep_num_redundant_experts
        )
        ep_size = server_args.ep_size
        assert num_physical_experts % ep_size == 0
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -90,6 +90,7 @@ global_server_args_dict = {
    "speculative_accept_threshold_single": ServerArgs.speculative_accept_threshold_single,
    "torchao_config": ServerArgs.torchao_config,
    "triton_attention_reduce_in_fp32": ServerArgs.triton_attention_reduce_in_fp32,
+    "ep_num_redundant_experts": ServerArgs.ep_num_redundant_experts,
 }

 logger = logging.getLogger(__name__)