Set num_fused_shared_experts to num_shared_experts when shared_experts fusion is not disabled (#6736)

2025-06-04 15:53:22 -07:00
parent f0f84975f4
commit 81964328b7
22 changed files with 381 additions and 45 deletions
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -303,6 +303,7 @@ def select_experts(
    renormalize: bool,
    topk_group: Optional[int] = None,
    num_expert_group: Optional[int] = None,
+    num_fused_shared_experts: int = 0,
    custom_routing_function: Optional[Callable] = None,
    correction_bias: Optional[torch.Tensor] = None,
    torch_native: bool = False,
@@ -310,7 +311,6 @@ def select_experts(
    num_token_non_padded: Optional[torch.Tensor] = None,
    expert_location_dispatch_info: Optional[ExpertLocationDispatchInfo] = None,
 ):
-    num_fused_shared_experts = global_server_args_dict["num_fused_shared_experts"]

    router_logits, correction_bias = (
        expert_location_dispatch.transform_select_experts_inputs(