From 3ded4b215df396061d588eb632385bb94dc97b13 Mon Sep 17 00:00:00 2001
From: Ke Bao
Date: Tue, 18 Mar 2025 02:30:26 +0800
Subject: [PATCH] Revert "feat: update grouped_topk to support softmax and sigmoid" (#4505)

---
 python/sglang/srt/layers/moe/topk.py | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/python/sglang/srt/layers/moe/topk.py b/python/sglang/srt/layers/moe/topk.py
index 7830ff866..e975819a9 100644
--- a/python/sglang/srt/layers/moe/topk.py
+++ b/python/sglang/srt/layers/moe/topk.py
@@ -88,6 +88,7 @@ def fused_topk(
     return topk_weights, topk_ids
 
 
+# This is used by the DeepSeek V2/V3/R1 series models
 @torch.compile(dynamic=True, backend=get_compiler_backend())
 def grouped_topk(
     hidden_states: torch.Tensor,
@@ -96,17 +97,10 @@ def grouped_topk(
     renormalize: bool,
     num_expert_group: int = 0,
     topk_group: int = 0,
-    scoring_func: str = "softmax",
 ):
     assert hidden_states.shape[0] == gating_output.shape[0], "Number of tokens mismatch"
 
-    if scoring_func == "softmax":
-        scores = torch.softmax(gating_output, dim=-1)
-    elif scoring_func == "sigmoid":
-        scores = gating_output.sigmoid()
-    else:
-        raise ValueError(f"Scoring function '{scoring_func}' is not supported.")
-
+    scores = torch.softmax(gating_output, dim=-1)
     num_token = scores.shape[0]
     group_scores = (
         scores.view(num_token, num_expert_group, -1).max(dim=-1).values
@@ -130,7 +124,6 @@ def grouped_topk(
     return topk_weights.to(torch.float32), topk_ids.to(torch.int32)
 
 
-# DeepSeek V2/V3/R1 uses biased_grouped_top
 @torch.compile(dynamic=True, backend=get_compiler_backend())
 def biased_grouped_topk(
     hidden_states: torch.Tensor,
@@ -185,7 +178,7 @@ def select_experts(
     correction_bias: Optional[torch.Tensor] = None,
     torch_native: bool = False,
 ):
-    # DeepSeek V2/V3/R1 uses biased_grouped_top
+    # DeepSeek V2 uses grouped_topk
     if use_grouped_topk:
         assert topk_group is not None
         assert num_expert_group is not None
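
Note (not part of the patch): below is a minimal, self-contained sketch of the grouped top-k routing that this revert restores: softmax scores, group pruning by each group's single best expert, then a global top-k over the surviving experts. The function name grouped_topk_sketch and the toy sizes are illustrative assumptions, not code from the repository.

import torch

def grouped_topk_sketch(
    gating_output: torch.Tensor,  # [num_tokens, num_experts] router logits
    topk: int,
    renormalize: bool,
    num_expert_group: int,
    topk_group: int,
):
    # After the revert, scoring is unconditionally softmax; the
    # scoring_func="sigmoid" branch no longer exists.
    scores = torch.softmax(gating_output, dim=-1)
    num_token, num_experts = scores.shape

    # Rank expert groups by their single best expert; keep topk_group groups.
    group_scores = scores.view(num_token, num_expert_group, -1).max(dim=-1).values
    group_idx = torch.topk(group_scores, k=topk_group, dim=-1, sorted=False)[1]
    group_mask = torch.zeros_like(group_scores)
    group_mask.scatter_(1, group_idx, 1)

    # Zero out experts in pruned groups, then take the global top-k.
    score_mask = (
        group_mask.unsqueeze(-1)
        .expand(num_token, num_expert_group, num_experts // num_expert_group)
        .reshape(num_token, -1)
    )
    masked_scores = scores.masked_fill(score_mask == 0, 0.0)
    topk_weights, topk_ids = torch.topk(masked_scores, k=topk, dim=-1, sorted=False)

    if renormalize:
        topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)

# Toy usage (hypothetical sizes): 4 tokens, 8 experts in 4 groups; route each
# token to 2 experts drawn from its 2 highest-scoring groups.
logits = torch.randn(4, 8)
weights, ids = grouped_topk_sketch(
    logits, topk=2, renormalize=True, num_expert_group=4, topk_group=2
)
print(weights.shape, ids.shape)  # torch.Size([4, 2]) torch.Size([4, 2])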