Revert "Fuse routed scaling factor in topk_reduce kernel (#6220)" (#6968)

2025-06-07 21:02:49 -07:00
parent c2c4f57f63
commit 1fb76ebb93
10 changed files with 9 additions and 331 deletions
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -346,7 +346,7 @@ class DeepseekV2MoE(nn.Module):
        final_hidden_states = self.experts(
            hidden_states=hidden_states, router_logits=router_logits
        )
-
+        final_hidden_states *= self.routed_scaling_factor
        if shared_output is not None:
            final_hidden_states = final_hidden_states + shared_output
        if self.tp_size > 1: