From de1350ea20530e0744b48d0d50415fa2ff5122cd Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 9 Jun 2025 08:41:35 +0800 Subject: [PATCH] Minor remove one kernel for DeepSeek (#6977) --- python/sglang/srt/models/deepseek_v2.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 2782b3a56..383b3138c 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -421,10 +421,13 @@ class DeepseekV2MoE(nn.Module): topk_weights=topk_weights, forward_mode=forward_mode, ) - final_hidden_states *= self.routed_scaling_factor if shared_output is not None: - final_hidden_states = final_hidden_states + shared_output + x = shared_output + x.add_(final_hidden_states, alpha=self.routed_scaling_factor) + final_hidden_states = x + else: + final_hidden_states *= self.routed_scaling_factor return final_hidden_states