diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index a65337945..e02d30839 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -254,9 +254,8 @@ class MoEGate(nn.Module): and self.weight.shape[0] == 256 and _device_sm >= 90 ): - logits = dsv3_router_gemm(hidden_states, self.weight).to( - hidden_states.dtype - ) + # router gemm output float32 + logits = dsv3_router_gemm(hidden_states, self.weight) else: logits = F.linear(hidden_states, self.weight, None)