From 6936be32210fdf16b0159b2de3f1b8a27e5a679d Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Mon, 21 Jul 2025 15:37:00 +0800 Subject: [PATCH] Remove router gemm output dtype conversion (#8204) --- python/sglang/srt/models/deepseek_v2.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index a65337945..e02d30839 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -254,9 +254,8 @@ class MoEGate(nn.Module): and self.weight.shape[0] == 256 and _device_sm >= 90 ): - logits = dsv3_router_gemm(hidden_states, self.weight).to( - hidden_states.dtype - ) + # router gemm output float32 + logits = dsv3_router_gemm(hidden_states, self.weight) else: logits = F.linear(hidden_states, self.weight, None)