From 760286e3d378780546b88c6d9e932bc178d39669 Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Mon, 4 Aug 2025 01:43:40 +0800
Subject: [PATCH] use fp32 for e_score_correction_bias in GLM-4.5 (#8729)

---
 python/sglang/srt/models/glm4_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py
index badbb56ca..76f954578 100644
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -343,7 +343,7 @@ class Glm4MoeGate(nn.Module):
             torch.empty((config.n_routed_experts, config.hidden_size))
         )
         self.e_score_correction_bias = nn.Parameter(
-            torch.empty((config.n_routed_experts))
+            torch.empty((config.n_routed_experts), dtype=torch.float32)
         )
         if _is_cpu and _is_cpu_amx_available:
             self.quant_method = PackWeightMethod(weight_names=["weight"])