From 760286e3d378780546b88c6d9e932bc178d39669 Mon Sep 17 00:00:00 2001
From: Yuxuan Zhang <2448370773@qq.com>
Date: Mon, 4 Aug 2025 01:43:40 +0800
Subject: [PATCH] use fp32 for e_score_correction_bias in GLM-4.5 (#8729)

---
 python/sglang/srt/models/glm4_moe.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/glm4_moe.py b/python/sglang/srt/models/glm4_moe.py
index badbb56ca..76f954578 100644
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -343,7 +343,7 @@ class Glm4MoeGate(nn.Module):
             torch.empty((config.n_routed_experts, config.hidden_size))
         )
         self.e_score_correction_bias = nn.Parameter(
-            torch.empty((config.n_routed_experts))
+            torch.empty((config.n_routed_experts), dtype=torch.float32)
         )
         if _is_cpu and _is_cpu_amx_available:
             self.quant_method = PackWeightMethod(weight_names=["weight"])