Use FP32 for e_score_correction_bias in GLM-4.5 (#8729)

2025-08-04 01:43:40 +08:00
parent 3435a24e81
commit 760286e3d3
1 changed file with 1 addition and 1 deletion
--- a/python/sglang/srt/models/glm4_moe.py
+++ b/python/sglang/srt/models/glm4_moe.py
@@ -343,7 +343,7 @@ class Glm4MoeGate(nn.Module):
            torch.empty((config.n_routed_experts, config.hidden_size))
        )
        self.e_score_correction_bias = nn.Parameter(
-            torch.empty((config.n_routed_experts))
+            torch.empty((config.n_routed_experts), dtype=torch.float32)
        )
        if _is_cpu and _is_cpu_amx_available:
            self.quant_method = PackWeightMethod(weight_names=["weight"])