Enable optional FP32 compute for LM Head (#10729)

Thanks to the MiniMax Team and Chenyang Zhao for their support.
2025-09-29 20:45:17 -07:00
parent 8831c55c3d
commit d17986f8c6
6 changed files with 130 additions and 2 deletions
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -90,6 +90,7 @@ GLOBAL_SERVER_ARGS_KEYS = [
    "disable_flashinfer_cutlass_moe_fp4_allgather",
    "disable_radix_cache",
    "enable_dp_lm_head",
+    "enable_fp32_lm_head",
    "flashinfer_mxfp4_moe_precision",
    "enable_flashinfer_allreduce_fusion",
    "moe_dense_tp_size",