From 388c05d54435939c3481e637093a4e10a0e19b7d Mon Sep 17 00:00:00 2001 From: yhyang201 <47235274+yhyang201@users.noreply.github.com> Date: Fri, 19 Sep 2025 02:44:43 +0800 Subject: [PATCH] Fix bias handling in TritonMoeQuantInfo within quantization/mxfp4.py (#10579) --- python/sglang/srt/layers/quantization/mxfp4.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/layers/quantization/mxfp4.py b/python/sglang/srt/layers/quantization/mxfp4.py index 0d98d00d6..8643a3e36 100644 --- a/python/sglang/srt/layers/quantization/mxfp4.py +++ b/python/sglang/srt/layers/quantization/mxfp4.py @@ -731,8 +731,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase): quant_info = TritonMoeQuantInfo( w13_weight=layer.w13_weight, w2_weight=layer.w2_weight, - w13_weight_bias=layer.w13_weight_bias, - w2_weight_bias=layer.w2_weight_bias, + b13=getattr(layer, "w13_weight_bias", None), + b2=getattr(layer, "w2_weight_bias", None), ) return self.runner.run(dispatch_output, quant_info)