From 065ce815740a293f25ea3a65398ef644a9c60e1b Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Tue, 14 Oct 2025 05:48:22 +0800 Subject: [PATCH] Tiny cleanup fp4 gemm calls (#11537) --- .../srt/layers/quantization/modelopt_quant.py | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 31544f563..d5c1db3a8 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -852,25 +852,15 @@ class ModelOptFp4LinearMethod(LinearMethodBase): if enable_flashinfer_fp4_gemm: w = layer.weight.T w_scale_interleaved = layer.weight_scale_interleaved.T - if USE_CUTLASS_BACKEND_FOR_FP4_GEMM: - out = fp4_gemm( - x_fp4, - w, - x_scale_interleaved, - w_scale_interleaved, - layer.alpha, - output_dtype, - backend="cutlass", - ) - else: - out = fp4_gemm( - x_fp4, - w, - x_scale_interleaved, - w_scale_interleaved, - layer.alpha, - output_dtype, - ) + out = fp4_gemm( + x_fp4, + w, + x_scale_interleaved, + w_scale_interleaved, + layer.alpha, + output_dtype, + **(dict(backend="cutlass") if USE_CUTLASS_BACKEND_FOR_FP4_GEMM else dict()), + ) if bias is not None: out = out + bias return out.view(*output_shape)