From ebda73dc723ce822ede7b3e04dcb2f9f84216cc3 Mon Sep 17 00:00:00 2001 From: Qiaolin Yu Date: Sat, 18 Oct 2025 14:10:15 -0700 Subject: [PATCH] Use cutlass fp4 gemm by default (#11813) --- python/sglang/srt/layers/quantization/modelopt_quant.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 05bdef739..9ae270caf 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -79,7 +79,7 @@ CUTEDSL_MOE_SCALAR_INPUT_SCALE = get_bool_env_var( "SGLANG_CUTEDSL_MOE_SCALAR_INPUT_SCALE", "true" ) USE_CUTLASS_BACKEND_FOR_FP4_GEMM = get_bool_env_var( - "SGLANG_USE_CUTLASS_BACKEND_FOR_FP4_GEMM" + "SGLANG_USE_CUTLASS_BACKEND_FOR_FP4_GEMM", "true" ) # TODO make it true by default when the DeepEP PR is merged CUTEDSL_MOE_NVFP4_DISPATCH = get_bool_env_var(