diff --git a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py index 8949e3334..c3043f389 100644 --- a/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py +++ b/python/sglang/srt/layers/quantization/deep_gemm_wrapper/compile_utils.py @@ -148,7 +148,7 @@ def _compile_grouped_gemm_nt_f8f8bf16_masked_one( "NUM_MATH_THREADS_PER_GROUP": num_math_threads_per_group, "N": n, "K": k, - "NUM_GROUPS": 1, + "NUM_GROUPS": num_groups, "BLOCK_M": block_m, "BLOCK_N": block_n, "BLOCK_K": block_k,