diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 7073f6be5..fca0ee38b 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -776,7 +776,7 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): # GEMM 1 w13_weight = ModelWeightParameter( data=torch.empty( - layer.local_num_experts, + layer.num_local_experts, 2 * intermediate_size_per_partition, # 2 fp4 items are packed in the input dimension hidden_size // 2,