diff --git a/python/sglang/srt/layers/quantization/modelopt_quant.py b/python/sglang/srt/layers/quantization/modelopt_quant.py index 6d3b76950..9d7307c16 100755 --- a/python/sglang/srt/layers/quantization/modelopt_quant.py +++ b/python/sglang/srt/layers/quantization/modelopt_quant.py @@ -1212,11 +1212,13 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase): # Process w13 weights w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale) + del layer.w13_weight_scale layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled) layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False) # Process w2 weights w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale) + del layer.w2_weight_scale layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled) layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)