diff --git a/python/sglang/srt/layers/quantization/fp8.py b/python/sglang/srt/layers/quantization/fp8.py index b020e4188..d14e9b18e 100644 --- a/python/sglang/srt/layers/quantization/fp8.py +++ b/python/sglang/srt/layers/quantization/fp8.py @@ -358,8 +358,8 @@ class Fp8LinearMethod(LinearMethodBase): return else: weight, weight_scale = layer.weight.data, layer.weight_scale_inv.data - layer.weight = Parameter(weight, requires_grad=False) - layer.weight_scale_inv = Parameter(weight_scale, requires_grad=False) + layer.weight.data = weight.data + layer.weight_scale_inv.data = weight_scale.data else: layer.weight = Parameter(layer.weight.data, requires_grad=False)