Reintroduce memory usage fix (#9535)
This commit is contained in:
@@ -1212,11 +1212,13 @@ class ModelOptNvFp4FusedMoEMethod(FusedMoEMethodBase):
|
|||||||
|
|
||||||
# Process w13 weights
|
# Process w13 weights
|
||||||
w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale)
|
w13_blockscale_swizzled = self.swizzle_blockscale(layer.w13_weight_scale)
|
||||||
|
del layer.w13_weight_scale
|
||||||
layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled)
|
layer.w13_blockscale_swizzled.data.copy_(w13_blockscale_swizzled)
|
||||||
layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False)
|
layer.w13_weight = Parameter(layer.w13_weight.data, requires_grad=False)
|
||||||
|
|
||||||
# Process w2 weights
|
# Process w2 weights
|
||||||
w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale)
|
w2_blockscale_swizzled = self.swizzle_blockscale(layer.w2_weight_scale)
|
||||||
|
del layer.w2_weight_scale
|
||||||
layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled)
|
layer.w2_blockscale_swizzled.data.copy_(w2_blockscale_swizzled)
|
||||||
layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)
|
layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user