Fix: sync prepare_fp8_layer_for_marlin with latest vllm changes (#7648)
This commit is contained in:
@@ -76,7 +76,7 @@ class CompressedTensorsW8A16Fp8(CompressedTensorsScheme):
|
|||||||
layer.input_scale = torch.nn.Parameter(
|
layer.input_scale = torch.nn.Parameter(
|
||||||
layer.input_scale.data, requires_grad=False
|
layer.input_scale.data, requires_grad=False
|
||||||
)
|
)
|
||||||
prepare_fp8_layer_for_marlin(layer, strategy="channel")
|
prepare_fp8_layer_for_marlin(layer, size_k_first=True)
|
||||||
|
|
||||||
def create_weights(
|
def create_weights(
|
||||||
self,
|
self,
|
||||||
|
|||||||
Reference in New Issue
Block a user