[quantization] Properly ignore quantization for layers excluded in quant_config (#11205)
This commit is contained in:
@@ -207,15 +207,11 @@ class FusedMoE(torch.nn.Module):
             gemm1_clamp_limit=gemm1_clamp_limit,
         )
 
-        if quant_config is None:
-            self.quant_method: FusedMoEMethodBase = UnquantizedFusedMoEMethod(
-                self.use_triton_kernels
-            )
-        else:
-            self.quant_method: FusedMoEMethodBase = quant_config.get_quant_method(
-                self, prefix
-            )
-        assert self.quant_method is not None
+        self.quant_method: Optional[FusedMoEMethodBase] = None
+        if quant_config is not None:
+            self.quant_method = quant_config.get_quant_method(self, prefix)
+        if self.quant_method is None:
+            self.quant_method = UnquantizedFusedMoEMethod(self.use_triton_kernels)
 
         self.quant_method.create_weights(
             layer=self,
@@ -65,7 +65,9 @@ class QuarkConfig(QuantizationConfig):
         if should_ignore_layer(
             prefix, ignore=exclude_layers, fused_mapping=self.packed_modules_mapping
         ):
-            return UnquantizedLinearMethod()
+            if isinstance(layer, LinearBase):
+                return UnquantizedLinearMethod()
+            return None
 
         if isinstance(layer, LinearBase):
             scheme = self.get_scheme(layer=layer, layer_name=prefix)
Reference in New Issue
Block a user