Support w8a8 int8 quantization config (#2881)

This commit is contained in:
Ke Bao
2025-01-14 17:07:49 +08:00
committed by GitHub
parent b8cd09f27a
commit cc0485bef2
4 changed files with 135 additions and 6 deletions

View File

@@ -223,7 +223,11 @@ class ModelConfig:
"compressed_tensors",
"compressed-tensors",
"experts_int8",
"w8a8_int8",
]
compatible_quantization_methods = {
"w8a8_int8": ["compressed-tensors", "compressed_tensors"]
}
if self.quantization is not None:
self.quantization = self.quantization.lower()
@@ -247,12 +251,17 @@ class ModelConfig:
if self.quantization is None:
self.quantization = quant_method
elif self.quantization != quant_method:
raise ValueError(
"Quantization method specified in the model config "
f"({quant_method}) does not match the quantization "
f"method specified in the `quantization` argument "
f"({self.quantization})."
)
if (
self.quantization not in compatible_quantization_methods
or quant_method
not in compatible_quantization_methods[self.quantization]
):
raise ValueError(
"Quantization method specified in the model config "
f"({quant_method}) does not match the quantization "
f"method specified in the `quantization` argument "
f"({self.quantization})."
)
if self.quantization is not None:
if self.quantization not in supported_quantization: