[Bugfix] fix w8a8_int8 load issue (#8308)
Co-authored-by: ronnie_zheng <zl19940307@163.com>
This commit is contained in:
@@ -231,7 +231,10 @@ class W8A8Int8Config(QuantizationConfig):
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def get_config_filenames(cls) -> List[str]:
|
def get_config_filenames(cls) -> List[str]:
|
||||||
return []
|
filenames = []
|
||||||
|
if _is_npu:
|
||||||
|
filenames.append("quant_model_description.json")
|
||||||
|
return filenames
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def from_config(cls, config: Dict[str, Any]) -> W8A8Int8Config:
|
def from_config(cls, config: Dict[str, Any]) -> W8A8Int8Config:
|
||||||
|
|||||||
@@ -229,6 +229,8 @@ def get_quant_config(
|
|||||||
f"Unsupported quantization config"
|
f"Unsupported quantization config"
|
||||||
f" found for {model_config.quantization} in {f}."
|
f" found for {model_config.quantization} in {f}."
|
||||||
)
|
)
|
||||||
|
elif model_config.quantization == "w8a8_int8":
|
||||||
|
config["packed_modules_mapping"] = packed_modules_mapping
|
||||||
|
|
||||||
return quant_cls.from_config(config)
|
return quant_cls.from_config(config)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user