Fix marlin model loading compat with autogptq (#290)

commit ed31579971 (parent 92e2d74fd0)
Author: Liurl
Date: 2024-03-13 13:15:43 +08:00
Committed-by: GitHub
Co-authored-by: LRL <lrl@lbx.dev>


@@ -300,9 +300,15 @@ class ModelRunner:
             self.model_config.hf_config, "quantization_config", None
         )
         if hf_quant_config is not None:
-            quant_config_class = QUANTIONCONFIG_MAPPING.get(
-                hf_quant_config["quant_method"]
-            )
+            hf_quant_method = hf_quant_config["quant_method"]
+
+            # compat: autogptq uses is_marlin_format within quant config
+            if (hf_quant_method == "gptq"
+                and "is_marlin_format" in hf_quant_config
+                and hf_quant_config["is_marlin_format"]):
+                hf_quant_method = "marlin"
+
+            quant_config_class = QUANTIONCONFIG_MAPPING.get(hf_quant_method)
             if quant_config_class is None:
                 raise ValueError(
                     f"Unsupported quantization method: {hf_quant_config['quant_method']}"