Fix Marlin model loading compatibility with AutoGPTQ (#290)
Co-authored-by: LRL <lrl@lbx.dev>
This commit is contained in:
@@ -300,9 +300,15 @@ class ModelRunner:
|
|||||||
self.model_config.hf_config, "quantization_config", None
|
self.model_config.hf_config, "quantization_config", None
|
||||||
)
|
)
|
||||||
if hf_quant_config is not None:
|
if hf_quant_config is not None:
|
||||||
quant_config_class = QUANTIONCONFIG_MAPPING.get(
|
hf_quant_method = hf_quant_config["quant_method"]
|
||||||
hf_quant_config["quant_method"]
|
|
||||||
)
|
# compat: AutoGPTQ reports quant_method "gptq" and flags Marlin checkpoints via is_marlin_format in the quant config, so treat those as "marlin"
|
||||||
|
if (hf_quant_method == "gptq"
|
||||||
|
and "is_marlin_format" in hf_quant_config
|
||||||
|
and hf_quant_config["is_marlin_format"]):
|
||||||
|
hf_quant_method = "marlin"
|
||||||
|
quant_config_class = QUANTIONCONFIG_MAPPING.get(hf_quant_method)
|
||||||
|
|
||||||
if quant_config_class is None:
|
if quant_config_class is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
f"Unsupported quantization method: {hf_quant_config['quant_method']}"
|
f"Unsupported quantization method: {hf_quant_config['quant_method']}"
|
||||||
|
|||||||
Reference in New Issue
Block a user