From ed3157997153fdbbe142bf4ef995ecaaae62fc34 Mon Sep 17 00:00:00 2001 From: Liurl Date: Wed, 13 Mar 2024 13:15:43 +0800 Subject: [PATCH] Fix marlin model loading compat with autogptq (#290) Co-authored-by: LRL --- python/sglang/srt/managers/router/model_runner.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py index ac98a85f0..6f52db83d 100644 --- a/python/sglang/srt/managers/router/model_runner.py +++ b/python/sglang/srt/managers/router/model_runner.py @@ -300,9 +300,15 @@ class ModelRunner: self.model_config.hf_config, "quantization_config", None ) if hf_quant_config is not None: - quant_config_class = QUANTIONCONFIG_MAPPING.get( - hf_quant_config["quant_method"] - ) + hf_quant_method = hf_quant_config["quant_method"] + + # compat: autogptq uses is_marlin_format within quant config + if (hf_quant_method == "gptq" + and "is_marlin_format" in hf_quant_config + and hf_quant_config["is_marlin_format"]): + hf_quant_method = "marlin" + quant_config_class = QUANTIONCONFIG_MAPPING.get(hf_quant_method) + if quant_config_class is None: raise ValueError( f"Unsupported quantization method: {hf_quant_config['quant_method']}"