From ed3157997153fdbbe142bf4ef995ecaaae62fc34 Mon Sep 17 00:00:00 2001
From: Liurl <liurl021@gmail.com>
Date: Wed, 13 Mar 2024 13:15:43 +0800
Subject: [PATCH] Fix marlin model loading compat with autogptq (#290)

Co-authored-by: LRL <lrl@lbx.dev>
---
 python/sglang/srt/managers/router/model_runner.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/python/sglang/srt/managers/router/model_runner.py b/python/sglang/srt/managers/router/model_runner.py
index ac98a85f0..6f52db83d 100644
--- a/python/sglang/srt/managers/router/model_runner.py
+++ b/python/sglang/srt/managers/router/model_runner.py
@@ -300,9 +300,15 @@ class ModelRunner:
                     self.model_config.hf_config, "quantization_config", None
                 )
                 if hf_quant_config is not None:
-                    quant_config_class = QUANTIONCONFIG_MAPPING.get(
-                        hf_quant_config["quant_method"]
-                    )
+                    hf_quant_method = hf_quant_config["quant_method"]
+
+                    # compat: AutoGPTQ flags Marlin as "gptq" + truthy is_marlin_format
+                    if (hf_quant_method == "gptq"
+                            and "is_marlin_format" in hf_quant_config
+                            and hf_quant_config["is_marlin_format"]):
+                        hf_quant_method = "marlin"
+                    quant_config_class = QUANTIONCONFIG_MAPPING.get(hf_quant_method)
+
                     if quant_config_class is None:
                         raise ValueError(
                             f"Unsupported quantization method: {hf_quant_config['quant_method']}"