From 99795d61e682e4ff2d6ad73cecb1b408c02a3a92 Mon Sep 17 00:00:00 2001
From: Even Zhou
Date: Fri, 1 Aug 2025 08:30:16 +0800
Subject: [PATCH] [Bugfix] fix w8a8_int8 load issue (#8308)

Co-authored-by: ronnie_zheng
---
 python/sglang/srt/layers/quantization/w8a8_int8.py | 5 ++++-
 python/sglang/srt/model_loader/weight_utils.py     | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/layers/quantization/w8a8_int8.py b/python/sglang/srt/layers/quantization/w8a8_int8.py
index 22e8b108f..826a8c8e8 100644
--- a/python/sglang/srt/layers/quantization/w8a8_int8.py
+++ b/python/sglang/srt/layers/quantization/w8a8_int8.py
@@ -231,7 +231,10 @@ class W8A8Int8Config(QuantizationConfig):
 
     @classmethod
     def get_config_filenames(cls) -> List[str]:
-        return []
+        filenames = []
+        if _is_npu:
+            filenames.append("quant_model_description.json")
+        return filenames
 
     @classmethod
     def from_config(cls, config: Dict[str, Any]) -> W8A8Int8Config:
diff --git a/python/sglang/srt/model_loader/weight_utils.py b/python/sglang/srt/model_loader/weight_utils.py
index b3cf18ec9..33f11b8af 100644
--- a/python/sglang/srt/model_loader/weight_utils.py
+++ b/python/sglang/srt/model_loader/weight_utils.py
@@ -229,6 +229,8 @@ def get_quant_config(
                 f"Unsupported quantization config"
                 f" found for {model_config.quantization} in {f}."
             )
+    elif model_config.quantization == "w8a8_int8":
+        config["packed_modules_mapping"] = packed_modules_mapping
 
     return quant_cls.from_config(config)
 