diff --git a/vllm_ascend/quantization/method_adapters.py b/vllm_ascend/quantization/method_adapters.py index f48255b6..34764a1b 100644 --- a/vllm_ascend/quantization/method_adapters.py +++ b/vllm_ascend/quantization/method_adapters.py @@ -220,8 +220,8 @@ class AscendFusedMoEMethod(FusedMoEMethodBase): set_weight_attrs(param, extra_weight_attrs) extra_weight_attrs.update({"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}) - per_group_param = ( - ["weight_scale_second", "weight_offset_second", "scale_bias"] + ["weight_scale", "weight_offset"] + per_group_param = ["weight_scale_second", "weight_offset_second", "scale_bias"] + ( + ["weight_scale", "weight_offset"] if hasattr(self.quant_method, "group_size") and self.quant_method.group_size > 0 else [] )