From 6e8d3681ae444bc3f66109265642a3754570c394 Mon Sep 17 00:00:00 2001 From: shaopeng-666 Date: Tue, 10 Mar 2026 16:57:05 +0800 Subject: [PATCH] [bugfix] The problem that the w4a8 weight fails to be loaded when the EP is not enabled is resolved. (#7090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? This is a bug fix to resolve the issue where the MOE model fails to load quantized weights in w4a8 format when EP is not enabled. The parameters ["weight_scale_second", "weight_offset_second", "scale_bias"] shall be parsed in per-group mode, regardless of other conditions. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.16.0 - vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d Signed-off-by: 李少鹏 --- vllm_ascend/quantization/method_adapters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/quantization/method_adapters.py b/vllm_ascend/quantization/method_adapters.py index f48255b6..34764a1b 100644 --- a/vllm_ascend/quantization/method_adapters.py +++ b/vllm_ascend/quantization/method_adapters.py @@ -220,8 +220,8 @@ class AscendFusedMoEMethod(FusedMoEMethodBase): set_weight_attrs(param, extra_weight_attrs) extra_weight_attrs.update({"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}) - per_group_param = ( - ["weight_scale_second", "weight_offset_second", "scale_bias"] + ["weight_scale", "weight_offset"] + per_group_param = ["weight_scale_second", "weight_offset_second", "scale_bias"] + ( + ["weight_scale", "weight_offset"] if hasattr(self.quant_method, "group_size") and self.quant_method.group_size > 0 else [] )