From 6e8d3681ae444bc3f66109265642a3754570c394 Mon Sep 17 00:00:00 2001 From: shaopeng-666 Date: Tue, 10 Mar 2026 16:57:05 +0800 Subject: [PATCH] [bugfix] The problem that the w4a8 weight fails to be loaded when the EP is not enabled is resolved. (#7090) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? This is a bug fix to resolve the issue where the MOE model fails to load quantized weights in w4a8 format when EP is not enabled. The parameters ["weight_scale_second", "weight_offset_second", "scale_bias"] shall be parsed in per-group mode, regardless of other conditions. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.16.0 - vLLM main: https://github.com/vllm-project/vllm/commit/4034c3d32e30d01639459edd3ab486f56993876d Signed-off-by: 李少鹏 --- vllm_ascend/quantization/method_adapters.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/quantization/method_adapters.py b/vllm_ascend/quantization/method_adapters.py index f48255b6..34764a1b 100644 --- a/vllm_ascend/quantization/method_adapters.py +++ b/vllm_ascend/quantization/method_adapters.py @@ -220,8 +220,8 @@ class AscendFusedMoEMethod(FusedMoEMethodBase): set_weight_attrs(param, extra_weight_attrs) extra_weight_attrs.update({"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}) - per_group_param = ( - ["weight_scale_second", "weight_offset_second", "scale_bias"] + ["weight_scale", "weight_offset"] + per_group_param = ["weight_scale_second", "weight_offset_second", "scale_bias"] + ( + ["weight_scale", "weight_offset"] if hasattr(self.quant_method, "group_size") and self.quant_method.group_size > 0 else [] )