[EPLB][bugfix] Bugfix for fused mc2 (#6794)
### What this PR does / why we need it?
This pull request fixes a bug in the fused MC2 path of the EPLB (Expert Parallelism Load Balancing) adaptor that affects quantized MoE models. `init_expert_param_per_layer` did not include the fused weight-scale tensors (`fused_w1_scale_list`, `fused_w2_scale_list`) in the per-layer expert weight names, so those tensors were not tracked by EPLB when `VLLM_ASCEND_ENABLE_FUSED_MC2` is enabled. The patch also restricts the quantized path to W8A8 and raises an explicit error for unsupported quantization types. The sketch below illustrates the new name-selection logic.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 83b47f67b1
Signed-off-by: Spicy-Stick <873805887@qq.com>
Signed-off-by: root <root@localhost.localdomain>
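For context, the code path touched below is gated by the `VLLM_ASCEND_ENABLE_FUSED_MC2` flag from `vllm_ascend.envs`. A minimal sketch of enabling it, assuming the usual mapping of that attribute to an environment variable of the same name:

```python
import os

# Assumption: the envs_ascend attribute checked in the diff below is
# populated from an environment variable of the same name, so it must be
# set before vllm_ascend is imported. "1" matches the `== 1` check below.
os.environ["VLLM_ASCEND_ENABLE_FUSED_MC2"] = "1"
```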
@@ -22,6 +22,9 @@ import torch
 import torch.distributed as dist
 from vllm.logger import logger
+
+import vllm_ascend.envs as envs_ascend
+from vllm_ascend.quantization.methods.base import QuantType
 
 
 class VllmEplbAdaptor:
 
     def __init__(self, model, **args):
@@ -59,12 +62,19 @@ class VllmEplbAdaptor:
 
     def init_expert_param_per_layer(self):
         self.param_dict = dict()
         if self.model.quant_config is not None:
-            self.expert_weight_names = [
-                "w13_weight_list",
-                "w2_weight_list",
-                "w13_weight_scale_fp32_list",
-                "w2_weight_scale_list",
-            ]
+            quant_type = self.model.model.layers[self.num_dense_layers].mlp.experts.quant_type
+            if quant_type == QuantType.W8A8:
+                self.expert_weight_names = [
+                    "w13_weight_list",
+                    "w2_weight_list",
+                    "w13_weight_scale_fp32_list",
+                    "w2_weight_scale_list",
+                ]
+                if envs_ascend.VLLM_ASCEND_ENABLE_FUSED_MC2 == 1:
+                    self.expert_weight_names.append("fused_w1_scale_list")
+                    self.expert_weight_names.append("fused_w2_scale_list")
+            else:
+                raise ValueError(f"EPLB not support {quant_type}")
         else:
             self.expert_weight_names = ["w13_weight", "w2_weight"]
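For quick reference, the per-layer name sets the patched method is expected to produce, derived from the hunk above; this snippet only encodes the lists and does not import the real adaptor.

```python
# Expected expert_weight_names after this patch, per configuration.
w8a8_names = [
    "w13_weight_list",
    "w2_weight_list",
    "w13_weight_scale_fp32_list",
    "w2_weight_scale_list",
]
fused_mc2_names = w8a8_names + ["fused_w1_scale_list", "fused_w2_scale_list"]
unquantized_names = ["w13_weight", "w2_weight"]

# With VLLM_ASCEND_ENABLE_FUSED_MC2 == 1, the two fused scale tensors are
# the only addition over the plain W8A8 case.
assert set(fused_mc2_names) - set(w8a8_names) == {
    "fused_w1_scale_list",
    "fused_w2_scale_list",
}
print(fused_mc2_names)
```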