[Readme] EPLB Support Scenarios (#4314)
### What this PR does / why we need it?
Add information on the scope of EPLB support.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
- vLLM version: v0.11.0
- vLLM main: 2918c1b49c
---------
Signed-off-by: shenchuxiaofugui <1311027364@qq.com>
```diff
@@ -256,6 +256,11 @@ class AscendFusedMoE(FusedMoE):
         self.moe_load = torch.zeros(local_num_experts,
                                     dtype=torch.int64).npu()

+        eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None)
+        if eplb_enable and (not isinstance(self.quant_method,
+                                           AscendW8A8DynamicFusedMoEMethod)):
+            raise ValueError("Eplb supports only w8a8_dynamic quantization.")
+
         self.moe_config.num_experts = self.global_num_experts
         self.moe_config.num_local_experts = self.local_num_experts
         self.moe_config.original_num_experts = num_experts
```
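The gate added above can be sketched as a standalone function: EPLB is treated as enabled when dynamic EPLB is on or an expert-map path is given, and in that case only the w8a8_dynamic quantization method is accepted. The class and variable names mirror the diff; the free-function form and the stand-in quant-method class are simplifications for illustration, not the actual `AscendFusedMoE` API.

```python
class AscendW8A8DynamicFusedMoEMethod:
    """Stand-in for the real Ascend w8a8_dynamic quant method class."""


def check_eplb_support(dynamic_eplb, expert_map_path, quant_method):
    """Sketch of the EPLB support check from the diff above.

    Raises ValueError when EPLB is requested with an unsupported
    quantization method; returns whether EPLB is enabled otherwise.
    """
    eplb_enable = dynamic_eplb or (expert_map_path is not None)
    if eplb_enable and not isinstance(quant_method,
                                      AscendW8A8DynamicFusedMoEMethod):
        raise ValueError("Eplb supports only w8a8_dynamic quantization.")
    return eplb_enable


# EPLB disabled: any quant method passes through.
assert check_eplb_support(False, None, object()) is False
# EPLB enabled via dynamic_eplb with the supported method: accepted.
assert check_eplb_support(True, None, AscendW8A8DynamicFusedMoEMethod()) is True
# EPLB enabled via an expert-map path with an unsupported method: rejected.
try:
    check_eplb_support(False, "/path/to/expert_map.json", object())
except ValueError:
    pass
```

Note that supplying `expert_map_path` alone is enough to trigger the check, so static expert maps are also restricted to w8a8_dynamic quantization.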