diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py index f82d341a..4eee35d0 100644 --- a/vllm_ascend/ops/fused_moe/fused_moe.py +++ b/vllm_ascend/ops/fused_moe/fused_moe.py @@ -256,6 +256,11 @@ class AscendFusedMoE(FusedMoE): self.moe_load = torch.zeros(local_num_experts, dtype=torch.int64).npu() + eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None) + if eplb_enable and (not isinstance(self.quant_method, + AscendW8A8DynamicFusedMoEMethod)): + raise ValueError("EPLB supports only w8a8_dynamic quantization.") + self.moe_config.num_experts = self.global_num_experts self.moe_config.num_local_experts = self.local_num_experts self.moe_config.original_num_experts = num_experts