diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py index 4eee35d0..a70a4cdf 100644 --- a/vllm_ascend/ops/fused_moe/fused_moe.py +++ b/vllm_ascend/ops/fused_moe/fused_moe.py @@ -257,7 +257,8 @@ class AscendFusedMoE(FusedMoE): dtype=torch.int64).npu() eplb_enable = self.dynamic_eplb or (self.expert_map_path is not None) - if eplb_enable and (not isinstance(self.quant_method, + if eplb_enable and (not hasattr(self.quant_method, "quant_method") or + not isinstance(self.quant_method.quant_method, AscendW8A8DynamicFusedMoEMethod)): raise ValueError("Eplb supports only w8a8_dynamic quantization.")