diff --git a/vllm_ascend/eplb/core/eplb_utils.py b/vllm_ascend/eplb/core/eplb_utils.py
index 4920de30..aa681249 100644
--- a/vllm_ascend/eplb/core/eplb_utils.py
+++ b/vllm_ascend/eplb/core/eplb_utils.py
@@ -104,7 +104,6 @@ def generate_log2phy_map(global_expert_map, ep_rank):
     for rankid, map_per_rank in enumerate(global_expert_map):
         for idx, val in enumerate(map_per_rank):
             val = val.item()
-            # 计算value:当前值 + i * 有效元素个数
             if val != -1:
                 log2phy_map[idx].append(val + rankid * valid_count)
 
diff --git a/vllm_ascend/ops/fused_moe/fused_moe.py b/vllm_ascend/ops/fused_moe/fused_moe.py
index daaca8b9..23f327d3 100644
--- a/vllm_ascend/ops/fused_moe/fused_moe.py
+++ b/vllm_ascend/ops/fused_moe/fused_moe.py
@@ -180,7 +180,7 @@ class AscendFusedMoE(FusedMoE):
             or ascend_config.expert_map_record_path) and (
                 self.log2phy is not None)
         self.local_num_experts = (torch.sum(
-            self._expert_map != -1) if self._expert_map is not None else
+            self._expert_map != -1).item() if self._expert_map is not None else
                                   self.global_num_experts)
         if self._expert_map is not None:
             logger.info_once(
diff --git a/vllm_ascend/ops/fused_moe/token_dispatcher.py b/vllm_ascend/ops/fused_moe/token_dispatcher.py
index aeb751d0..e17b033e 100644
--- a/vllm_ascend/ops/fused_moe/token_dispatcher.py
+++ b/vllm_ascend/ops/fused_moe/token_dispatcher.py
@@ -335,7 +335,9 @@ class TokenDispatcherWithAllGather(MoETokenDispatcher):
         super().__init__(**kwargs)
         self.apply_router_weight_on_input = False
         self.max_num_tokens = kwargs.get("max_num_tokens")
-        self.num_experts_local = kwargs.get("num_local_experts", 0)
+        num_experts_local = kwargs.get("num_local_experts", 0)
+        self.num_experts_local = num_experts_local.item() if torch.is_tensor(
+            num_experts_local) else int(num_experts_local)
         self.original_shape = None
         self.with_quant = False
 
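
All three hunks apply the same fix: values that may arrive as 0-dim tensors (the result of torch.sum over a mask, or a kwargs entry) are converted to plain Python ints before being stored or compared. Below is a minimal standalone sketch of that pattern; the to_int helper and the toy expert_map are illustrative only, not part of this change.

import torch

def to_int(value) -> int:
    # Normalize: a 0-dim tensor becomes a Python int, a plain number passes through.
    return value.item() if torch.is_tensor(value) else int(value)

# torch.sum(...) over a bool mask returns a 0-dim tensor, not an int;
# .item() extracts the scalar, as in the fused_moe.py hunk.
expert_map = torch.tensor([0, -1, 1, -1, 2])
local_num_experts = torch.sum(expert_map != -1).item()
assert isinstance(local_num_experts, int) and local_num_experts == 3

# As in the token_dispatcher.py hunk: kwargs may carry either form.
assert to_int(torch.tensor(8)) == to_int(8) == 8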