[NVIDIA] Change to use num_local_experts (#8453)

This commit is contained in:
Kaixi Hou
2025-07-28 10:38:19 -07:00
committed by GitHub
parent ccfe52a057
commit 134fa43e19
2 changed files with 3 additions and 2 deletions

View File

@@ -1268,7 +1268,7 @@ class FlashInferEPMoE(EPMoE):
topk_group=self.topk_group,
intermediate_size=self.w2_weight.shape[2],
local_expert_offset=self.start_expert_id,
-local_num_experts=self.num_experts_per_partition,
+local_num_experts=self.num_local_experts,
routed_scaling_factor=self.routed_scaling_factor,
tile_tokens_dim=_get_tile_tokens_dim(
hidden_states.shape[0], self.top_k, self.num_experts