Add deepgemm-contiguous support to DeepEP normal (#5626)

Co-authored-by: Yingyi Huang <yingyihuang2000@outlook.com>
Co-authored-by: Cheng Wan <54331508+ch-wan@users.noreply.github.com>
Co-authored-by: Xuting Zhou <xutingz@nvidia.com>
Co-authored-by: ZhengHSI <zhenghsi@qq.com>
This commit is contained in:
lukec
2025-05-08 16:20:32 +08:00
committed by GitHub
parent a05bd83a94
commit acc816d8a2
6 changed files with 568 additions and 59 deletions

View File

@@ -357,6 +357,7 @@ class DeepseekV2MoE(nn.Module):
topk_idx,
topk_weights,
reorder_topk_ids,
num_recv_tokens_per_expert,
seg_indptr,
masked_m,
expected_m,
@@ -368,10 +369,13 @@ class DeepseekV2MoE(nn.Module):
)
final_hidden_states = self.experts(
hidden_states=hidden_states,
topk_idx=topk_idx,
topk_weights=topk_weights,
reorder_topk_ids=reorder_topk_ids,
seg_indptr=seg_indptr,
masked_m=masked_m,
expected_m=expected_m,
num_recv_tokens_per_expert=num_recv_tokens_per_expert,
forward_mode=forward_mode,
)
if self.ep_size > 1: