[Fix] DeepEP Compatibility with Low Latency (#5068)

Co-authored-by: ch-wan <cwan39@gatech.edu>
This commit is contained in:
Jinyan Chen
2025-04-09 11:31:31 +08:00
committed by GitHub
parent aac531c53b
commit bc3f6db2dd
4 changed files with 146 additions and 118 deletions

View File

@@ -324,6 +324,7 @@ class DeepseekV2MoE(nn.Module):
correction_bias=self.correction_bias,
)
if self.ep_size > 1:
# TODO(ch-wan): allow users to set num_max_dispatch_tokens_per_rank value
(
hidden_states,
topk_idx,
@@ -336,7 +337,6 @@ class DeepseekV2MoE(nn.Module):
hidden_states,
topk_idx,
topk_weights,
self.num_experts,
forward_mode=forward_mode,
)
final_hidden_states = (