[Fix] DeepEP Compatibility with Low Latency (#5068)
Co-authored-by: ch-wan <cwan39@gatech.edu>
This commit is contained in:
@@ -324,6 +324,7 @@ class DeepseekV2MoE(nn.Module):
|
||||
correction_bias=self.correction_bias,
|
||||
)
|
||||
if self.ep_size > 1:
|
||||
# TODO(ch-wan): allow users to set num_max_dispatch_tokens_per_rank value
|
||||
(
|
||||
hidden_states,
|
||||
topk_idx,
|
||||
@@ -336,7 +337,6 @@ class DeepseekV2MoE(nn.Module):
|
||||
hidden_states,
|
||||
topk_idx,
|
||||
topk_weights,
|
||||
self.num_experts,
|
||||
forward_mode=forward_mode,
|
||||
)
|
||||
final_hidden_states = (
|
||||
|
||||
Reference in New Issue
Block a user