Support dispatch low latency (#10263)
Co-authored-by: Kaixi Hou <4001424+kaixih@users.noreply.github.com>
This commit is contained in:
@@ -896,6 +896,7 @@ class DeepseekV2MoE(nn.Module):
        if self.ep_size > 1:
            self.experts.deepep_dispatcher.dispatch_a(
                hidden_states=state.hidden_states_mlp_input,
                input_global_scale=None,
                topk_idx=state.pop("topk_idx_local"),
                topk_weights=state.pop("topk_weights_local"),
                forward_batch=state.forward_batch,
Reference in New Issue
Block a user