Support low-latency dispatch (#10263)

Co-authored-by: Kaixi Hou <4001424+kaixih@users.noreply.github.com>
2025-10-02 18:02:19 +08:00
parent 6a29003410
commit 0b9dfba787
5 changed files with 80 additions and 29 deletions
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -896,6 +896,7 @@ class DeepseekV2MoE(nn.Module):
        if self.ep_size > 1:
            self.experts.deepep_dispatcher.dispatch_a(
                hidden_states=state.hidden_states_mlp_input,
+                input_global_scale=None,
                topk_idx=state.pop("topk_idx_local"),
                topk_weights=state.pop("topk_weights_local"),
                forward_batch=state.forward_batch,