From 98457c0453d7b85171d32d742d1581a2491f7835 Mon Sep 17 00:00:00 2001
From: Huaixin Chang <61184708+changhuaixin@users.noreply.github.com>
Date: Thu, 14 Aug 2025 12:04:41 +0800
Subject: [PATCH] [Bugfix] Avoid unnecessary reduce-scatter call in prepare_mlp (#9169)

---
 python/sglang/srt/layers/dp_attention.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/python/sglang/srt/layers/dp_attention.py b/python/sglang/srt/layers/dp_attention.py
index 79397cce5..21d44561d 100644
--- a/python/sglang/srt/layers/dp_attention.py
+++ b/python/sglang/srt/layers/dp_attention.py
@@ -292,6 +292,10 @@ def _dp_gather_via_all_gather(
     forward_batch: ForwardBatch,
     is_partial: bool,
 ):
+    if get_attention_tp_size() == 1:
+        get_tp_group().all_gather_into_tensor(global_tokens, local_tokens)
+        return
+
     if not is_partial:
         if get_attention_tp_rank() != 0:
             local_tokens.fill_(0)