diff --git a/python/sglang/srt/layers/logits_processor.py b/python/sglang/srt/layers/logits_processor.py
index c01e10090..79d38193e 100644
--- a/python/sglang/srt/layers/logits_processor.py
+++ b/python/sglang/srt/layers/logits_processor.py
@@ -436,8 +436,8 @@ class LogitsProcessor(nn.Module):
         if self.do_tensor_parallel_all_gather_dp_attn:
             logits_metadata.compute_dp_attention_metadata(hidden_states)
             hidden_states, local_hidden_states = (
-                logits_metadata.gathered_buffer,
-                hidden_states.clone(),
+                torch.empty_like(logits_metadata.gathered_buffer),
+                hidden_states,
             )
             dp_gather_replicate(hidden_states, local_hidden_states, logits_metadata)
 
diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 9c618efa5..2ef529fca 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1840,11 +1840,6 @@ class DeepseekV2DecoderLayer(nn.Module):
             hidden_states, residual, forward_batch
         )
 
-        if self.enable_dp_attention and self.speculative_algorithm.is_eagle():
-            # NOTE: this line resolves the degradation of MTP reception rate for non-zero DP ranks.
-            # See discussion here (https://github.com/sgl-project/sglang/pull/6081#discussion_r2147452251).
-            hidden_states = hidden_states.clone()
-
         return hidden_states, residual
 
     def op_comm_prepare_attn(