From 4455b26e76aef569a6a5507389f2db7b7c256e2f Mon Sep 17 00:00:00 2001 From: DavidChan Date: Mon, 10 Mar 2025 15:50:34 +0800 Subject: [PATCH] [Bug fix] Fix the crash when enabling dp-attention on a single card (#3958) --- python/sglang/srt/models/deepseek_v2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 82c73ec94..40f6799a1 100755 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -848,12 +848,12 @@ class DeepseekV2AttentionMLA(nn.Module): def all_gather( input_tensor: torch.Tensor, forward_batch: ForwardBatch, rank, world_size, group ): - if world_size == 1: - return input_tensor - all_lens = forward_batch.global_num_tokens_cpu max_len = max(forward_batch.global_num_tokens_cpu) + if world_size == 1: + return input_tensor, 0, all_lens[0] + padded_tensor = torch.nn.functional.pad( input_tensor, (0, 0, 0, max_len - input_tensor.shape[0]) )