From ef8a29c429f62e0b0d013cc5091264ac75f70f56 Mon Sep 17 00:00:00 2001
From: Gang Chen <13298548+MoonBall@users.noreply.github.com>
Date: Sat, 5 Jul 2025 14:21:56 +0800
Subject: [PATCH] Embedding parallel by attn_tp (#7623)

---
 python/sglang/srt/models/deepseek_v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py
index 2ef529fca..1ebd403b6 100644
--- a/python/sglang/srt/models/deepseek_v2.py
+++ b/python/sglang/srt/models/deepseek_v2.py
@@ -1930,7 +1930,7 @@ class DeepseekV2Model(nn.Module):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
-            enable_tp=not global_server_args_dict["enable_dp_attention"],
+            use_attn_tp_group=True,
         )
         self.alt_stream = torch.cuda.Stream() if _is_cuda else None
         self.layers = nn.ModuleList(