diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 2ef529fca..1ebd403b6 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1930,7 +1930,7 @@ class DeepseekV2Model(nn.Module): self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, - enable_tp=not global_server_args_dict["enable_dp_attention"], + use_attn_tp_group=True, ) self.alt_stream = torch.cuda.Stream() if _is_cuda else None self.layers = nn.ModuleList(