diff --git a/python/sglang/srt/models/deepseek_v2.py b/python/sglang/srt/models/deepseek_v2.py index 1784ee132..18c408bc7 100644 --- a/python/sglang/srt/models/deepseek_v2.py +++ b/python/sglang/srt/models/deepseek_v2.py @@ -1938,7 +1938,7 @@ class DeepseekV2Model(nn.Module): self.embed_tokens = VocabParallelEmbedding( config.vocab_size, config.hidden_size, - use_attn_tp_group=True, + enable_tp=not global_server_args_dict["enable_dp_attention"], ) self.alt_stream = torch.cuda.Stream() if _is_cuda else None self.layers = nn.ModuleList(