Embedding parallel by attn_tp (#7623)

This commit is contained in:
Gang Chen
2025-07-05 14:21:56 +08:00
committed by GitHub
parent 8e9fb43d82
commit ef8a29c429

View File

@@ -1930,7 +1930,7 @@ class DeepseekV2Model(nn.Module):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
-            enable_tp=not global_server_args_dict["enable_dp_attention"],
+            use_attn_tp_group=True,
         )
         self.alt_stream = torch.cuda.Stream() if _is_cuda else None
         self.layers = nn.ModuleList(