Embedding parallel by attn_tp (#7623)
@@ -1930,7 +1930,7 @@ class DeepseekV2Model(nn.Module):
         self.embed_tokens = VocabParallelEmbedding(
             config.vocab_size,
             config.hidden_size,
-            enable_tp=not global_server_args_dict["enable_dp_attention"],
+            use_attn_tp_group=True,
         )
         self.alt_stream = torch.cuda.Stream() if _is_cuda else None
         self.layers = nn.ModuleList(
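The change drops the conditional `enable_tp=not global_server_args_dict["enable_dp_attention"]` and instead passes `use_attn_tp_group=True`, so the embedding is presumably sharded across the attention tensor-parallel group rather than falling back to an unsharded embedding when DP attention is enabled. Below is a minimal, self-contained sketch of the vocab-parallel idea that a `VocabParallelEmbedding` layer is built on: each rank in the group owns a contiguous slice of the vocabulary, out-of-shard token ids contribute zeros, and summing the partial lookups across the group (an all-reduce in the real layer) reconstructs the full embedding. This is not the SGLang implementation; the helper names `shard_bounds` and `vocab_parallel_lookup`, and the single-process loop standing in for the TP group, are illustrative assumptions.

    # Conceptual sketch of vocab-parallel embedding (hypothetical helpers, not SGLang code).
    import torch

    def shard_bounds(vocab_size: int, rank: int, world_size: int) -> tuple[int, int]:
        # Contiguous per-rank vocabulary shard [start, end).
        per_rank = (vocab_size + world_size - 1) // world_size
        start = rank * per_rank
        end = min(start + per_rank, vocab_size)
        return start, end

    def vocab_parallel_lookup(full_weight: torch.Tensor, token_ids: torch.Tensor,
                              rank: int, world_size: int) -> torch.Tensor:
        # Partial lookup on one rank: ids outside this rank's shard produce zero rows.
        start, end = shard_bounds(full_weight.shape[0], rank, world_size)
        local_weight = full_weight[start:end]
        in_shard = (token_ids >= start) & (token_ids < end)
        local_ids = (token_ids - start).clamp(min=0) * in_shard  # out-of-shard ids -> 0
        out = local_weight[local_ids]
        out[~in_shard] = 0.0
        return out

    # Single-process stand-in for a 4-way TP group: summing the partial outputs
    # plays the role of the all-reduce over the (attention) TP group.
    vocab, hidden, tp = 32, 8, 4
    weight = torch.randn(vocab, hidden)
    ids = torch.tensor([0, 5, 17, 31])
    summed = sum(vocab_parallel_lookup(weight, ids, r, tp) for r in range(tp))
    assert torch.allclose(summed, weight[ids])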