[optimize] Add two-stream norm for Qwen3 (#7740)

Co-authored-by: ispobock <ispobaoke@gmail.com>
This commit is contained in:
Yi Zhang
2025-07-04 00:59:17 +08:00
committed by GitHub
parent 646cef2e2e
commit 264dc6e744
4 changed files with 54 additions and 10 deletions

View File

@@ -190,6 +190,7 @@ class Qwen2DecoderLayer(nn.Module):
layer_id: int = 0,
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
alt_stream: Optional[torch.cuda.Stream] = None,
) -> None:
super().__init__()
self.hidden_size = config.hidden_size
@@ -253,6 +254,7 @@ class Qwen2Model(nn.Module):
quant_config: Optional[QuantizationConfig] = None,
prefix: str = "",
decoder_layer_type: type[nn.Module] = Qwen2DecoderLayer,
alt_stream: Optional[torch.cuda.Stream] = None,
) -> None:
super().__init__()
self.config = config
@@ -280,6 +282,7 @@ class Qwen2Model(nn.Module):
config=config,
quant_config=quant_config,
prefix=prefix,
alt_stream=alt_stream,
),
pp_rank=self.pp_group.rank_in_group,
pp_size=self.pp_group.world_size,