Add Llama4 support (#5092)

Co-authored-by: Cheng Wan <cwan39@gatech.edu>
Co-authored-by: fzyzcjy <ch271828n@outlook.com>
Co-authored-by: ispobock <ispobaoke@163.com>
This commit is contained in:
Chang Su
2025-04-07 00:29:36 -07:00
committed by GitHub
parent d1bb171180
commit f04c80dc42
27 changed files with 2214 additions and 22 deletions

View File

@@ -35,6 +35,7 @@ class RadixAttention(nn.Module):
sliding_window_size: int = -1,
is_cross_attention: bool = False,
prefix: str = "",
use_irope: bool = False,
):
super().__init__()
self.tp_q_head_num = num_heads
@@ -50,6 +51,7 @@ class RadixAttention(nn.Module):
self.is_cross_attention = is_cross_attention
self.k_scale = None
self.v_scale = None
self.use_irope = use_irope
def forward(
self,