[feature] fia support sliding windows (#5239)
Enable FIA to support the sliding-window attention feature and adapt it to the
Gemma3 model.
- vLLM version: release/v0.13.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: nsdie <yeyifan@huawei.com>
This commit is contained in:
@@ -142,6 +142,8 @@ class AscendCommonAttentionMetadata(CommonAttentionMetadata):
     spec_attn_mask: torch.Tensor = None
     swa_mask: torch.Tensor = None
     attn_state: Any = None
     graph_pad_size: int = -1
@@ -175,6 +177,7 @@ class AscendCommonAttentionMetadata(CommonAttentionMetadata):
     positions=self.positions[:num_actual_tokens],
     attn_mask=self.attn_mask,
     spec_attn_mask=self.spec_attn_mask,
     swa_mask=self.swa_mask,
     attn_state=self.attn_state,
     graph_pad_size=-1,  # It should be -1 when not run in fullgraph mode.
     num_input_tokens=num_actual_tokens,
Reference in New Issue
Block a user