[feature] fia support sliding windows (#5239)

Enable FIA (fused infer attention) to support sliding-window attention and
adapt it to the Gemma3 model.
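
For context, sliding-window attention restricts each query token to the most recent `window_size` keys; Gemma3 interleaves such local layers with full global-attention layers, which is why the metadata below carries a dedicated `swa_mask` alongside the existing `attn_mask`. Below is a minimal sketch of how such a boolean mask can be built, where `build_swa_mask` and `window_size` are hypothetical names for illustration, not code from this PR:

```python
import torch

def build_swa_mask(seq_len: int, window_size: int) -> torch.Tensor:
    # True marks key positions a query may NOT attend to: anything in
    # the future, or more than `window_size - 1` positions in the past.
    idx = torch.arange(seq_len)
    rel = idx.unsqueeze(0) - idx.unsqueeze(1)  # rel[q, k] = k - q
    future = rel > 0                # keys after the query position
    too_old = rel <= -window_size   # keys that fell out of the window
    return future | too_old
```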

- vLLM version: release/v0.13.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: nsdie <yeyifan@huawei.com>
yeyifan
2025-12-29 14:56:25 +08:00
committed by GitHub
parent d8e15dae6c
commit 4da46da9bf
4 changed files with 38 additions and 2 deletions


@@ -142,6 +142,8 @@ class AscendCommonAttentionMetadata(CommonAttentionMetadata):
    spec_attn_mask: torch.Tensor = None
    swa_mask: torch.Tensor = None
    attn_state: Any = None
    graph_pad_size: int = -1
@@ -175,6 +177,7 @@ class AscendCommonAttentionMetadata(CommonAttentionMetadata):
            positions=self.positions[:num_actual_tokens],
            attn_mask=self.attn_mask,
            spec_attn_mask=self.spec_attn_mask,
            swa_mask=self.swa_mask,
            attn_state=self.attn_state,
            graph_pad_size=-1,  # It should be -1 when not run in fullgraph mode.
            num_input_tokens=num_actual_tokens,
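
For illustration, a consumer of this metadata would typically apply the mask to the attention logits before the softmax; this is a generic sketch under that assumption, not the FIA kernel's actual code path:

```python
import torch

def apply_swa_mask(scores: torch.Tensor, swa_mask: torch.Tensor) -> torch.Tensor:
    # scores: (..., q_len, kv_len) attention logits; swa_mask is boolean
    # with True at disallowed positions, as in the sketch above.
    return scores.masked_fill(swa_mask, float("-inf"))
```

Global-attention layers would keep using `attn_mask`; only Gemma3's sliding-window layers would read `swa_mask` from this metadata.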