From aea98512a8552e49cd4f33a0754437a15f6e4139 Mon Sep 17 00:00:00 2001
From: Qingquan Song
Date: Fri, 11 Apr 2025 23:37:52 -0700
Subject: [PATCH] Fix fa3 window size setup (#5316)

---
 python/sglang/srt/layers/attention/flashattention_backend.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/layers/attention/flashattention_backend.py b/python/sglang/srt/layers/attention/flashattention_backend.py
index 2ab7f6cb5..76496300c 100644
--- a/python/sglang/srt/layers/attention/flashattention_backend.py
+++ b/python/sglang/srt/layers/attention/flashattention_backend.py
@@ -523,7 +523,7 @@ class FlashAttentionBackend(AttentionBackend):
         # here is two side inclusive
         window_size = (
             (layer.sliding_window_size, 0)
-            if layer.sliding_window_size is not None
+            if layer.sliding_window_size is not None and layer.sliding_window_size > -1
             else (-1, -1)
         )
         k_descale, v_descale = None, None
@@ -664,7 +664,7 @@ class FlashAttentionBackend(AttentionBackend):
         # here is two side inclusive
         window_size = (
             (layer.sliding_window_size, 0)
-            if layer.sliding_window_size is not None
+            if layer.sliding_window_size is not None and layer.sliding_window_size > -1
            else (-1, -1)
         )
         causal = not layer.is_cross_attention