From 3a77c80b26db200105eb0a46edf67c06711bcb5c Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Fri, 12 Sep 2025 18:21:26 +0800 Subject: [PATCH] Fix FA4 import cause moe_fused_gate output be illegal memory (#10368) --- sgl-kernel/python/sgl_kernel/flash_attn.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/sgl-kernel/python/sgl_kernel/flash_attn.py b/sgl-kernel/python/sgl_kernel/flash_attn.py index 33e959703..270d00d44 100644 --- a/sgl-kernel/python/sgl_kernel/flash_attn.py +++ b/sgl-kernel/python/sgl_kernel/flash_attn.py @@ -9,11 +9,6 @@ try: except: raise ImportError("Can not import sgl_kernel. Please check your installation.") -try: - from ._fa4_interface import flash_attn_varlen_func as flash_attn_varlen_func_v4 -except ImportError: - flash_attn_varlen_func_v4 = None - @lru_cache(maxsize=1) def is_fa3_supported(device=None) -> bool: @@ -249,9 +244,8 @@ def flash_attn_varlen_func( ver=3, ): if ver == 4: - assert ( - flash_attn_varlen_func_v4 is not None - ), "FA4 is not available, please check your installation." + from ._fa4_interface import flash_attn_varlen_func as flash_attn_varlen_func_v4 + # Using `(-1, -1)` as no sliding window causes correctness issues for FA4. if window_size == (-1, -1): window_size = (None, None)