From ca63f075b7d8cef11576bc2f8d3f2ebf32228604 Mon Sep 17 00:00:00 2001 From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com> Date: Mon, 15 Sep 2025 10:03:27 +0800 Subject: [PATCH] Revert "Fix FA4 import cause moe_fused_gate output be illegal memory" (#10432) --- sgl-kernel/python/sgl_kernel/flash_attn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/sgl-kernel/python/sgl_kernel/flash_attn.py b/sgl-kernel/python/sgl_kernel/flash_attn.py index 270d00d44..33e959703 100644 --- a/sgl-kernel/python/sgl_kernel/flash_attn.py +++ b/sgl-kernel/python/sgl_kernel/flash_attn.py @@ -9,6 +9,11 @@ try: except: raise ImportError("Can not import sgl_kernel. Please check your installation.") +try: + from ._fa4_interface import flash_attn_varlen_func as flash_attn_varlen_func_v4 +except ImportError: + flash_attn_varlen_func_v4 = None + @lru_cache(maxsize=1) def is_fa3_supported(device=None) -> bool: @@ -244,8 +249,9 @@ def flash_attn_varlen_func( ver=3, ): if ver == 4: - from ._fa4_interface import flash_attn_varlen_func as flash_attn_varlen_func_v4 - + assert ( + flash_attn_varlen_func_v4 is not None + ), "FA4 is not available, please check your installation." # Using `(-1, -1)` as no sliding window causes correctness issues for FA4. if window_size == (-1, -1): window_size = (None, None)