use fa3 in sgl-kernel (#4954)

2025-03-31 16:14:49 -07:00
parent ee47a6c1c3
commit 1c63e79756
3 changed files with 3 additions and 3 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -47,7 +47,7 @@ runtime_common = [

 srt = [
    "sglang[runtime_common]",
-    "sgl-kernel==0.0.5.post4",
+    "sgl-kernel==0.0.6",
    "flashinfer_python==0.2.3",
    "torch==2.5.1",
    "cuda-python",
--- a/python/sglang/srt/layers/attention/flashattention_backend.py
+++ b/python/sglang/srt/layers/attention/flashattention_backend.py
@@ -22,7 +22,7 @@ if TYPE_CHECKING:
    from sglang.srt.layers.radix_attention import RadixAttention
    from sglang.srt.model_executor.model_runner import ModelRunner

-from flash_attn_interface import flash_attn_with_kvcache
+from sgl_kernel.flash_attn import flash_attn_with_kvcache


@dataclass