From 1c63e797560405b9ec5149d93f02d6147383c627 Mon Sep 17 00:00:00 2001 From: Yineng Zhang Date: Mon, 31 Mar 2025 16:14:49 -0700 Subject: [PATCH] use fa3 in sgl-kernel (#4954) --- python/pyproject.toml | 2 +- python/sglang/srt/layers/attention/flashattention_backend.py | 2 +- scripts/ci_install_dependency.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/pyproject.toml b/python/pyproject.toml index babb31d4e..0994f321f 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -47,7 +47,7 @@ runtime_common = [ srt = [ "sglang[runtime_common]", - "sgl-kernel==0.0.5.post4", + "sgl-kernel==0.0.6", "flashinfer_python==0.2.3", "torch==2.5.1", "cuda-python", diff --git a/python/sglang/srt/layers/attention/flashattention_backend.py b/python/sglang/srt/layers/attention/flashattention_backend.py index 93c263f74..f04742c0c 100644 --- a/python/sglang/srt/layers/attention/flashattention_backend.py +++ b/python/sglang/srt/layers/attention/flashattention_backend.py @@ -22,7 +22,7 @@ if TYPE_CHECKING: from sglang.srt.layers.radix_attention import RadixAttention from sglang.srt.model_executor.model_runner import ModelRunner -from flash_attn_interface import flash_attn_with_kvcache +from sgl_kernel.flash_attn import flash_attn_with_kvcache @dataclass diff --git a/scripts/ci_install_dependency.sh b/scripts/ci_install_dependency.sh index 5672e380c..09bf477ec 100755 --- a/scripts/ci_install_dependency.sh +++ b/scripts/ci_install_dependency.sh @@ -15,7 +15,7 @@ pip install -e "python[all]" --find-links https://flashinfer.ai/whl/cu124/torch2 rm -rf /root/.cache/flashinfer # Force reinstall flashinfer and torch_memory_saver pip install flashinfer_python==0.2.3 --find-links ${FLASHINFER_REPO} --force-reinstall --no-deps -pip install sgl-kernel==0.0.5.post4 --force-reinstall +pip install sgl-kernel==0.0.6 --force-reinstall pip install torch_memory_saver pip install transformers==4.50.0 sentence_transformers accelerate==1.4.0 peft pandas datasets timm torchaudio