use fa3 in sgl-kernel (#4954)

This commit is contained in:
Yineng Zhang
2025-03-31 16:14:49 -07:00
committed by GitHub
parent ee47a6c1c3
commit 1c63e79756
3 changed files with 3 additions and 3 deletions

View File

@@ -47,7 +47,7 @@ runtime_common = [
srt = [
"sglang[runtime_common]",
-"sgl-kernel==0.0.5.post4",
+"sgl-kernel==0.0.6",
"flashinfer_python==0.2.3",
"torch==2.5.1",
"cuda-python",

View File

@@ -22,7 +22,7 @@ if TYPE_CHECKING:
from sglang.srt.layers.radix_attention import RadixAttention
from sglang.srt.model_executor.model_runner import ModelRunner
-from flash_attn_interface import flash_attn_with_kvcache
+from sgl_kernel.flash_attn import flash_attn_with_kvcache
@dataclass