Use FlashAttention-3 (fa3) from sgl-kernel (#4954)
This commit is contained in:
@@ -47,7 +47,7 @@ runtime_common = [
|
||||
|
||||
srt = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel==0.0.5.post4",
|
||||
"sgl-kernel==0.0.6",
|
||||
"flashinfer_python==0.2.3",
|
||||
"torch==2.5.1",
|
||||
"cuda-python",
|
||||
|
||||
@@ -22,7 +22,7 @@ if TYPE_CHECKING:
|
||||
from sglang.srt.layers.radix_attention import RadixAttention
|
||||
from sglang.srt.model_executor.model_runner import ModelRunner
|
||||
|
||||
from flash_attn_interface import flash_attn_with_kvcache
|
||||
from sgl_kernel.flash_attn import flash_attn_with_kvcache
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
Reference in New Issue
Block a user