feat: add fast_decode_plan from flashinfer, flashinfer to 0.4.0rc3 (#10760)

Co-authored-by: Zihao Ye <yezihhhao@gmail.com>
Co-authored-by: Sleepcoo <Sleepcoo@gmail.com>
This commit is contained in:
eigen
2025-10-01 05:56:13 -04:00
committed by GitHub
parent 195a59fe23
commit ac1f2928ae
4 changed files with 49 additions and 191 deletions

View File

@@ -70,7 +70,7 @@ srt = [
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
-"flashinfer_python==0.4.0rc1",
+"flashinfer_python==0.4.0rc3",
]
blackwell = [
@@ -80,8 +80,8 @@ blackwell = [
"torchaudio==2.8.0",
"torchvision",
"cuda-python",
-"flashinfer_python==0.4.0rc1",
-"nvidia-cutlass-dsl==4.2.1",
+"flashinfer_python==0.4.0rc3",
+"nvidia-cutlass-dsl==4.2.0",
]
# HIP (Heterogeneous-computing Interface for Portability) for AMD