integrate blockwise fp8 kernel (#3529)

This commit is contained in:
yizhang2077
2025-02-13 04:39:33 +08:00
committed by GitHub
parent 4430c0a513
commit 98eecbda54
3 changed files with 124 additions and 23 deletions

View File

@@ -25,7 +25,7 @@ runtime_common = [
]
srt = [
"sglang[runtime_common]", "cuda-python",
-"sgl-kernel>=0.0.3.post4", "torch", "vllm>=0.6.4.post1,<=0.7.2",
+"sgl-kernel>=0.0.3.post5", "torch", "vllm>=0.6.4.post1,<=0.7.2",
"flashinfer_python>=0.2.0.post2", "outlines>=0.0.44,<=0.1.11"
]