use sgl_per_token_group_quant_fp8 kernel (#3493)

This commit is contained in:
Xiaoyu Zhang
2025-02-12 18:40:42 +08:00
committed by GitHub
parent b96e92e6e6
commit 45e3a7bc41
3 changed files with 43 additions and 2 deletions

View File

@@ -25,7 +25,7 @@ runtime_common = [
]
srt = [
"sglang[runtime_common]", "cuda-python",
-"sgl-kernel>=0.0.3.post3", "torch", "vllm>=0.6.4.post1,<=0.7.2",
+"sgl-kernel>=0.0.3.post4", "torch", "vllm>=0.6.4.post1,<=0.7.2",
"flashinfer_python>=0.2.0.post2", "outlines>=0.0.44,<=0.1.11"
]