Sampling penalties memory interface (#2870)

This commit is contained in:
Xiaoyu Zhang
2025-01-13 23:09:00 +08:00
committed by GitHub
parent c1e097ca66
commit d08c77c434
7 changed files with 251 additions and 41 deletions

View File

@@ -27,7 +27,7 @@ runtime_common = [
]
srt = [
"sglang[runtime_common]", "cuda-python",
-"sgl-kernel>=0.0.2.post11", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1",
+"sgl-kernel>=0.0.2.post12", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1",
"flashinfer==0.1.6"
]