adapt custom allreduce for tensorrt llm (#2511)

This commit is contained in:
yizhang2077
2025-01-16 04:57:35 +08:00
committed by GitHub
parent a53454c55e
commit 767c9dec03
5 changed files with 242 additions and 68 deletions

View File

@@ -27,7 +27,7 @@ runtime_common = [
]
srt = [
"sglang[runtime_common]", "cuda-python",
-"sgl-kernel>=0.0.2.post12", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1",
+"sgl-kernel>=0.0.2.post14", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1",
"flashinfer==0.1.6"
]