fix: continue to use flashinfer 0.1.6 temporarily (#2517)

2024-12-19 14:03:24 +08:00
parent 64456cf023
commit 4b83db24f1
1 changed files with 1 additions and 1 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -23,7 +23,7 @@ runtime_common = ["aiohttp", "decord", "fastapi",
    "psutil", "pydantic", "python-multipart",
    "pyzmq>=25.1.2", "torchao>=0.7.0", "uvicorn", "uvloop",
    "xgrammar>=0.1.6"]
-srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1", "cuda-python", "flashinfer>=0.1.6"]
+srt = ["sglang[runtime_common]", "torch", "vllm>=0.6.3.post1,<=0.6.4.post1", "cuda-python", "flashinfer==0.1.6"]

 # HIP (Heterogeneous-computing Interface for Portability) for AMD
 # => base docker rocm/vllm-dev:20241022, not from public vllm whl