Add the arctic-inference requirement for speculative decoding with suffix_decode (#5045)
### Does this PR introduce _any_ user-facing change?
The suffix speculative-decoding method relies on the `arctic-inference` library. This PR
adds it to the requirements to make sure the feature works by default.
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: frankie-ys <yongshengwang@cmbchina.com>
Signed-off-by: frankie <wangyongsheng686@gmail.com>
This commit is contained in:
@@ -27,7 +27,8 @@ requires = [
|
|||||||
"numba",
|
"numba",
|
||||||
"fastapi<0.124.0",
|
"fastapi<0.124.0",
|
||||||
"opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm
|
"opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm
|
||||||
"compressed_tensors>=0.11.0"
|
"compressed_tensors>=0.11.0",
|
||||||
|
"arctic-inference==0.1.1"
|
||||||
]
|
]
|
||||||
build-backend = "setuptools.build_meta"
|
build-backend = "setuptools.build_meta"
|
||||||
|
|
||||||
|
|||||||
@@ -30,5 +30,6 @@ numba
|
|||||||
#--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
|
#--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
|
||||||
torch-npu==2.8.0
|
torch-npu==2.8.0
|
||||||
|
|
||||||
|
arctic-inference==0.1.1
|
||||||
transformers>=4.57.3
|
transformers>=4.57.3
|
||||||
fastapi<0.124.0
|
fastapi<0.124.0
|
||||||
|
|||||||
Reference in New Issue
Block a user