Add the arctic-inference requirement for speculative decoding with suffix_decode (#5045)
### Does this PR introduce _any_ user-facing change?
The suffix speculative decoding method relies on the `arctic-inference` library. This PR
adds it to the requirements so the feature works out of the box.
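Since the dependency is now expected to be present by default, a minimal import guard can confirm it is actually installed before suffix decoding is enabled. This is a hedged sketch, not code from the PR; the importable module name `arctic_inference` is an assumption inferred from the package name.

```python
import importlib.util

def has_arctic_inference() -> bool:
    """Return True if the arctic-inference package is importable.

    Assumes the distribution `arctic-inference` exposes a module named
    `arctic_inference` (hypothetical; not confirmed by this PR).
    """
    # find_spec returns None when the module cannot be located,
    # without actually importing it.
    return importlib.util.find_spec("arctic_inference") is not None

if __name__ == "__main__":
    print(f"arctic-inference available: {has_arctic_inference()}")
```

A check like this could back a clearer error message when the suffix decode path is requested but the library is missing.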
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c
---------
Signed-off-by: frankie-ys <yongshengwang@cmbchina.com>
Signed-off-by: frankie <wangyongsheng686@gmail.com>
@@ -27,7 +27,8 @@ requires = [
     "numba",
     "fastapi<0.124.0",
     "opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm
-    "compressed_tensors>=0.11.0"
+    "compressed_tensors>=0.11.0",
+    "arctic-inference==0.1.1"
 ]
 build-backend = "setuptools.build_meta"
||||
@@ -30,5 +30,6 @@ numba
 #--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi
 torch-npu==2.8.0

+arctic-inference==0.1.1
 transformers>=4.57.3
 fastapi<0.124.0