From ec3563334b7f95925131ab758378341d23895863 Mon Sep 17 00:00:00 2001 From: frankie Date: Mon, 5 Jan 2026 19:15:49 +0800 Subject: [PATCH] Add the arctic-inference requirement for speculative decoding with suffix_decode (#5045) ### Does this PR introduce _any_ user-facing change? The suffix spec decode method relies on the `arctic-inference` library. This PR adds it to the requirements to make sure the feature works by default. ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: frankie-ys Signed-off-by: frankie --- pyproject.toml | 3 ++- requirements.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d4dd0cf6..f2e122a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ requires = [ "numba", "fastapi<0.124.0", "opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm - "compressed_tensors>=0.11.0" + "compressed_tensors>=0.11.0", + "arctic-inference==0.1.1" ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index c32d0817..eb6f3715 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,5 +30,6 @@ numba #--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi torch-npu==2.8.0 +arctic-inference==0.1.1 transformers>=4.57.3 fastapi<0.124.0