From ec3563334b7f95925131ab758378341d23895863 Mon Sep 17 00:00:00 2001 From: frankie Date: Mon, 5 Jan 2026 19:15:49 +0800 Subject: [PATCH] Add the arctic-inference requirement for speculative decoding with suffix_decode (#5045) ### Does this PR introduce _any_ user-facing change? The suffix spec decode method relies on the `arctic-inference` library. This PR adds it to the requirements to make sure the feature works by default. ### How was this patch tested? - vLLM version: v0.12.0 - vLLM main: https://github.com/vllm-project/vllm/commit/ad32e3e19ccf0526cb6744a5fed09a138a5fb2f9 --------- Signed-off-by: frankie-ys Signed-off-by: frankie --- pyproject.toml | 3 ++- requirements.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index d4dd0cf6..f2e122a2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,8 @@ requires = [ "numba", "fastapi<0.124.0", "opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm - "compressed_tensors>=0.11.0" + "compressed_tensors>=0.11.0", + "arctic-inference==0.1.1" ] build-backend = "setuptools.build_meta" diff --git a/requirements.txt b/requirements.txt index c32d0817..eb6f3715 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,5 +30,6 @@ numba #--extra-index-url https://mirrors.huaweicloud.com/ascend/repos/pypi torch-npu==2.8.0 +arctic-inference==0.1.1 transformers>=4.57.3 fastapi<0.124.0