Drop 0.12.0 support (#5146)

We decided to release v0.13.0 soon. So no need to support 0.12.0 now.
Let's drop it.

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-20 09:38:53 +08:00
committed by GitHub
parent 243ab7d720
commit 758d81dcb1
21 changed files with 63 additions and 149 deletions

View File

@@ -22,6 +22,7 @@ import torch
from vllm.lora.request import LoRARequest
from vllm.pooling_params import PoolingParams
from vllm.v1.outputs import LogprobsTensors
from vllm.v1.pool.metadata import PoolingStates
from vllm.v1.sample.logits_processor import (BatchUpdateBuilder,
LogitsProcessors)
from vllm.v1.worker.gpu_input_batch import InputBatch
@@ -29,16 +30,6 @@ from vllm.v1.worker.gpu_input_batch import InputBatch
from vllm_ascend.worker.block_table import MultiGroupBlockTable
class PoolingStates:
# NOTE: This should be removed after we drop support of vLLM v0.12.0
def __init__(self):
# for chunked prefill with ALL pooling
self.hidden_states_cache: list[torch.Tensor] = []
def clean(self):
self.hidden_states_cache.clear()
class NPUInputBatch(InputBatch):
def __init__(