Drop 0.12.0 support (#5146)
We have decided to release v0.13.0 soon, so there is no longer any need to support v0.12.0.
Let's drop it.
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
@@ -22,6 +22,7 @@ import torch
|
||||
from vllm.lora.request import LoRARequest
|
||||
from vllm.pooling_params import PoolingParams
|
||||
from vllm.v1.outputs import LogprobsTensors
|
||||
from vllm.v1.pool.metadata import PoolingStates
|
||||
from vllm.v1.sample.logits_processor import (BatchUpdateBuilder,
|
||||
LogitsProcessors)
|
||||
from vllm.v1.worker.gpu_input_batch import InputBatch
|
||||
@@ -29,16 +30,6 @@ from vllm.v1.worker.gpu_input_batch import InputBatch
|
||||
from vllm_ascend.worker.block_table import MultiGroupBlockTable
|
||||
|
||||
|
||||
class PoolingStates:
    """Per-request scratch state kept while pooling is in progress.

    Holds the list of hidden-state tensors accumulated across steps and
    offers a single method to drop them again.
    """

    # NOTE: This should be removed after we drop support of vLLM v0.12.0

    def __init__(self) -> None:
        """Create an empty state with no cached hidden states."""
        # for chunked prefill with ALL pooling
        self.hidden_states_cache: list[torch.Tensor] = []

    def clean(self) -> None:
        """Empty the hidden-state cache in place (the list object is kept)."""
        self.hidden_states_cache.clear()
|
||||
|
||||
|
||||
class NPUInputBatch(InputBatch):
|
||||
|
||||
def __init__(
|
||||
|
||||
Reference in New Issue
Block a user