Sync from v0.13
This commit is contained in:
139
vllm/v1/kv_offload/lru_manager.py
Normal file
139
vllm/v1/kv_offload/lru_manager.py
Normal file
@@ -0,0 +1,139 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from collections import OrderedDict
|
||||
from collections.abc import Iterable
|
||||
|
||||
from vllm.v1.core.kv_cache_utils import BlockHash
|
||||
from vllm.v1.kv_offload.abstract import (
|
||||
LoadStoreSpec,
|
||||
OffloadingEvent,
|
||||
OffloadingManager,
|
||||
PrepareStoreOutput,
|
||||
)
|
||||
from vllm.v1.kv_offload.backend import Backend, BlockStatus
|
||||
|
||||
|
||||
class LRUOffloadingManager(OffloadingManager):
|
||||
"""
|
||||
An OffloadingManager with a pluggable backend, which evicts blocks by LRU.
|
||||
"""
|
||||
|
||||
def __init__(self, backend: Backend, enable_events: bool = False):
|
||||
self.backend: Backend = backend
|
||||
# block_hash -> BlockStatus
|
||||
self.blocks: OrderedDict[BlockHash, BlockStatus] = OrderedDict()
|
||||
self.events: list[OffloadingEvent] | None = [] if enable_events else None
|
||||
|
||||
def lookup(self, block_hashes: Iterable[BlockHash]) -> int:
|
||||
hit_count = 0
|
||||
for block_hash in block_hashes:
|
||||
block = self.blocks.get(block_hash)
|
||||
if block is None or not block.is_ready:
|
||||
break
|
||||
hit_count += 1
|
||||
return hit_count
|
||||
|
||||
def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec:
|
||||
blocks = []
|
||||
for block_hash in block_hashes:
|
||||
block = self.blocks[block_hash]
|
||||
assert block.is_ready
|
||||
block.ref_cnt += 1
|
||||
blocks.append(block)
|
||||
|
||||
return self.backend.get_load_store_spec(block_hashes, blocks)
|
||||
|
||||
def touch(self, block_hashes: Iterable[BlockHash]):
|
||||
for block_hash in reversed(list(block_hashes)):
|
||||
if self.blocks.get(block_hash):
|
||||
self.blocks.move_to_end(block_hash)
|
||||
|
||||
def complete_load(self, block_hashes: Iterable[BlockHash]):
|
||||
for block_hash in block_hashes:
|
||||
block = self.blocks[block_hash]
|
||||
assert block.ref_cnt > 0
|
||||
block.ref_cnt -= 1
|
||||
|
||||
def prepare_store(
|
||||
self, block_hashes: Iterable[BlockHash]
|
||||
) -> PrepareStoreOutput | None:
|
||||
# filter out blocks that are already stored
|
||||
block_hashes_to_store = [
|
||||
block_hash for block_hash in block_hashes if block_hash not in self.blocks
|
||||
]
|
||||
|
||||
num_blocks_to_evict = (
|
||||
len(block_hashes_to_store) - self.backend.get_num_free_blocks()
|
||||
)
|
||||
|
||||
# build list of blocks to evict
|
||||
to_evict = []
|
||||
if num_blocks_to_evict > 0:
|
||||
for block_hash, block in self.blocks.items():
|
||||
if block.ref_cnt == 0:
|
||||
to_evict.append(block_hash)
|
||||
num_blocks_to_evict -= 1
|
||||
if num_blocks_to_evict == 0:
|
||||
break
|
||||
else:
|
||||
# we could not evict enough blocks
|
||||
return None
|
||||
|
||||
# evict blocks
|
||||
for block_hash in to_evict:
|
||||
self.backend.free(self.blocks.pop(block_hash))
|
||||
|
||||
if to_evict and self.events is not None:
|
||||
self.events.append(
|
||||
OffloadingEvent(
|
||||
block_hashes=to_evict,
|
||||
block_size=self.backend.block_size,
|
||||
medium=self.backend.medium,
|
||||
removed=True,
|
||||
)
|
||||
)
|
||||
|
||||
blocks = self.backend.allocate_blocks(block_hashes_to_store)
|
||||
assert len(blocks) == len(block_hashes_to_store)
|
||||
|
||||
for block_hash, block in zip(block_hashes_to_store, blocks):
|
||||
self.blocks[block_hash] = block
|
||||
|
||||
# build store specs for allocated blocks
|
||||
store_spec = self.backend.get_load_store_spec(block_hashes_to_store, blocks)
|
||||
|
||||
return PrepareStoreOutput(
|
||||
block_hashes_to_store=block_hashes_to_store,
|
||||
store_spec=store_spec,
|
||||
block_hashes_evicted=to_evict,
|
||||
)
|
||||
|
||||
def complete_store(self, block_hashes: Iterable[BlockHash], success: bool = True):
|
||||
stored_block_hashes: list[BlockHash] = []
|
||||
if success:
|
||||
for block_hash in block_hashes:
|
||||
block = self.blocks[block_hash]
|
||||
if not block.is_ready:
|
||||
block.ref_cnt = 0
|
||||
stored_block_hashes.append(block_hash)
|
||||
else:
|
||||
for block_hash in block_hashes:
|
||||
block = self.blocks[block_hash]
|
||||
if not block.is_ready:
|
||||
self.backend.free(block)
|
||||
del self.blocks[block_hash]
|
||||
|
||||
if stored_block_hashes and self.events is not None:
|
||||
self.events.append(
|
||||
OffloadingEvent(
|
||||
block_hashes=stored_block_hashes,
|
||||
block_size=self.backend.block_size,
|
||||
medium=self.backend.medium,
|
||||
removed=False,
|
||||
)
|
||||
)
|
||||
|
||||
def take_events(self) -> Iterable[OffloadingEvent]:
|
||||
if self.events is not None:
|
||||
yield from self.events
|
||||
self.events.clear()
|
||||
Reference in New Issue
Block a user