Files
enginex-mthreads-vllm/vllm/v1/kv_offload/mediums.py
2026-01-19 10:38:50 +08:00

40 lines
880 B
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from abc import ABC
import numpy as np
from vllm.v1.kv_offload.abstract import LoadStoreSpec
class BlockIDsLoadStoreSpec(LoadStoreSpec, ABC):
"""
Spec for loading/storing KV blocks from given block numbers.
"""
def __init__(self, block_ids: list[int]):
self.block_ids = np.array(block_ids, dtype=np.int64)
def __repr__(self) -> str:
return repr(self.block_ids)
class GPULoadStoreSpec(BlockIDsLoadStoreSpec):
"""
Spec for loading/storing a KV block to GPU memory.
"""
@staticmethod
def medium() -> str:
return "GPU"
class CPULoadStoreSpec(BlockIDsLoadStoreSpec):
"""
Spec for loading/storing a KV block to CPU memory.
"""
@staticmethod
def medium() -> str:
return "CPU"