Sync from v0.13
vllm/model_executor/layers/attention_layer_base.py (new file, 32 lines)
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Base class for attention-like layers."""

from abc import ABC, abstractmethod

from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig
from vllm.v1.kv_cache_interface import KVCacheSpec


class AttentionLayerBase(ABC):
    """
    Base class for attention-like layers (Attention, Mamba, etc.)
    that support the v1 engine.

    This provides a common interface for getting attention backends
    from different layer types.
    """

    @abstractmethod
    def get_attn_backend(self) -> type[AttentionBackend]:
        """Get the attention backend class for this layer."""
        pass

    @abstractmethod
    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        """
        Get the KV cache spec for this layer.
        May be None if the layer does not need KV cache.
        """
        pass
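For context, here is a minimal sketch of how a concrete layer might implement this interface. It is not part of this commit: the subclass name is hypothetical, the choice of FlashAttentionBackend is illustrative, and its import path is assumed from vLLM's v1 tree and may differ across versions.

from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
from vllm.v1.kv_cache_interface import KVCacheSpec


class ExampleAttentionLayer(AttentionLayerBase):
    """Hypothetical layer showing one way to satisfy the interface."""

    def get_attn_backend(self) -> type[AttentionBackend]:
        # Return the backend *class*, not an instance. FlashAttentionBackend
        # is an assumed example; the import path follows vLLM's v1 layout.
        from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
        return FlashAttentionBackend

    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        # A layer that allocates no KV cache (e.g. a Mamba-style block that
        # manages its own state) may simply return None.
        return None

Presumably the engine can then iterate over a model's layers, query each one through this common interface, and skip layers whose get_kv_cache_spec returns None when planning cache allocation.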