Sync from v0.13
vllm/model_executor/layers/attention_layer_base.py (new file, 32 lines)
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Base class for attention-like layers."""

from abc import ABC, abstractmethod

from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig
from vllm.v1.kv_cache_interface import KVCacheSpec


class AttentionLayerBase(ABC):
    """
    Base class for attention-like layers (Attention, Mamba, etc.)
    that support the v1 engine.

    This provides a common interface for getting attention backends
    from different layer types.
    """

    @abstractmethod
    def get_attn_backend(self) -> type[AttentionBackend]:
        """Get the attention backend class for this layer."""
        pass

    @abstractmethod
    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        """
        Get the KV cache spec for this layer.
        May be None if the layer does not need KV cache.
        """
        pass
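For context, here is a minimal sketch of how a concrete layer might implement this interface. It is not part of this commit: the subclass name is hypothetical, the choice of FlashAttentionBackend is illustrative, and its import path is assumed from vLLM's v1 tree and may differ across versions.

from vllm.attention.backends.abstract import AttentionBackend
from vllm.config import VllmConfig
from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
from vllm.v1.kv_cache_interface import KVCacheSpec


class ExampleAttentionLayer(AttentionLayerBase):
    """Hypothetical layer showing one way to satisfy the interface."""

    def get_attn_backend(self) -> type[AttentionBackend]:
        # Return the backend *class*, not an instance. FlashAttentionBackend
        # is an assumed example; the import path follows vLLM's v1 layout.
        from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend
        return FlashAttentionBackend

    def get_kv_cache_spec(self, vllm_config: VllmConfig) -> KVCacheSpec | None:
        # A layer that allocates no KV cache (e.g. a Mamba-style block that
        # manages its own state) may simply return None.
        return None

Presumably the engine can then iterate over a model's layers, query each one through this common interface, and skip layers whose get_kv_cache_spec returns None when planning cache allocation.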