[Bugfix] Add register_kv_caches in ucm_connector (#5657)
### What this PR does / why we need it?
To adapt to different KV cache shapes, UCM now initializes its store
inside `register_kv_caches` instead of at construction time.
This update therefore adds the `register_kv_caches` interface to
UCMConnectorV1.
### How was this patch tested?
- vLLM version: v0.13.0
- vLLM main:
2f4e6548ef
Signed-off-by: UnifiedCacheManager <unifiedcachem@163.com>
This commit is contained in:
committed by
GitHub
parent
cd59323e40
commit
d6bb17f10e
@@ -40,6 +40,15 @@ class UCMConnectorV1(KVConnectorBase_V1):
|
|||||||
# ==============================
|
# ==============================
|
||||||
# Worker-side methods
|
# Worker-side methods
|
||||||
# ==============================
|
# ==============================
|
||||||
|
def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]) -> None:
|
||||||
|
"""
|
||||||
|
Initialize with the KV caches. Useful for pre-registering the
|
||||||
|
KV Caches in the KVConnector (e.g. for NIXL).
|
||||||
|
Args:
|
||||||
|
kv_caches: A dictionary mapping layer names to KV cache tensors.
|
||||||
|
"""
|
||||||
|
self._ucm_engine.register_kv_caches(kv_caches)
|
||||||
|
|
||||||
def start_load_kv(self, forward_context: "ForwardContext",
|
def start_load_kv(self, forward_context: "ForwardContext",
|
||||||
**kwargs: Any) -> None:
|
**kwargs: Any) -> None:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user