Hierarchical Caching supports MLA (#4009)

Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
2025-03-14 11:42:14 +08:00
parent bb37855653
commit 0e0ec70200
4 changed files with 231 additions and 38 deletions
--- a/python/sglang/srt/managers/cache_controller.py
+++ b/python/sglang/srt/managers/cache_controller.py
@@ -22,10 +22,7 @@ from typing import List, Optional

 import torch

-from sglang.srt.mem_cache.memory_pool import (
-    MHATokenToKVPoolHost,
-    TokenToKVPoolAllocator,
-)
+from sglang.srt.mem_cache.memory_pool import HostKVCache, TokenToKVPoolAllocator

 logger = logging.getLogger(__name__)

@@ -151,7 +148,7 @@ class HiCacheController:
    def __init__(
        self,
        token_to_kv_pool_allocator: TokenToKVPoolAllocator,
-        mem_pool_host: MHATokenToKVPoolHost,
+        mem_pool_host: HostKVCache,
        load_cache_event: threading.Event = None,
        write_policy: str = "write_through_selective",
    ):