Hierarchical Caching supports MLA (#4009)

Signed-off-by: Changqi Lu <luchangqi.123@bytedance.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
Lu Changqi
2025-03-14 11:42:14 +08:00
committed by GitHub
parent bb37855653
commit 0e0ec70200
4 changed files with 231 additions and 38 deletions

View File

@@ -22,10 +22,7 @@ from typing import List, Optional
import torch
-from sglang.srt.mem_cache.memory_pool import (
-MHATokenToKVPoolHost,
-TokenToKVPoolAllocator,
-)
+from sglang.srt.mem_cache.memory_pool import HostKVCache, TokenToKVPoolAllocator
logger = logging.getLogger(__name__)
@@ -151,7 +148,7 @@ class HiCacheController:
def __init__(
self,
token_to_kv_pool_allocator: TokenToKVPoolAllocator,
-mem_pool_host: MHATokenToKVPoolHost,
+mem_pool_host: HostKVCache,
load_cache_event: threading.Event = None,
write_policy: str = "write_through_selective",
):