[HICache] introduce eviction policy (#10190)

Signed-off-by: Xuchun Shang <xuchun.shang@linux.alibaba.com>
Co-authored-by: Teng Ma <sima.mt@alibaba-inc.com>
This commit is contained in:
Xuchun Shang
2025-09-18 11:10:20 +08:00
committed by GitHub
parent c32fb7a24d
commit 1ccd59c715
6 changed files with 78 additions and 13 deletions

View File

@@ -185,6 +185,7 @@ class ServerArgs:
hybrid_kvcache_ratio: Optional[float] = None
swa_full_tokens_ratio: float = 0.8
disable_hybrid_swa_memory: bool = False
radix_eviction_policy: str = "lru"
# Runtime options
device: Optional[str] = None
@@ -1907,6 +1908,13 @@ class ServerArgs:
default=ServerArgs.hicache_write_policy,
help="The write policy of hierarchical cache.",
)
parser.add_argument(
"--radix-eviction-policy",
type=str,
choices=["lru", "lfu"],
default=ServerArgs.radix_eviction_policy,
help="The eviction policy of radix trees. 'lru' stands for Least Recently Used, 'lfu' stands for Least Frequently Used.",
)
parser.add_argument(
"--hicache-io-backend",
type=str,