Interface change for kvcache io to support page first layout (#8318)

This commit is contained in:
Zhiqiang Xie
2025-07-31 20:37:49 -07:00
committed by GitHub
parent 9305ea6c2d
commit dd7ca00601
6 changed files with 371 additions and 171 deletions

View File

@@ -198,7 +198,8 @@ class ServerArgs:
hicache_ratio: float = 2.0
hicache_size: int = 0
hicache_write_policy: str = "write_through_selective"
hicache_io_backend: str = ""
hicache_io_backend: str = "kernel"
hicache_mem_layout: str = "layer_first"
hicache_storage_backend: Optional[str] = None
# Double Sparsity
@@ -1487,6 +1488,14 @@ class ServerArgs:
default=ServerArgs.hicache_io_backend,
help="The IO backend for KV cache transfer between CPU and GPU",
)
parser.add_argument(
"--hicache-mem-layout",
type=str,
choices=["layer_first", "page_first"],
default=ServerArgs.hicache_mem_layout,
help="The layout of host memory pool for hierarchical cache.",
)
parser.add_argument(
"--hicache-storage-backend",
type=str,