Simple prefetch policy (#8692)

This commit is contained in:
pansicheng
2025-08-08 17:09:28 +08:00
committed by GitHub
parent 7490e3f67d
commit e2fd2b9c7e
6 changed files with 148 additions and 36 deletions

View File

@@ -203,6 +203,7 @@ class ServerArgs:
hicache_io_backend: str = "kernel"
hicache_mem_layout: str = "layer_first"
hicache_storage_backend: Optional[str] = None
# Controls when prefetching from the storage backend should stop.
# The --hicache-storage-prefetch-policy CLI flag restricts this to
# "best_effort", "wait_complete", or "timeout".
hicache_storage_prefetch_policy: str = "best_effort"
# Double Sparsity
enable_double_sparsity: bool = False
@@ -1626,6 +1627,13 @@ class ServerArgs:
default=ServerArgs.hicache_storage_backend,
help="The storage backend for hierarchical KV cache.",
)
# Register the prefetch-policy flag. argparse rejects any value outside
# `choices`, and the default is read from the ServerArgs field so the CLI
# and the class definition cannot drift apart.
parser.add_argument(
"--hicache-storage-prefetch-policy",
type=str,
choices=["best_effort", "wait_complete", "timeout"],
default=ServerArgs.hicache_storage_prefetch_policy,
help="Control when prefetching from the storage backend should stop.",
)
# Double Sparsity
parser.add_argument(