Simple prefetch policy (#8692)
This commit is contained in:
@@ -203,6 +203,7 @@ class ServerArgs:
|
||||
hicache_io_backend: str = "kernel"
|
||||
hicache_mem_layout: str = "layer_first"
|
||||
hicache_storage_backend: Optional[str] = None
|
||||
hicache_storage_prefetch_policy: str = "best_effort"
|
||||
|
||||
# Double Sparsity
|
||||
enable_double_sparsity: bool = False
|
||||
@@ -1626,6 +1627,13 @@ class ServerArgs:
|
||||
default=ServerArgs.hicache_storage_backend,
|
||||
help="The storage backend for hierarchical KV cache.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--hicache-storage-prefetch-policy",
|
||||
type=str,
|
||||
choices=["best_effort", "wait_complete", "timeout"],
|
||||
default=ServerArgs.hicache_storage_prefetch_policy,
|
||||
help="Control when prefetching from the storage backend should stop.",
|
||||
)
|
||||
|
||||
# Double Sparsity
|
||||
parser.add_argument(
|
||||
|
||||
Reference in New Issue
Block a user