Unit test for Hierarchical Caching (#4486)
This commit is contained in:
@@ -173,6 +173,7 @@ class ServerArgs:
     enable_custom_logit_processor: bool = False
     tool_call_parser: str = None
     enable_hierarchical_cache: bool = False
+    hicache_ratio: float = 2.0
     enable_flashinfer_mla: bool = False
     enable_flashmla: bool = False
     flashinfer_mla_disable_ragged: bool = False
@@ -1007,6 +1008,13 @@ class ServerArgs:
             action="store_true",
             help="Enable hierarchical cache",
         )
+        parser.add_argument(
+            "--hicache-ratio",
+            type=float,
+            required=False,
+            default=ServerArgs.hicache_ratio,
+            help="The ratio of the size of host KV cache memory pool to the size of device pool.",
+        )
 
         # Server warmups
         parser.add_argument(
Reference in New Issue
Block a user