Unit test for Hierarchical Caching (#4486)

This commit is contained in:
Zhiqiang Xie
2025-03-17 17:45:00 -07:00
committed by GitHub
parent 9b81f9bd34
commit a98290aea3
7 changed files with 65 additions and 5 deletions

View File

@@ -173,6 +173,7 @@ class ServerArgs:
# Attribute declarations of ServerArgs visible in this hunk, with their defaults.
enable_custom_logit_processor: bool = False
# NOTE(review): annotated as `str` but the default is None; `Optional[str]`
# would describe the declared default more accurately.
tool_call_parser: str = None
enable_hierarchical_cache: bool = False
# Ratio of the host KV cache memory pool size to the device pool size
# (wording taken from the --hicache-ratio help text). This class attribute is
# also what the --hicache-ratio CLI flag uses as its default.
hicache_ratio: float = 2.0
enable_flashinfer_mla: bool = False
enable_flashmla: bool = False
flashinfer_mla_disable_ragged: bool = False
@@ -1007,6 +1008,13 @@ class ServerArgs:
action="store_true",
help="Enable hierarchical cache",
)
# Register the optional --hicache-ratio flag. The value is parsed as a float,
# and when the flag is omitted it falls back to the ServerArgs.hicache_ratio
# class attribute, so the CLI default is not duplicated here.
# NOTE(review): no range check is applied at parse time — confirm whether the
# ratio is validated elsewhere.
parser.add_argument(
    "--hicache-ratio",
    default=ServerArgs.hicache_ratio,
    type=float,
    required=False,
    help="The ratio of the size of host KV cache memory pool to the size of device pool.",
)
# Server warmups
parser.add_argument(