Support the page_first_direct layout in memory_pool_host (#10031)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
huangtingwei
2025-09-12 14:19:44 +08:00
committed by GitHub
parent 53ca15529a
commit b4c2c421e9
3 changed files with 114 additions and 47 deletions

View File

@@ -721,6 +721,13 @@ class ServerArgs:
self.hicache_io_backend = "kernel"
self.hicache_mem_layout = "page_first"
if self.hicache_mem_layout == "page_first_direct":
if self.hicache_io_backend != "direct":
self.hicache_io_backend = "direct"
logger.warning(
"Page first direct layout only support direct io backend"
)
# Speculative Decoding
if self.speculative_algorithm == "NEXTN":
# NEXTN shares the same implementation of EAGLE
@@ -1779,7 +1786,7 @@ class ServerArgs:
parser.add_argument(
"--hicache-mem-layout",
type=str,
choices=["layer_first", "page_first"],
choices=["layer_first", "page_first", "page_first_direct"],
default=ServerArgs.hicache_mem_layout,
help="The layout of host memory pool for hierarchical cache.",
)