Support page_first_direct layout in memory_pool_host (#10031)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -721,6 +721,13 @@ class ServerArgs:
|
||||
self.hicache_io_backend = "kernel"
|
||||
self.hicache_mem_layout = "page_first"
|
||||
|
||||
if self.hicache_mem_layout == "page_first_direct":
|
||||
if self.hicache_io_backend != "direct":
|
||||
self.hicache_io_backend = "direct"
|
||||
logger.warning(
|
||||
"Page first direct layout only support direct io backend"
|
||||
)
|
||||
|
||||
# Speculative Decoding
|
||||
if self.speculative_algorithm == "NEXTN":
|
||||
# NEXTN shares the same implementation of EAGLE
|
||||
@@ -1779,7 +1786,7 @@ class ServerArgs:
|
||||
parser.add_argument(
|
||||
"--hicache-mem-layout",
|
||||
type=str,
|
||||
choices=["layer_first", "page_first"],
|
||||
choices=["layer_first", "page_first", "page_first_direct"],
|
||||
default=ServerArgs.hicache_mem_layout,
|
||||
help="The layout of host memory pool for hierarchical cache.",
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user