Support page first layout zero copy for mooncake store (#8651)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
huangtingwei
2025-08-13 06:59:26 +08:00
committed by GitHub
parent 924827c3de
commit 0edda32001
4 changed files with 39 additions and 39 deletions

View File

@@ -575,6 +575,11 @@ class ServerArgs:
"Pipeline parallelism is incompatible with overlap schedule."
)
if self.hicache_storage_backend == "mooncake":
# The mooncake storage backend requires the settings below, so they are forced here (overwriting whatever was configured):
self.hicache_io_backend = "kernel"
self.hicache_mem_layout = "page_first"
# Speculative Decoding
if self.speculative_algorithm == "NEXTN":
# NEXTN shares the same implementation of EAGLE