Support page first layout zero copy for mooncake store (#8651)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -575,6 +575,11 @@ class ServerArgs:
|
||||
"Pipeline parallelism is incompatible with overlap schedule."
|
||||
)
|
||||
|
||||
if self.hicache_storage_backend == "mooncake":
|
||||
# The mooncake storage backend requires the settings below, so force them here:
|
||||
self.hicache_io_backend = "kernel"
|
||||
self.hicache_mem_layout = "page_first"
|
||||
|
||||
# Speculative Decoding
|
||||
if self.speculative_algorithm == "NEXTN":
|
||||
# NEXTN shares the same implementation as EAGLE
|
||||
|
||||
Reference in New Issue
Block a user