[PD] Add custom memory pool option to support Mooncake PD with NVLink (#7264)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
shangmingc
2025-06-18 08:21:37 +08:00
committed by GitHub
parent ceaa85c9e6
commit c26d7349d3
4 changed files with 163 additions and 47 deletions

View File

@@ -622,7 +622,10 @@ class Scheduler(
self.req_to_metadata_buffer_idx_allocator = ReqToMetadataIdxAllocator(
buffer_size
)
-self.disagg_metadata_buffers = MetadataBuffers(buffer_size)
+self.disagg_metadata_buffers = MetadataBuffers(
+buffer_size,
+custom_mem_pool=self.token_to_kv_pool_allocator.get_kvcache().maybe_get_custom_mem_pool(),
+)
# The decode requests polling kv cache
self.disagg_decode_transfer_queue = DecodeTransferQueue(
@@ -669,7 +672,10 @@ class Scheduler(
self.req_to_metadata_buffer_idx_allocator = ReqToMetadataIdxAllocator(
buffer_size
)
-self.disagg_metadata_buffers = MetadataBuffers(buffer_size)
+self.disagg_metadata_buffers = MetadataBuffers(
+buffer_size,
+custom_mem_pool=self.token_to_kv_pool_allocator.get_kvcache().maybe_get_custom_mem_pool(),
+)
self.disagg_prefill_bootstrap_queue = PrefillBootstrapQueue(
token_to_kv_pool=self.token_to_kv_pool_allocator.get_kvcache(),