[PD] Optimize custom mem pool usage and bump mooncake version (#7393)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
Shangming Cai
2025-06-21 00:50:39 +08:00
committed by GitHub
parent ceba0ce4f6
commit 187b85b7f3
3 changed files with 7 additions and 58 deletions

View File

@@ -270,12 +270,10 @@ class MHATokenToKVPool(KVCache):
"SGLANG_MOONCAKE_CUSTOM_MEM_POOL", "false"
)
if self.enable_custom_mem_pool:
-from sglang.srt.disaggregation.mooncake.memory_pool import (
-    MooncakeNVLinkAllocator,
-)
 # TODO(shangming): abstract custom allocator class for more backends
-allocator = MooncakeNVLinkAllocator.get_allocator(self.device)
+from mooncake.allocator import NVLinkAllocator
+allocator = NVLinkAllocator.get_allocator(self.device)
self.custom_mem_pool = torch.cuda.MemPool(allocator.allocator())
else:
self.custom_mem_pool = None
@@ -602,12 +600,10 @@ class MLATokenToKVPool(KVCache):
"SGLANG_MOONCAKE_CUSTOM_MEM_POOL", "false"
)
if self.enable_custom_mem_pool:
-from sglang.srt.disaggregation.mooncake.memory_pool import (
-    MooncakeNVLinkAllocator,
-)
 # TODO(shangming): abstract custom allocator class for more backends
-allocator = MooncakeNVLinkAllocator.get_allocator(self.device)
+from mooncake.allocator import NVLinkAllocator
+allocator = NVLinkAllocator.get_allocator(self.device)
self.custom_mem_pool = torch.cuda.MemPool(allocator.allocator())
else:
self.custom_mem_pool = None