[PD] Optimize custom mem pool usage and bump mooncake version (#7393)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
Shangming Cai
2025-06-21 00:50:39 +08:00
committed by GitHub
parent ceba0ce4f6
commit 187b85b7f3
3 changed files with 7 additions and 58 deletions

View File

@@ -270,12 +270,10 @@ class MHATokenToKVPool(KVCache):
"SGLANG_MOONCAKE_CUSTOM_MEM_POOL", "false"
)
if self.enable_custom_mem_pool:
-from sglang.srt.disaggregation.mooncake.memory_pool import (
-    MooncakeNVLinkAllocator,
-)
 # TODO(shangming): abstract custom allocator class for more backends
-allocator = MooncakeNVLinkAllocator.get_allocator(self.device)
+from mooncake.allocator import NVLinkAllocator
+allocator = NVLinkAllocator.get_allocator(self.device)
self.custom_mem_pool = torch.cuda.MemPool(allocator.allocator())
else:
self.custom_mem_pool = None
@@ -602,12 +600,10 @@ class MLATokenToKVPool(KVCache):
"SGLANG_MOONCAKE_CUSTOM_MEM_POOL", "false"
)
if self.enable_custom_mem_pool:
-from sglang.srt.disaggregation.mooncake.memory_pool import (
-    MooncakeNVLinkAllocator,
-)
 # TODO(shangming): abstract custom allocator class for more backends
-allocator = MooncakeNVLinkAllocator.get_allocator(self.device)
+from mooncake.allocator import NVLinkAllocator
+allocator = NVLinkAllocator.get_allocator(self.device)
self.custom_mem_pool = torch.cuda.MemPool(allocator.allocator())
else:
self.custom_mem_pool = None