From b3c1f2e4f2436d7afc9e4ba0e95e15e5b0605b3e Mon Sep 17 00:00:00 2001
From: fzyzcjy <5236035+fzyzcjy@users.noreply.github.com>
Date: Mon, 18 Aug 2025 03:53:34 +0800
Subject: [PATCH] Fix memory pool leak error (#9271)

---
 python/sglang/srt/mem_cache/allocator.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python/sglang/srt/mem_cache/allocator.py b/python/sglang/srt/mem_cache/allocator.py
index 64c2fe318..64a6116c2 100644
--- a/python/sglang/srt/mem_cache/allocator.py
+++ b/python/sglang/srt/mem_cache/allocator.py
@@ -486,6 +486,11 @@ class PagedTokenToKVPoolAllocator(BaseTokenToKVPoolAllocator):
         ):
             self.merge_and_sort_free()
 
+        assert self.max_num_extend_tokens_next_power_of_2 >= extend_num_tokens, (
+            f"{self.max_num_extend_tokens_next_power_of_2=} >= {extend_num_tokens=} does not hold. "
+            f"If this happens in PD, consider letting chunked_prefill_size in D be as large as in P"
+        )
+
         out_indices = torch.empty(
             (extend_num_tokens,), dtype=torch.int64, device=self.device
         )