From a6c7ebbbcb1ed135b117eab2b03fa1af9c9da05a Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Tue, 30 Jul 2024 18:29:01 -0700 Subject: [PATCH] Add req slots leaking check (#842) --- python/sglang/srt/managers/tp_worker.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index d21a0c694..1f024501e 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -280,6 +280,14 @@ class ModelTpServer: "KV cache pool leak detected!" ) + if self.req_to_token_pool.can_use_mem_size != self.req_to_token_pool.size: + warnings.warn( + "Warning: " + f"available req slots={self.req_to_token_pool.can_use_mem_size}, " + f"total slots={self.req_to_token_pool.size}\n" + "Memory pool leak detected!" + ) + def handle_generate_request( self, recv_req: TokenizedGenerateReqInput,