Add req slot leak check (#842)

This commit is contained in:
Liangsheng Yin
2024-07-30 18:29:01 -07:00
committed by GitHub
parent bb0501c0d9
commit a6c7ebbbcb

View File

@@ -280,6 +280,14 @@ class ModelTpServer:
"KV cache pool leak detected!"
)
if self.req_to_token_pool.can_use_mem_size != self.req_to_token_pool.size:
warnings.warn(
"Warning: "
f"available req slots={self.req_to_token_pool.can_use_mem_size}, "
f"total slots={self.req_to_token_pool.size}\n"
"Memory pool leak detected!"
)
def handle_generate_request(
self,
recv_req: TokenizedGenerateReqInput,