Add req slots leaking check (#842)
This commit is contained in:
@@ -280,6 +280,14 @@ class ModelTpServer:
|
|||||||
"KV cache pool leak detected!"
|
"KV cache pool leak detected!"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if self.req_to_token_pool.can_use_mem_size != self.req_to_token_pool.size:
|
||||||
|
warnings.warn(
|
||||||
|
"Warning: "
|
||||||
|
f"available req slots={self.req_to_token_pool.can_use_mem_size}, "
|
||||||
|
f"total slots={self.req_to_token_pool.size}\n"
|
||||||
|
"Memory pool leak detected!"
|
||||||
|
)
|
||||||
|
|
||||||
def handle_generate_request(
|
def handle_generate_request(
|
||||||
self,
|
self,
|
||||||
recv_req: TokenizedGenerateReqInput,
|
recv_req: TokenizedGenerateReqInput,
|
||||||
|
|||||||
Reference in New Issue
Block a user