misc: fix the req_to_token member change (#967)
This commit is contained in:
@@ -289,10 +289,10 @@ class ModelTpServer:
|
|||||||
"KV cache pool leak detected!"
|
"KV cache pool leak detected!"
|
||||||
)
|
)
|
||||||
|
|
||||||
if self.req_to_token_pool.can_use_mem_size != self.req_to_token_pool.size:
|
if len(self.req_to_token_pool.free_slots) != self.req_to_token_pool.size:
|
||||||
warnings.warn(
|
warnings.warn(
|
||||||
"Warning: "
|
"Warning: "
|
||||||
f"available req slots={self.req_to_token_pool.can_use_mem_size}, "
|
f"available req slots={len(self.req_to_token_pool.free_slots)}, "
|
||||||
f"total slots={self.req_to_token_pool.size}\n"
|
f"total slots={self.req_to_token_pool.size}\n"
|
||||||
"Memory pool leak detected!"
|
"Memory pool leak detected!"
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -32,7 +32,6 @@ class ReqToTokenPool:
|
|||||||
self.req_to_token = torch.empty(
|
self.req_to_token = torch.empty(
|
||||||
(size, max_context_len), dtype=torch.int32, device="cuda"
|
(size, max_context_len), dtype=torch.int32, device="cuda"
|
||||||
)
|
)
|
||||||
self.can_use_mem_size = size
|
|
||||||
|
|
||||||
def alloc(self, need_size: int) -> List[int]:
|
def alloc(self, need_size: int) -> List[int]:
|
||||||
if need_size > len(self.free_slots):
|
if need_size > len(self.free_slots):
|
||||||
|
|||||||
Reference in New Issue
Block a user