Further fix memory pool leak error (#9298)

This commit is contained in:
fzyzcjy
2025-08-18 15:58:06 +08:00
committed by GitHub
parent 968e181826
commit 4c0bb411e5
2 changed files with 7 additions and 16 deletions

View File

@@ -1353,11 +1353,6 @@ class ModelRunner:
# Initialize token_to_kv_pool_allocator
need_sort = self.server_args.disaggregation_mode in ("decode", "prefill")
max_num_extend_tokens = (
self.server_args.chunked_prefill_size
if self.server_args.chunked_prefill_size > 0
else self.server_args.max_prefill_tokens
)
if self.token_to_kv_pool_allocator is None:
if self.server_args.attention_backend == "ascend":
self.token_to_kv_pool_allocator = AscendPagedTokenToKVPoolAllocator(
@@ -1396,7 +1391,6 @@ class ModelRunner:
device=self.device,
kvcache=self.token_to_kv_pool,
need_sort=need_sort,
max_num_extend_tokens=max_num_extend_tokens,
)
else:
assert self.is_draft_worker