[Optimization] Update estimated_num_new_pages logic in TokenToKVPoolAllocator (#8794)
Signed-off-by: Xingrui Yi <yixingrui@linux.alibaba.com>
Co-authored-by: Xingrui Yi <yixingrui@linux.alibaba.com>
This commit is contained in:
@@ -1300,6 +1300,8 @@ class ModelRunner:
|
||||
dtype=self.kv_cache_dtype,
|
||||
device=self.device,
|
||||
kvcache=self.token_to_kv_pool,
|
||||
need_sort=self.server_args.disaggregation_mode
|
||||
in ("decode", "prefill"),
|
||||
)
|
||||
else:
|
||||
self.token_to_kv_pool_allocator = TokenToKVPoolAllocator(
|
||||
@@ -1307,6 +1309,8 @@ class ModelRunner:
|
||||
dtype=self.kv_cache_dtype,
|
||||
device=self.device,
|
||||
kvcache=self.token_to_kv_pool,
|
||||
need_sort=self.server_args.disaggregation_mode
|
||||
in ("decode", "prefill"),
|
||||
)
|
||||
else:
|
||||
if _is_npu:
|
||||
@@ -1316,6 +1320,8 @@ class ModelRunner:
|
||||
dtype=self.kv_cache_dtype,
|
||||
device=self.device,
|
||||
kvcache=self.token_to_kv_pool,
|
||||
need_sort=self.server_args.disaggregation_mode
|
||||
in ("decode", "prefill"),
|
||||
)
|
||||
else:
|
||||
self.token_to_kv_pool_allocator = PagedTokenToKVPoolAllocator(
|
||||
@@ -1324,6 +1330,8 @@ class ModelRunner:
|
||||
dtype=self.kv_cache_dtype,
|
||||
device=self.device,
|
||||
kvcache=self.token_to_kv_pool,
|
||||
need_sort=self.server_args.disaggregation_mode
|
||||
in ("decode", "prefill"),
|
||||
)
|
||||
else:
|
||||
assert self.is_draft_worker
|
||||
|
||||
Reference in New Issue
Block a user