[Bugfix] Fix hang when using DP attention with HiRadixCache (#7159)
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
This commit is contained in:
@@ -558,7 +558,11 @@ class Scheduler(
|
|||||||
self.tree_cache = HiRadixCache(
|
self.tree_cache = HiRadixCache(
|
||||||
req_to_token_pool=self.req_to_token_pool,
|
req_to_token_pool=self.req_to_token_pool,
|
||||||
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
|
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
|
||||||
tp_cache_group=self.tp_cpu_group,
|
tp_cache_group=(
|
||||||
|
self.attn_tp_cpu_group
|
||||||
|
if self.server_args.enable_dp_attention
|
||||||
|
else self.tp_cpu_group
|
||||||
|
),
|
||||||
page_size=self.page_size,
|
page_size=self.page_size,
|
||||||
hicache_ratio=server_args.hicache_ratio,
|
hicache_ratio=server_args.hicache_ratio,
|
||||||
hicache_size=server_args.hicache_size,
|
hicache_size=server_args.hicache_size,
|
||||||
|
|||||||
Reference in New Issue
Block a user