[Bugfix] Fix hang when using DP attention with HiRadixCache (#7159)
Signed-off-by: huanglong <huanglong@linux.alibaba.com>
This commit is contained in:
@@ -558,7 +558,11 @@ class Scheduler(
 self.tree_cache = HiRadixCache(
     req_to_token_pool=self.req_to_token_pool,
     token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
-    tp_cache_group=self.tp_cpu_group,
+    tp_cache_group=(
+        self.attn_tp_cpu_group
+        if self.server_args.enable_dp_attention
+        else self.tp_cpu_group
+    ),
     page_size=self.page_size,
     hicache_ratio=server_args.hicache_ratio,
     hicache_size=server_args.hicache_size,
Reference in New Issue
Block a user