Kernels for efficient KV cache IO (#7313)

This commit is contained in:
Zhiqiang Xie
2025-07-06 22:53:36 -07:00
committed by GitHub
parent 253454de9b
commit 2fc824b84c
7 changed files with 184 additions and 371 deletions

View File

@@ -591,6 +591,12 @@ class Scheduler(
hicache_ratio=server_args.hicache_ratio,
hicache_size=server_args.hicache_size,
hicache_write_policy=server_args.hicache_write_policy,
hicache_io_backend=(
"direct"
if server_args.attention_backend
== "fa3" # hot fix for incompatibility
else server_args.hicache_io_backend
),
)
self.tp_worker.register_hicache_layer_transfer_counter(
self.tree_cache.cache_controller.layer_done_counter