Optimize copy_kv_cache for speculative decoding (#11126)

Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
This commit is contained in:
YAMY
2025-10-08 02:43:30 +00:00
committed by GitHub
parent c4d77774e1
commit 5a9170d993
2 changed files with 91 additions and 25 deletions

View File

@@ -1672,6 +1672,9 @@ class ModelRunner:
enable_memory_saver=self.server_args.enable_memory_saver,
start_layer=self.start_layer,
end_layer=self.end_layer,
enable_kv_cache_copy=(
self.server_args.speculative_algorithm is not None
),
)
# Initialize token_to_kv_pool_allocator