Optimize copy_kv_cache for spec decoding (#11126)
Co-authored-by: Xinyuan Tong <115166877+JustinTong0323@users.noreply.github.com>
This commit is contained in:
@@ -1672,6 +1672,9 @@ class ModelRunner:
|
||||
enable_memory_saver=self.server_args.enable_memory_saver,
|
||||
start_layer=self.start_layer,
|
||||
end_layer=self.end_layer,
|
||||
enable_kv_cache_copy=(
|
||||
self.server_args.speculative_algorithm is not None
|
||||
),
|
||||
)
|
||||
|
||||
# Initialize token_to_kv_pool_allocator
|
||||
|
||||
Reference in New Issue
Block a user