Fix set kv cache multi-stream (#5975)
This commit is contained in:
@@ -374,9 +374,9 @@ class MHATokenToKVPool(KVCache):
|
|||||||
# Overlap the copy of K and V cache for small batch size
|
# Overlap the copy of K and V cache for small batch size
|
||||||
current_stream = self.device_module.current_stream()
|
current_stream = self.device_module.current_stream()
|
||||||
self.alt_stream.wait_stream(current_stream)
|
self.alt_stream.wait_stream(current_stream)
|
||||||
|
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
|
||||||
with self.device_module.stream(self.alt_stream):
|
with self.device_module.stream(self.alt_stream):
|
||||||
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
|
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
|
||||||
self.v_buffer[layer_id - self.start_layer][loc] = cache_v
|
|
||||||
current_stream.wait_stream(self.alt_stream)
|
current_stream.wait_stream(self.alt_stream)
|
||||||
else:
|
else:
|
||||||
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
|
self.k_buffer[layer_id - self.start_layer][loc] = cache_k
|
||||||
|
|||||||
Reference in New Issue
Block a user