Memorypool chunked prefetch (#614)

This commit is contained in:
Liangsheng Yin
2024-07-13 15:24:03 -07:00
committed by GitHub
parent 65c6577696
commit 10143e1a5f
5 changed files with 30 additions and 39 deletions

View File

@@ -141,12 +141,5 @@ class RadixAttention(nn.Module):
if input_metadata.out_cache_loc is not None:
key_buffer[input_metadata.out_cache_loc] = cache_k
value_buffer[input_metadata.out_cache_loc] = cache_v
elif input_metadata.out_cache_cont_start is not None:
key_buffer[
input_metadata.out_cache_cont_start : input_metadata.out_cache_cont_end
] = cache_k
value_buffer[
input_metadata.out_cache_cont_start : input_metadata.out_cache_cont_end
] = cache_v
else:
raise RuntimeError()