[PD] Release initial code (#4654)
Co-authored-by: SangBin Cho <rkooo567@gmail.com> Co-authored-by: Ying1123 <sqy1415@gmail.com> Co-authored-by: merrymercy <lianminzheng@gmail.com> Co-authored-by: makro Co-authored-by: dhou-xai
This commit is contained in:
@@ -271,6 +271,19 @@ class MHATokenToKVPool(KVCache):
|
||||
v_size_bytes += np.prod(v_cache.shape) * v_cache.dtype.itemsize
|
||||
return k_size_bytes, v_size_bytes
|
||||
|
||||
# For disaggregation (PD): helpers that expose the KV buffers for transfer
|
||||
def get_contiguous_buf_infos(self):
    """Describe every per-layer key and value buffer.

    Returns a tuple of three parallel lists. Each list contains one entry
    per key buffer (layers ``0 .. layer_num - 1``) followed by one entry
    per value buffer in the same layer order:

    * ``kv_data_ptrs``: the ``data_ptr()`` of each buffer.
    * ``kv_data_lens``: the ``nbytes`` of each whole buffer.
    * ``kv_item_lens``: the ``nbytes`` of ``buffer[0]``, i.e. the first
      entry along the leading dimension (presumably one token slot;
      confirm against the buffer layout).
    """
    layer_ids = range(self.layer_num)

    # Fetch each buffer exactly once and derive all three lists from it,
    # instead of calling the getters three times per layer. Order is all
    # key buffers first, then all value buffers.
    buffers = [self.get_key_buffer(i) for i in layer_ids]
    buffers += [self.get_value_buffer(i) for i in layer_ids]

    kv_data_ptrs = [buf.data_ptr() for buf in buffers]
    kv_data_lens = [buf.nbytes for buf in buffers]
    kv_item_lens = [buf[0].nbytes for buf in buffers]
    return kv_data_ptrs, kv_data_lens, kv_item_lens
|
||||
|
||||
# TODO: support different memory layouts
|
||||
def get_flat_data(self, indices):
|
||||
# prepare a large chunk of contiguous data for efficient transfer
|
||||
|
||||
Reference in New Issue
Block a user