[HiCacheStorage] Support page_first_direct layout for generic set & get (#10522)
This commit is contained in:
@@ -466,6 +466,9 @@ class MHATokenToKVPoolHost(HostKVCache):
|
|||||||
return self.kv_buffer[:, :, index : index + self.page_size, :, :].flatten()
|
return self.kv_buffer[:, :, index : index + self.page_size, :, :].flatten()
|
||||||
elif self.layout == "page_first":
|
elif self.layout == "page_first":
|
||||||
return self.kv_buffer[:, index : index + self.page_size, :, :, :].flatten()
|
return self.kv_buffer[:, index : index + self.page_size, :, :, :].flatten()
|
||||||
|
elif self.layout == "page_first_direct":
|
||||||
|
real_index = index // self.page_size
|
||||||
|
return self.kv_buffer[:, real_index : real_index + 1, :, :, :, :].flatten()
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported layout: {self.layout}")
|
raise ValueError(f"Unsupported layout: {self.layout}")
|
||||||
|
|
||||||
@@ -494,6 +497,13 @@ class MHATokenToKVPoolHost(HostKVCache):
|
|||||||
2, self.page_size, self.layer_num, self.head_num, self.head_dim
|
2, self.page_size, self.layer_num, self.head_num, self.head_dim
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
elif self.layout == "page_first_direct":
|
||||||
|
real_index = index // self.page_size
|
||||||
|
self.kv_buffer[:, real_index : real_index + 1, :, :, :, :] = (
|
||||||
|
data_page.reshape(
|
||||||
|
2, 1, self.layer_num, self.page_size, self.head_num, self.head_dim
|
||||||
|
)
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported layout: {self.layout}")
|
raise ValueError(f"Unsupported layout: {self.layout}")
|
||||||
|
|
||||||
@@ -731,6 +741,9 @@ class MLATokenToKVPoolHost(HostKVCache):
|
|||||||
return self.kv_buffer[:, index : index + self.page_size, :, :].flatten()
|
return self.kv_buffer[:, index : index + self.page_size, :, :].flatten()
|
||||||
elif self.layout == "page_first":
|
elif self.layout == "page_first":
|
||||||
return self.kv_buffer[index : index + self.page_size, :, :, :].flatten()
|
return self.kv_buffer[index : index + self.page_size, :, :, :].flatten()
|
||||||
|
elif self.layout == "page_first_direct":
|
||||||
|
real_index = index // self.page_size
|
||||||
|
return self.kv_buffer[real_index : real_index + 1, :, :, :, :].flatten()
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported layout: {self.layout}")
|
raise ValueError(f"Unsupported layout: {self.layout}")
|
||||||
|
|
||||||
@@ -762,6 +775,15 @@ class MLATokenToKVPoolHost(HostKVCache):
|
|||||||
1,
|
1,
|
||||||
self.kv_lora_rank + self.qk_rope_head_dim,
|
self.kv_lora_rank + self.qk_rope_head_dim,
|
||||||
)
|
)
|
||||||
|
elif self.layout == "page_first_direct":
|
||||||
|
real_index = index // self.page_size
|
||||||
|
self.kv_buffer[real_index : real_index + 1, :, :, :, :] = data_page.reshape(
|
||||||
|
1,
|
||||||
|
self.layer_num,
|
||||||
|
self.page_size,
|
||||||
|
1,
|
||||||
|
self.kv_lora_rank + self.qk_rope_head_dim,
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported layout: {self.layout}")
|
raise ValueError(f"Unsupported layout: {self.layout}")
|
||||||
|
|
||||||
|
|||||||
@@ -238,6 +238,19 @@ class TestHiCacheStorageLayerFirstDirectIO(HiCacheStorageBaseMixin, CustomTestCa
|
|||||||
return server_args, {}
|
return server_args, {}
|
||||||
|
|
||||||
|
|
||||||
|
class TestHiCacheStoragePageFirstDirectIO(HiCacheStorageBaseMixin, CustomTestCase):
    """Run the HiCache Storage test suite with the page_first_direct layout and direct IO."""

    @classmethod
    def _get_additional_server_args_and_env(cls):
        """Return the extra server arguments for this configuration.

        Returns:
            A 2-tuple. The first element maps server CLI flags to their
            values, selecting the ``page_first_direct`` memory layout and
            the ``direct`` IO backend. The second element is an empty dict
            (presumably the environment overrides, going by the method
            name -- confirm against the base mixin).
        """
        # No extra entries are needed for the second slot in this configuration.
        extra_env = {}
        layout_flags = {
            "--hicache-mem-layout": "page_first_direct",
            "--hicache-io-backend": "direct",
        }
        return layout_flags, extra_env
|
||||||
|
|
||||||
|
|
||||||
class TestHiCacheStoragePageFirstLayout(HiCacheStorageBaseMixin, CustomTestCase):
|
class TestHiCacheStoragePageFirstLayout(HiCacheStorageBaseMixin, CustomTestCase):
|
||||||
"""Page first layout tests for HiCache Storage functionality"""
|
"""Page first layout tests for HiCache Storage functionality"""
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user