[HiCacheStorage] Support page_first_direct layout for generic set & get (#10522)

This commit is contained in:
huangtingwei
2025-09-19 20:47:16 +08:00
committed by GitHub
parent 873d858b28
commit 7f399e4bce
2 changed files with 35 additions and 0 deletions

View File

@@ -466,6 +466,9 @@ class MHATokenToKVPoolHost(HostKVCache):
return self.kv_buffer[:, :, index : index + self.page_size, :, :].flatten()
elif self.layout == "page_first":
return self.kv_buffer[:, index : index + self.page_size, :, :, :].flatten()
elif self.layout == "page_first_direct":
real_index = index // self.page_size
return self.kv_buffer[:, real_index : real_index + 1, :, :, :, :].flatten()
else:
raise ValueError(f"Unsupported layout: {self.layout}")
@@ -494,6 +497,13 @@ class MHATokenToKVPoolHost(HostKVCache):
2, self.page_size, self.layer_num, self.head_num, self.head_dim
)
)
elif self.layout == "page_first_direct":
real_index = index // self.page_size
self.kv_buffer[:, real_index : real_index + 1, :, :, :, :] = (
data_page.reshape(
2, 1, self.layer_num, self.page_size, self.head_num, self.head_dim
)
)
else:
raise ValueError(f"Unsupported layout: {self.layout}")
@@ -731,6 +741,9 @@ class MLATokenToKVPoolHost(HostKVCache):
return self.kv_buffer[:, index : index + self.page_size, :, :].flatten()
elif self.layout == "page_first":
return self.kv_buffer[index : index + self.page_size, :, :, :].flatten()
elif self.layout == "page_first_direct":
real_index = index // self.page_size
return self.kv_buffer[real_index : real_index + 1, :, :, :, :].flatten()
else:
raise ValueError(f"Unsupported layout: {self.layout}")
@@ -762,6 +775,15 @@ class MLATokenToKVPoolHost(HostKVCache):
1,
self.kv_lora_rank + self.qk_rope_head_dim,
)
elif self.layout == "page_first_direct":
real_index = index // self.page_size
self.kv_buffer[real_index : real_index + 1, :, :, :, :] = data_page.reshape(
1,
self.layer_num,
self.page_size,
1,
self.kv_lora_rank + self.qk_rope_head_dim,
)
else:
raise ValueError(f"Unsupported layout: {self.layout}")