### What this PR does / why we need it? In some scenarios, loading data from the pool synchronously performs better than loading it asynchronously. This PR therefore adds a switch that controls whether data is loaded from the pool asynchronously. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- <!-- Thanks for sending a pull request! BEFORE SUBMITTING, PLEASE READ https://docs.vllm.ai/en/latest/contributing/overview.html --> Signed-off-by: fems14 <1804143737@qq.com>
This commit is contained in:
@@ -65,15 +65,15 @@ class Mooncakestore():
|
||||
logger.error(msg)
|
||||
raise RuntimeError(msg)
|
||||
|
||||
def set_kv_caches(self, kvcache):
    """Remember the KV caches on this instance.

    The iterable ``kvcache`` is materialised into a new list, which is
    stored as ``self.kvcache``.
    """
    # Unpacking into a list literal builds a fresh list from the iterable.
    self.kvcache = [*kvcache]
|
||||
|
||||
def exists(self, key: MooncakeEngineKey) -> bool:
    """Report whether ``key`` is present in the backing store.

    The key is serialised with ``key.to_string()`` and passed to
    ``self.store.is_exist``; the result is true only when the store
    answers with the status value ``1``.
    """
    status = self.store.is_exist(key.to_string())
    return status == 1
|
||||
|
||||
def batch_exists(self, keys: list[str]) -> list[bool]:
    """Report, for each key, whether it is present in the backing store.

    Args:
        keys: Already-serialised key strings, forwarded unchanged to
            ``self.store.batch_is_exist``.

    Returns:
        One ``bool`` per input key, in the same order. An entry is
        ``True`` only when the store reported the status value ``1``
        for that key.
    """
    # The store answers with per-key status codes rather than booleans
    # (per the Mooncake Store API: 1 = present, 0 = absent, negative =
    # error). Normalise with the same ``== 1`` test that ``exists`` uses,
    # so the result matches the annotated return type and an error code
    # is never mistaken for a hit. Callers that compare entries against
    # 1 keep working because ``True == 1``.
    return [status == 1 for status in self.store.batch_is_exist(keys)]
|
||||
|
||||
def register_buffer(self, ptr, length):
    """Register a buffer with the backing store.

    ``ptr`` and ``length`` are handed to ``self.store.register_buffer``
    unchanged, and whatever that call returns is passed back to the
    caller.
    """
    outcome = self.store.register_buffer(ptr, length)
    return outcome
|
||||
|
||||
def get_batch(self, keys: list[str], addrs: list[list[int]],
|
||||
sizes: list[list[int]], block_ids: list[int]):
|
||||
try:
|
||||
|
||||
Reference in New Issue
Block a user