[KVPOOL]decode save kvcache (#5168)
### What this PR does / why we need it?
Allow the decode side of the KV pool to save its KV cache.
Currently only MLA is supported.
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.12.0
- vLLM main: ad32e3e19c
---------
Signed-off-by: baxingpiaochong <771405853@qq.com>
Co-authored-by: Chao Lei <leichao139636@163.com>
This commit is contained in:
@@ -34,6 +34,8 @@ class AscendStoreConnector(KVConnectorBase_V1):
|
||||
|
||||
self.use_layerwise = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
||||
"use_layerwise", False)
|
||||
self.consumer_is_to_put = vllm_config.kv_transfer_config.kv_connector_extra_config.get(
|
||||
"consumer_is_to_put", False)
|
||||
|
||||
connector_name = vllm_config.kv_transfer_config.kv_connector
|
||||
if connector_name == "MooncakeConnectorStoreV1":
|
||||
@@ -121,7 +123,7 @@ class AscendStoreConnector(KVConnectorBase_V1):
|
||||
self.connector_worker.save_kv_layer(self._get_connector_metadata())
|
||||
|
||||
def wait_for_save(self):
|
||||
if self.kv_role == "kv_consumer":
|
||||
if self.kv_role == "kv_consumer" and not self.consumer_is_to_put:
|
||||
# Don't do save if the role is kv_consumer
|
||||
return
|
||||
|
||||
@@ -135,7 +137,8 @@ class AscendStoreConnector(KVConnectorBase_V1):
|
||||
"""Get the finished recving and sending requests."""
|
||||
assert self.connector_worker is not None
|
||||
meta = self._get_connector_metadata()
|
||||
done_sending, done_recving = self.connector_worker.get_finished()
|
||||
done_sending, done_recving = self.connector_worker.get_finished(
|
||||
finished_req_ids)
|
||||
sended_and_finished: set[str] = set()
|
||||
for item in list(self.sended_but_unfinished_reqs):
|
||||
if item not in meta.unfinished_request_ids:
|
||||
|
||||
Reference in New Issue
Block a user