[Bugfix] Fix kvpool precision synchronization (#4574)

### What this PR does / why we need it?
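Fix kvpool precision synchronization. In the KV cache store sending threads (`KVCacheStoreSendingThread` and `KVCacheStoreLayerSendingThread`), this change removes the `torch.npu.current_stream().synchronize()` call that preceded each `self.m_store.put(...)`.
Related issue: https://github.com/vllm-project/vllm-ascend/issues/4412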


- vLLM version: v0.11.2

---------

Signed-off-by: LCAIZJ <leichao139636@163.com>
This commit is contained in:
Chao Lei
2025-11-30 09:39:07 +08:00
committed by GitHub
parent 2b3bfe432e
commit ff7061317f
2 changed files with 1 addition and 6 deletions

View File

@@ -117,7 +117,6 @@ class KVCacheStoreSendingThread(KVTransferThread):
addr_list.append(addr)
size_list.append(size)
if self.dcp_size > 1:
torch.npu.current_stream().synchronize()
self.m_store.put(key_list, addr_list, size_list)
else:
key_list_tp = key_list[self.tp_rank % self.put_step::self.put_step]
@@ -126,7 +125,6 @@ class KVCacheStoreSendingThread(KVTransferThread):
size_list_tp = size_list[self.tp_rank %
self.put_step::self.put_step]
if key_list_tp:
torch.npu.current_stream().synchronize()
self.m_store.put(key_list_tp, addr_list_tp, size_list_tp)
if is_last_chunk:
self.set_finished_request(req_id)
@@ -205,7 +203,6 @@ class KVCacheStoreLayerSendingThread(KVTransferThread):
addr_list.append(addr)
size_list.append(size)
if self.dcp_size > 1:
torch.npu.current_stream().synchronize()
self.m_store.put(key_list, addr_list, size_list)
else:
key_list_tp = key_list[self.tp_rank % self.put_step::self.put_step]
@@ -214,7 +211,6 @@ class KVCacheStoreLayerSendingThread(KVTransferThread):
size_list_tp = size_list[self.tp_rank %
self.put_step::self.put_step]
if key_list_tp:
torch.npu.current_stream().synchronize()
self.m_store.put(key_list_tp, addr_list_tp, size_list_tp)
if req_meta.layer_id == self.final_layer_id and req_meta.is_last_chunk:
self.set_finished_request(req_meta.req_id)