From fff5df3efe5e3c723e412a4cfb4129c6cd3f5bbc Mon Sep 17 00:00:00 2001 From: wangxiaoteng888 <56506195+wangxiaoteng888@users.noreply.github.com> Date: Sat, 17 Jan 2026 18:49:27 +0800 Subject: [PATCH] [P/D]The issue of solving the force-free secondary release request, which causes the node to crash. (#5968) ### What this PR does / why we need it? Force-freeing a request a second time causes the node to crash. A request that has been pulled too quickly should not be added to the delayed-free queue. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By CI. - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/2c24bc6996cb165fce92f780b388a5e39b3f4060 Signed-off-by: wangxiaoteng --- .../distributed/kv_transfer/kv_p2p/mooncake_connector.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py index 8103f8de..a43e856b 100644 --- a/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py +++ b/vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py @@ -152,7 +152,8 @@ class KVCacheTaskTracker: def add_delayed_request(self, request_id: str, delay_start_time: float): """Add a delayed free request.""" with self.done_task_lock: - self.delayed_free_requests[request_id] = delay_start_time + if request_id in self.reqs_to_process: + self.delayed_free_requests[request_id] = delay_start_time def _retrieve_expired_requests(self): """Retrieve all expired delayed requests."""