From 59bb16b75c492aa636fec53f1d32680839e7d1e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E4=BD=95=E5=BF=85=E9=97=AE?= Date: Fri, 24 Oct 2025 17:18:03 +0800 Subject: [PATCH] [Bugfix] The server fails to locate the request, leading to the server hanging. (#3703) ### What this PR does / why we need it? fix bug: In the mooncake pooling scenario, when the client closes the request, the server fails to locate the request, leading to the server hanging. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Pull up the PD separated pooling service, send requests using aisbench, press CTRL+C twice, and check if the vllm_ascend service exits. - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 --------- Signed-off-by: linhebiwen --- .../distributed/mooncake/mooncake_store_connector_v1.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py b/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py index 3a7169a4..6b1bffee 100644 --- a/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py +++ b/vllm_ascend/distributed/mooncake/mooncake_store_connector_v1.py @@ -284,7 +284,7 @@ class MooncakeStoreConnectorV1Scheduler: for finished_req_id in scheduler_output.finished_req_ids: self._request_trackers.pop(finished_req_id, None) self._unfinished_requests.pop(finished_req_id, None) - self._unfinished_request_ids.remove(finished_req_id) + self._unfinished_request_ids.discard(finished_req_id) meta = MooncakeConnectorMetadata(self._unfinished_request_ids) @@ -418,7 +418,8 @@ class MooncakeStoreConnectorV1Scheduler: """ if self.kv_role == "kv_consumer": return False, None - if self._request_trackers[request.request_id].num_saved_tokens <= 0: + tracker = self._request_trackers.get(request.request_id) + 
if tracker is not None and tracker.num_saved_tokens <= 0: return False, None delay_free_blocks = len(block_ids) > 0 if delay_free_blocks: