[Bugfix] The server fails to locate the request, leading to the server hanging. (#3721)

### What this PR does / why we need it?
Fix bug: in the Mooncake pooling scenario, when the client closes the
request, the server fails to locate the request, leading to the server
hanging.

### Does this PR introduce _any_ user-facing change?

### How was this patch tested?
Start the PD-separated pooling service, send requests using aisbench,
press CTRL+C twice, and check whether the vllm_ascend service exits.

---------

Signed-off-by: linhebiwen <linhebiwen@gmail.com>
This commit is contained in:
何必问
2025-10-24 17:41:29 +08:00
committed by GitHub
parent 4e21b1537e
commit 33514a4cc2

View File

@@ -284,7 +284,7 @@ class MooncakeStoreConnectorV1Scheduler:
for finished_req_id in scheduler_output.finished_req_ids:
self._request_trackers.pop(finished_req_id, None)
self._unfinished_requests.pop(finished_req_id, None)
self._unfinished_request_ids.remove(finished_req_id)
self._unfinished_request_ids.discard(finished_req_id)
meta = MooncakeConnectorMetadata(self._unfinished_request_ids)
@@ -418,7 +418,8 @@ class MooncakeStoreConnectorV1Scheduler:
"""
if self.kv_role == "kv_consumer":
return False, None
if self._request_trackers[request.request_id].num_saved_tokens <= 0:
tracker = self._request_trackers.get(request.request_id)
if tracker is not None and tracker.num_saved_tokens <= 0:
return False, None
delay_free_blocks = len(block_ids) > 0
if delay_free_blocks: