[Bugfix] The server fails to locate the request, leading to the server hanging. (#3721)
### What this PR does / why we need it? Fix bug: in the mooncake pooling scenario, when the client closes the request, the server fails to locate the request, leading to the server hanging. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? Pull up the PD-separated pooling service, send requests using aisbench, press CTRL+C twice, and check whether the vllm_ascend service exits. --------- Signed-off-by: linhebiwen <linhebiwen@gmail.com>
This commit is contained in:
@@ -284,7 +284,7 @@ class MooncakeStoreConnectorV1Scheduler:
|
||||
for finished_req_id in scheduler_output.finished_req_ids:
|
||||
self._request_trackers.pop(finished_req_id, None)
|
||||
self._unfinished_requests.pop(finished_req_id, None)
|
||||
self._unfinished_request_ids.remove(finished_req_id)
|
||||
self._unfinished_request_ids.discard(finished_req_id)
|
||||
|
||||
meta = MooncakeConnectorMetadata(self._unfinished_request_ids)
|
||||
|
||||
@@ -418,7 +418,8 @@ class MooncakeStoreConnectorV1Scheduler:
|
||||
"""
|
||||
if self.kv_role == "kv_consumer":
|
||||
return False, None
|
||||
if self._request_trackers[request.request_id].num_saved_tokens <= 0:
|
||||
tracker = self._request_trackers.get(request.request_id)
|
||||
if tracker is not None and tracker.num_saved_tokens <= 0:
|
||||
return False, None
|
||||
delay_free_blocks = len(block_ids) > 0
|
||||
if delay_free_blocks:
|
||||
|
||||
Reference in New Issue
Block a user