[BugFix][P/D] Modify the recalculation logic to prevent waiting requests from filling up the D node KVCache (#3641)

### What this PR does / why we need it?
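Modify the recalculation (recompute) logic in `RecomputeScheduler` to prevent
waiting requests from filling up the D node KVCache: WAITING requests are now
scheduled only when the current step has neither preempted nor recomputed
requests. The "Skipping chunk" message in the two `_handle_completions`
handlers is also lowered from warning to debug level.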

- vLLM version: v0.11.0rc3
- vLLM main: 17c540a993

Signed-off-by: underfituu <hzhucong@163.com>
This commit is contained in:
hucong
2025-10-25 09:14:20 +08:00
committed by GitHub
parent 39b994a987
commit 292cf339c3
3 changed files with 3 additions and 3 deletions

View File

@@ -565,7 +565,7 @@ async def _handle_completions(api: str, request: Request):
chunk_json = json.loads(chunk_str) chunk_json = json.loads(chunk_str)
except json.JSONDecodeError: except json.JSONDecodeError:
# if chunk is [done], skip it. # if chunk is [done], skip it.
logger.warning( logger.debug(
f"Skipping chunk: {chunk_str}") f"Skipping chunk: {chunk_str}")
yield chunk yield chunk
continue continue

View File

@@ -548,7 +548,7 @@ async def _handle_completions(api: str, request: Request):
chunk_json = json.loads(chunk_str) chunk_json = json.loads(chunk_str)
except json.JSONDecodeError: except json.JSONDecodeError:
# if chunk is [done], skip it. # if chunk is [done], skip it.
logger.warning( logger.debug(
f"Skipping chunk: {chunk_str}") f"Skipping chunk: {chunk_str}")
yield chunk yield chunk
continue continue

View File

@@ -362,7 +362,7 @@ class RecomputeScheduler(SchedulerInterface):
skipped_waiting_requests = create_request_queue(self.policy) skipped_waiting_requests = create_request_queue(self.policy)
# Next, schedule the WAITING requests. # Next, schedule the WAITING requests.
if not preempted_reqs: if not preempted_reqs and not recomputed_reqs:
while self.waiting and token_budget > 0: while self.waiting and token_budget > 0:
if len(self.running) == self.max_num_running_reqs: if len(self.running) == self.max_num_running_reqs:
break break