[PD] Support structured output (#6560)

This commit is contained in:
Byron Hsu
2025-05-23 21:49:00 -07:00
committed by GitHub
parent ed0c3035cd
commit 2d831c6ef9
6 changed files with 106 additions and 13 deletions

View File

@@ -1065,8 +1065,11 @@ class Scheduler(
else:
self.waiting_queue.append(req)
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
if self.disaggregation_mode == DisaggregationMode.DECODE:
def _extend_requests_to_queue(self, reqs: List[Req]):
if self.disaggregation_mode == DisaggregationMode.PREFILL:
self.disagg_prefill_bootstrap_queue.extend(reqs)
elif self.disaggregation_mode == DisaggregationMode.DECODE:
# If this is a decode server, we put the request to the decode pending prealloc queue
self.disagg_decode_prealloc_queue.extend(reqs)
else:
self.waiting_queue.extend(reqs)