[PD] Support structured output (#6560)
This commit is contained in:
@@ -1065,8 +1065,11 @@ class Scheduler(
|
||||
else:
|
||||
self.waiting_queue.append(req)
|
||||
|
||||
def _extend_requests_to_queue(self, reqs: List[Req], is_retracted: bool = False):
|
||||
if self.disaggregation_mode == DisaggregationMode.DECODE:
|
||||
def _extend_requests_to_queue(self, reqs: List[Req]):
|
||||
if self.disaggregation_mode == DisaggregationMode.PREFILL:
|
||||
self.disagg_prefill_bootstrap_queue.extend(reqs)
|
||||
elif self.disaggregation_mode == DisaggregationMode.DECODE:
|
||||
# If this is a decode server, we put the request to the decode pending prealloc queue
|
||||
self.disagg_decode_prealloc_queue.extend(reqs)
|
||||
else:
|
||||
self.waiting_queue.extend(reqs)
|
||||
|
||||
Reference in New Issue
Block a user