[PD] Respect sampling_params.max_new_tokens when PD disaggregation is activated (#7598)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -604,9 +604,21 @@ class DecodeTransferQueue:
|
||||
: decode_req.req.top_logprobs_num
|
||||
].tolist()
|
||||
)
|
||||
|
||||
if hasattr(decode_req.kv_receiver, "clear"):
|
||||
decode_req.kv_receiver.clear()
|
||||
transferred_reqs.append(decode_req.req)
|
||||
|
||||
# special handling for sampling_params.max_new_tokens == 1
|
||||
if decode_req.req.sampling_params.max_new_tokens == 1:
|
||||
# finish immediately
|
||||
decode_req.req.check_finished()
|
||||
self.scheduler.stream_output(
|
||||
[decode_req.req], decode_req.req.return_logprob
|
||||
)
|
||||
self.tree_cache.cache_finished_req(decode_req.req)
|
||||
else:
|
||||
transferred_reqs.append(decode_req.req)
|
||||
|
||||
indices_to_remove.add(i)
|
||||
elif poll in [
|
||||
KVPoll.Bootstrapping,
|
||||
|
||||
Reference in New Issue
Block a user