[PD] Respect sampling_params.max_new_tokens when PD disaggregation is activated (#7598)
Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
This commit is contained in:
@@ -604,9 +604,21 @@ class DecodeTransferQueue:
|
|||||||
: decode_req.req.top_logprobs_num
|
: decode_req.req.top_logprobs_num
|
||||||
].tolist()
|
].tolist()
|
||||||
)
|
)
|
||||||
|
|
||||||
if hasattr(decode_req.kv_receiver, "clear"):
|
if hasattr(decode_req.kv_receiver, "clear"):
|
||||||
decode_req.kv_receiver.clear()
|
decode_req.kv_receiver.clear()
|
||||||
transferred_reqs.append(decode_req.req)
|
|
||||||
|
# special handling for sampling_params.max_new_tokens == 1
|
||||||
|
if decode_req.req.sampling_params.max_new_tokens == 1:
|
||||||
|
# finish immediately
|
||||||
|
decode_req.req.check_finished()
|
||||||
|
self.scheduler.stream_output(
|
||||||
|
[decode_req.req], decode_req.req.return_logprob
|
||||||
|
)
|
||||||
|
self.tree_cache.cache_finished_req(decode_req.req)
|
||||||
|
else:
|
||||||
|
transferred_reqs.append(decode_req.req)
|
||||||
|
|
||||||
indices_to_remove.add(i)
|
indices_to_remove.add(i)
|
||||||
elif poll in [
|
elif poll in [
|
||||||
KVPoll.Bootstrapping,
|
KVPoll.Bootstrapping,
|
||||||
|
|||||||
Reference in New Issue
Block a user