diff --git a/python/sglang/srt/disaggregation/decode.py b/python/sglang/srt/disaggregation/decode.py index 576780ebf..ea7f1cc0e 100644 --- a/python/sglang/srt/disaggregation/decode.py +++ b/python/sglang/srt/disaggregation/decode.py @@ -604,9 +604,21 @@ class DecodeTransferQueue: : decode_req.req.top_logprobs_num ].tolist() ) + if hasattr(decode_req.kv_receiver, "clear"): decode_req.kv_receiver.clear() - transferred_reqs.append(decode_req.req) + + # special handling for sampling_params.max_new_tokens == 1 + if decode_req.req.sampling_params.max_new_tokens == 1: + # finish immediately + decode_req.req.check_finished() + self.scheduler.stream_output( + [decode_req.req], decode_req.req.return_logprob + ) + self.tree_cache.cache_finished_req(decode_req.req) + else: + transferred_reqs.append(decode_req.req) + indices_to_remove.add(i) elif poll in [ KVPoll.Bootstrapping,