diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py index 5fd830afe..66e6ae44e 100644 --- a/python/sglang/srt/managers/schedule_batch.py +++ b/python/sglang/srt/managers/schedule_batch.py @@ -692,9 +692,15 @@ class Req: self.surr_offset = max( self.read_offset - INIT_INCREMENTAL_DETOKENIZATION_OFFSET, 0 ) + self.surr_and_decode_ids = ( + self.origin_input_ids_unpadded[self.surr_offset :] + self.output_ids + ) + self.cur_decode_ids_len = len(self.output_ids) + else: + self.surr_and_decode_ids.extend(self.output_ids[self.cur_decode_ids_len :]) + self.cur_decode_ids_len = len(self.output_ids) - all_ids = self.origin_input_ids_unpadded + self.output_ids - return all_ids[self.surr_offset :], self.read_offset - self.surr_offset + return self.surr_and_decode_ids, self.read_offset - self.surr_offset def check_finished(self): if self.finished():