From c67569491c747bd17a31157325c3880d84aacea7 Mon Sep 17 00:00:00 2001 From: pansicheng Date: Thu, 4 Sep 2025 19:15:26 +0800 Subject: [PATCH] Ensure chunked request extension length respects both rem_chunk_tokens and rem_total_tokens limits (#10003) --- python/sglang/srt/managers/schedule_policy.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/schedule_policy.py b/python/sglang/srt/managers/schedule_policy.py index 4665207c1..ef0d01e44 100644 --- a/python/sglang/srt/managers/schedule_policy.py +++ b/python/sglang/srt/managers/schedule_policy.py @@ -380,8 +380,9 @@ class PrefillAdder: self.log_input_tokens += extend_input_len def add_chunked_req(self, req: Req): - truncated = req.extend_input_len > self.rem_chunk_tokens - req.extend_input_len = min(req.extend_input_len, self.rem_chunk_tokens) + _rem_tokens = min(self.rem_chunk_tokens, int(self.rem_total_tokens)) + truncated = req.extend_input_len > _rem_tokens + req.extend_input_len = min(req.extend_input_len, _rem_tokens) req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len] self.can_run_list.append(req) self._update_prefill_budget(