From 7f3ee861aee6b0b1d23fac1a80abfd18c9d94229 Mon Sep 17 00:00:00 2001
From: pansicheng
Date: Sat, 21 Jun 2025 15:34:45 +0800
Subject: [PATCH] fix overlap pagecount (#6984)

Co-authored-by: Zhiqiang Xie
---
 python/sglang/srt/managers/schedule_batch.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/managers/schedule_batch.py b/python/sglang/srt/managers/schedule_batch.py
index 6143c5575..7ad3ee3c8 100644
--- a/python/sglang/srt/managers/schedule_batch.py
+++ b/python/sglang/srt/managers/schedule_batch.py
@@ -1359,7 +1359,11 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
             return len(self.reqs)
         # In the decoding phase, the length of a request's KV cache should be
         # the total length of the request minus 1
-        return sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
+        return (
+            sum(1 for req in self.reqs if req.seqlen % page_size == 0)
+            if self.enable_overlap
+            else sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
+        )
 
     def check_decode_mem(self, buf_multiplier=1):
         tokens_required = (