Fix the new-page count for the next decode step when overlap scheduling is enabled (#6984)
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
@@ -1359,7 +1359,11 @@ class ScheduleBatch(ScheduleBatchDisaggregationDecodeMixin):
|
||||
return len(self.reqs)
|
||||
# In the decoding phase, the length of a request's KV cache should be
|
||||
# the total length of the request minus 1
|
||||
return sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
|
||||
return (
|
||||
sum(1 for req in self.reqs if req.seqlen % page_size == 0)
|
||||
if self.enable_overlap
|
||||
else sum(1 for req in self.reqs if (req.seqlen - 1) % page_size == 0)
|
||||
)
|
||||
|
||||
def check_decode_mem(self, buf_multiplier=1):
|
||||
tokens_required = (
|
||||
|
||||
Reference in New Issue
Block a user