Ensure chunked request extension length respects both rem_chunk_tokens and rem_total_tokens limits (#10003)
This commit is contained in:
@@ -380,8 +380,9 @@ class PrefillAdder:
|
|||||||
self.log_input_tokens += extend_input_len
|
self.log_input_tokens += extend_input_len
|
||||||
|
|
||||||
def add_chunked_req(self, req: Req):
|
def add_chunked_req(self, req: Req):
|
||||||
truncated = req.extend_input_len > self.rem_chunk_tokens
|
_rem_tokens = min(self.rem_chunk_tokens, int(self.rem_total_tokens))
|
||||||
req.extend_input_len = min(req.extend_input_len, self.rem_chunk_tokens)
|
truncated = req.extend_input_len > _rem_tokens
|
||||||
|
req.extend_input_len = min(req.extend_input_len, _rem_tokens)
|
||||||
req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len]
|
req.fill_ids = req.fill_ids[: len(req.prefix_indices) + req.extend_input_len]
|
||||||
self.can_run_list.append(req)
|
self.can_run_list.append(req)
|
||||||
self._update_prefill_budget(
|
self._update_prefill_budget(
|
||||||
|
|||||||
Reference in New Issue
Block a user