Update max_req_len and max_req_input_len (#1748)

This commit is contained in:
Liangsheng Yin
2024-10-21 16:12:04 -07:00
committed by GitHub
parent 94cde10920
commit 5e1558f1f2
2 changed files with 9 additions and 2 deletions

View File

@@ -165,6 +165,7 @@ class Scheduler:
self.max_total_num_tokens,
self.max_prefill_tokens,
self.max_running_requests,
+ self.max_req_len,
self.max_req_input_len,
self.random_seed,
self.device,
@@ -421,13 +422,14 @@ class Scheduler:
"the max context length. Truncated!!!"
)
req.origin_input_ids = req.origin_input_ids[: self.max_req_input_len]
req.sampling_params.max_new_tokens = min(
(
req.sampling_params.max_new_tokens
if req.sampling_params.max_new_tokens is not None
else 1 << 30
),
- self.max_req_input_len - len(req.origin_input_ids),
+ self.max_req_len - len(req.origin_input_ids) - 1,
)
self.waiting_queue.append(req)