Fix prefill OOM (#1743)

This commit is contained in:
Liangsheng Yin
2024-10-21 03:54:35 -07:00
committed by GitHub
parent 09603c6dc9
commit efb099cdee

View File

@@ -427,7 +427,7 @@ class Scheduler:
if req.sampling_params.max_new_tokens is not None
else 1 << 30
),
- self.max_req_input_len - 1 - len(req.origin_input_ids),
+ self.max_req_input_len - len(req.origin_input_ids),
)
self.waiting_queue.append(req)