From 5e1558f1f26f0fc060ea261c9e81b767dc8e3fb9 Mon Sep 17 00:00:00 2001 From: Liangsheng Yin Date: Mon, 21 Oct 2024 16:12:04 -0700 Subject: [PATCH] Update `max_req_len` and `max_req_input_len` (#1748) --- python/sglang/srt/managers/scheduler.py | 4 +++- python/sglang/srt/managers/tp_worker.py | 7 ++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/managers/scheduler.py b/python/sglang/srt/managers/scheduler.py index b2f217c85..210a243a4 100644 --- a/python/sglang/srt/managers/scheduler.py +++ b/python/sglang/srt/managers/scheduler.py @@ -165,6 +165,7 @@ class Scheduler: self.max_total_num_tokens, self.max_prefill_tokens, self.max_running_requests, + self.max_req_len, self.max_req_input_len, self.random_seed, self.device, @@ -421,13 +422,14 @@ class Scheduler: "the max context length. Truncated!!!" ) req.origin_input_ids = req.origin_input_ids[: self.max_req_input_len] + req.sampling_params.max_new_tokens = min( ( req.sampling_params.max_new_tokens if req.sampling_params.max_new_tokens is not None else 1 << 30 ), - self.max_req_input_len - len(req.origin_input_ids), + self.max_req_len - len(req.origin_input_ids) - 1, ) self.waiting_queue.append(req) diff --git a/python/sglang/srt/managers/tp_worker.py b/python/sglang/srt/managers/tp_worker.py index 302c5d740..561bfd77c 100644 --- a/python/sglang/srt/managers/tp_worker.py +++ b/python/sglang/srt/managers/tp_worker.py @@ -90,10 +90,14 @@ class TpModelWorker: ), self.model_runner.req_to_token_pool.size, ) - self.max_req_input_len = min( + self.max_req_len = min( self.model_config.context_len - 1, self.max_total_num_tokens - 1, ) + self.max_req_input_len = self.max_req_len - 5 + assert ( + self.max_req_len > 0 and self.max_req_input_len > 0 + ), "Memory pool size is too small" # Sync random seed across TP workers self.random_seed = broadcast_pyobj( @@ -108,6 +112,7 @@ class TpModelWorker: self.max_total_num_tokens, self.max_prefill_tokens, self.max_running_requests, + self.max_req_len, self.max_req_input_len, self.random_seed, self.device,