From fa7a696d04f65848362332c2edf13d2d5c6d4921 Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Wed, 24 Jan 2024 10:44:32 +0000 Subject: [PATCH] Fix max_new_tokens for limited memory --- python/sglang/srt/managers/router/model_rpc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/sglang/srt/managers/router/model_rpc.py b/python/sglang/srt/managers/router/model_rpc.py index c0c46ca17..4d77eed03 100644 --- a/python/sglang/srt/managers/router/model_rpc.py +++ b/python/sglang/srt/managers/router/model_rpc.py @@ -229,6 +229,7 @@ class ModelRpcServer(rpyc.Service): req.sampling_params.max_new_tokens = min( req.sampling_params.max_new_tokens, self.model_config.context_len - 1 - len(req.input_ids), + self.max_total_num_token - 128 - len(req.input_ids), ) self.forward_queue.append(req)