From 268684439b4d5e99cc73937848c222a0322dc50a Mon Sep 17 00:00:00 2001
From: Liangsheng Yin
Date: Tue, 23 Jul 2024 11:52:50 -0700
Subject: [PATCH] Use min new token ratio at start (#701)

---
 python/sglang/srt/managers/controller/tp_worker.py | 5 +----
 python/sglang/srt/server_args.py                   | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/python/sglang/srt/managers/controller/tp_worker.py b/python/sglang/srt/managers/controller/tp_worker.py
index ab7c7f9e9..2563c2912 100644
--- a/python/sglang/srt/managers/controller/tp_worker.py
+++ b/python/sglang/srt/managers/controller/tp_worker.py
@@ -161,15 +161,12 @@ class ModelTpServer:
         assert (
             server_args.schedule_conservativeness >= 0
         ), "Invalid schedule_conservativeness"
-        self.new_token_ratio = min(
-            global_config.base_new_token_ratio * server_args.schedule_conservativeness,
-            1.0,
-        )
         self.min_new_token_ratio = min(
             global_config.base_min_new_token_ratio
             * server_args.schedule_conservativeness,
             1.0,
         )
+        self.new_token_ratio = self.min_new_token_ratio
         self.new_token_ratio_decay = global_config.new_token_ratio_decay
         self.new_token_ratio_recovery = global_config.new_token_ratio_recovery
 
diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py
index 8ec153887..906aa38ae 100644
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -29,7 +29,7 @@ class ServerArgs:
     max_prefill_tokens: Optional[int] = None
     max_running_requests: Optional[int] = None
     schedule_heuristic: str = "lpm"
-    schedule_conservativeness: float = 0.8
+    schedule_conservativeness: float = 1.0
 
     # Other runtime options
     tp_size: int = 1